mirror of
https://github.com/gabehf/Koito.git
synced 2026-04-22 12:01:52 -07:00
Add bulk import optimization: track_lookup cache, batch inserts, BulkSubmitter
This commit is contained in:
parent
0ec7b458cc
commit
ae373a7090
21 changed files with 1296 additions and 125 deletions
|
|
@ -122,17 +122,18 @@ func createOrUpdateAlbumWithMbzReleaseID(ctx context.Context, d db.DB, opts Asso
|
|||
}
|
||||
}
|
||||
|
||||
l.Debug().Msg("Searching for album images...")
|
||||
var imgid uuid.UUID
|
||||
imgUrl, err := images.GetAlbumImage(ctx, images.AlbumImageOpts{
|
||||
Artists: utils.UniqueIgnoringCase(slices.Concat(utils.FlattenMbzArtistCreditNames(release.ArtistCredit), utils.FlattenArtistNames(opts.Artists))),
|
||||
Album: release.Title,
|
||||
ReleaseMbzID: &opts.ReleaseMbzID,
|
||||
})
|
||||
var imgUrl string
|
||||
if !opts.SkipCacheImage {
|
||||
l.Debug().Msg("Searching for album images...")
|
||||
imgUrl, err = images.GetAlbumImage(ctx, images.AlbumImageOpts{
|
||||
Artists: utils.UniqueIgnoringCase(slices.Concat(utils.FlattenMbzArtistCreditNames(release.ArtistCredit), utils.FlattenArtistNames(opts.Artists))),
|
||||
Album: release.Title,
|
||||
ReleaseMbzID: &opts.ReleaseMbzID,
|
||||
})
|
||||
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
if !opts.SkipCacheImage {
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
var size ImageSize
|
||||
if cfg.FullImageCacheEnabled() {
|
||||
size = ImageSizeFull
|
||||
|
|
@ -144,13 +145,11 @@ func createOrUpdateAlbumWithMbzReleaseID(ctx context.Context, d db.DB, opts Asso
|
|||
if err != nil {
|
||||
l.Err(err).Msg("createOrUpdateAlbumWithMbzReleaseID: failed to cache image")
|
||||
}
|
||||
} else if err != nil {
|
||||
l.Debug().Msgf("createOrUpdateAlbumWithMbzReleaseID: failed to get album images for %s: %s", release.Title, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
l.Debug().Msgf("createOrUpdateAlbumWithMbzReleaseID: failed to get album images for %s: %s", release.Title, err.Error())
|
||||
}
|
||||
|
||||
album, err = d.SaveAlbum(ctx, db.SaveAlbumOpts{
|
||||
Title: release.Title,
|
||||
MusicBrainzID: opts.ReleaseMbzID,
|
||||
|
|
@ -217,14 +216,15 @@ func matchAlbumByTitle(ctx context.Context, d db.DB, opts AssociateAlbumOpts) (*
|
|||
return nil, fmt.Errorf("matchAlbumByTitle: %w", err)
|
||||
} else {
|
||||
var imgid uuid.UUID
|
||||
imgUrl, err := images.GetAlbumImage(ctx, images.AlbumImageOpts{
|
||||
Artists: utils.FlattenArtistNames(opts.Artists),
|
||||
Album: opts.ReleaseName,
|
||||
ReleaseMbzID: &opts.ReleaseMbzID,
|
||||
})
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
if !opts.SkipCacheImage {
|
||||
var imgUrl string
|
||||
if !opts.SkipCacheImage {
|
||||
imgUrl, err = images.GetAlbumImage(ctx, images.AlbumImageOpts{
|
||||
Artists: utils.FlattenArtistNames(opts.Artists),
|
||||
Album: opts.ReleaseName,
|
||||
ReleaseMbzID: &opts.ReleaseMbzID,
|
||||
})
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
var size ImageSize
|
||||
if cfg.FullImageCacheEnabled() {
|
||||
size = ImageSizeFull
|
||||
|
|
@ -234,13 +234,12 @@ func matchAlbumByTitle(ctx context.Context, d db.DB, opts AssociateAlbumOpts) (*
|
|||
l.Debug().Msg("Downloading album image from source...")
|
||||
err = DownloadAndCacheImage(ctx, imgid, imgUrl, size)
|
||||
if err != nil {
|
||||
l.Err(err).Msg("createOrUpdateAlbumWithMbzReleaseID: failed to cache image")
|
||||
l.Err(err).Msg("matchAlbumByTitle: failed to cache image")
|
||||
}
|
||||
} else if err != nil {
|
||||
l.Debug().AnErr("error", err).Msgf("matchAlbumByTitle: failed to get album images for %s", opts.ReleaseName)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
l.Debug().AnErr("error", err).Msgf("matchAlbumByTitle: failed to get album images for %s", opts.ReleaseName)
|
||||
}
|
||||
|
||||
a, err = d.SaveAlbum(ctx, db.SaveAlbumOpts{
|
||||
Title: releaseName,
|
||||
|
|
|
|||
|
|
@ -127,12 +127,14 @@ func matchArtistsByMBIDMappings(ctx context.Context, d db.DB, opts AssociateArti
|
|||
l.Warn().AnErr("error", err).Msg("matchArtistsByMBIDMappings: MusicBrainz unreachable, creating new artist with provided MusicBrainz ID mapping")
|
||||
|
||||
var imgid uuid.UUID
|
||||
imgUrl, imgErr := images.GetArtistImage(ctx, images.ArtistImageOpts{
|
||||
Aliases: []string{a.Artist},
|
||||
})
|
||||
if imgErr == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
if !opts.SkipCacheImage {
|
||||
var imgUrl string
|
||||
if !opts.SkipCacheImage {
|
||||
var imgErr error
|
||||
imgUrl, imgErr = images.GetArtistImage(ctx, images.ArtistImageOpts{
|
||||
Aliases: []string{a.Artist},
|
||||
})
|
||||
if imgErr == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
var size ImageSize
|
||||
if cfg.FullImageCacheEnabled() {
|
||||
size = ImageSizeFull
|
||||
|
|
@ -144,9 +146,9 @@ func matchArtistsByMBIDMappings(ctx context.Context, d db.DB, opts AssociateArti
|
|||
if err != nil {
|
||||
l.Err(err).Msg("Failed to cache image")
|
||||
}
|
||||
} else if imgErr != nil {
|
||||
l.Err(imgErr).Msgf("matchArtistsByMBIDMappings: Failed to get artist image for artist '%s'", a.Artist)
|
||||
}
|
||||
} else {
|
||||
l.Err(imgErr).Msgf("matchArtistsByMBIDMappings: Failed to get artist image for artist '%s'", a.Artist)
|
||||
}
|
||||
|
||||
artist, err = d.SaveArtist(ctx, db.SaveArtistOpts{
|
||||
|
|
@ -246,12 +248,13 @@ func resolveAliasOrCreateArtist(ctx context.Context, mbzID uuid.UUID, names []st
|
|||
}
|
||||
|
||||
var imgid uuid.UUID
|
||||
imgUrl, err := images.GetArtistImage(ctx, images.ArtistImageOpts{
|
||||
Aliases: aliases,
|
||||
})
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
if !opts.SkipCacheImage {
|
||||
var imgUrl string
|
||||
if !opts.SkipCacheImage {
|
||||
imgUrl, err = images.GetArtistImage(ctx, images.ArtistImageOpts{
|
||||
Aliases: aliases,
|
||||
})
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
var size ImageSize
|
||||
if cfg.FullImageCacheEnabled() {
|
||||
size = ImageSizeFull
|
||||
|
|
@ -263,9 +266,9 @@ func resolveAliasOrCreateArtist(ctx context.Context, mbzID uuid.UUID, names []st
|
|||
if err != nil {
|
||||
l.Err(err).Msg("Failed to cache image")
|
||||
}
|
||||
} else if err != nil {
|
||||
l.Warn().AnErr("error", err).Msg("Failed to get artist image from ImageSrc")
|
||||
}
|
||||
} else if err != nil {
|
||||
l.Warn().AnErr("error", err).Msg("Failed to get artist image from ImageSrc")
|
||||
}
|
||||
|
||||
u, err := d.SaveArtist(ctx, db.SaveArtistOpts{
|
||||
|
|
@ -301,12 +304,13 @@ func matchArtistsByNames(ctx context.Context, names []string, existing []*models
|
|||
}
|
||||
if errors.Is(err, pgx.ErrNoRows) {
|
||||
var imgid uuid.UUID
|
||||
imgUrl, err := images.GetArtistImage(ctx, images.ArtistImageOpts{
|
||||
Aliases: []string{name},
|
||||
})
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
if !opts.SkipCacheImage {
|
||||
var imgUrl string
|
||||
if !opts.SkipCacheImage {
|
||||
imgUrl, err = images.GetArtistImage(ctx, images.ArtistImageOpts{
|
||||
Aliases: []string{name},
|
||||
})
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
var size ImageSize
|
||||
if cfg.FullImageCacheEnabled() {
|
||||
size = ImageSizeFull
|
||||
|
|
@ -318,9 +322,9 @@ func matchArtistsByNames(ctx context.Context, names []string, existing []*models
|
|||
if err != nil {
|
||||
l.Err(err).Msg("Failed to cache image")
|
||||
}
|
||||
} else if err != nil {
|
||||
l.Debug().AnErr("error", err).Msgf("Failed to get artist images for %s", name)
|
||||
}
|
||||
} else if err != nil {
|
||||
l.Debug().AnErr("error", err).Msgf("Failed to get artist images for %s", name)
|
||||
}
|
||||
a, err = d.SaveArtist(ctx, db.SaveArtistOpts{Name: name, Image: imgid, ImageSrc: imgUrl})
|
||||
if err != nil {
|
||||
|
|
|
|||
|
|
@ -77,6 +77,21 @@ func SubmitListen(ctx context.Context, store db.DB, opts SubmitListenOpts) error
|
|||
// bandaid to ensure new activity does not have sub-second precision
|
||||
opts.Time = opts.Time.Truncate(time.Second)
|
||||
|
||||
// Fast path: check lookup cache for known entity combo
|
||||
if !opts.SkipSaveListen {
|
||||
key := TrackLookupKey(opts.Artist, opts.TrackTitle, opts.ReleaseTitle)
|
||||
cached, err := store.GetTrackLookup(ctx, key)
|
||||
if err == nil && cached != nil {
|
||||
l.Debug().Msg("Track lookup cache hit — skipping entity resolution")
|
||||
return store.SaveListen(ctx, db.SaveListenOpts{
|
||||
TrackID: cached.TrackID,
|
||||
Time: opts.Time,
|
||||
UserID: opts.UserID,
|
||||
Client: opts.Client,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
artists, err := AssociateArtists(
|
||||
ctx,
|
||||
store,
|
||||
|
|
@ -168,6 +183,16 @@ func SubmitListen(ctx context.Context, store db.DB, opts SubmitListenOpts) error
|
|||
}
|
||||
}
|
||||
|
||||
// Populate lookup cache for future fast-path hits
|
||||
if len(artists) > 0 {
|
||||
store.SaveTrackLookup(ctx, db.SaveTrackLookupOpts{
|
||||
Key: TrackLookupKey(opts.Artist, opts.TrackTitle, opts.ReleaseTitle),
|
||||
ArtistID: artists[0].ID,
|
||||
AlbumID: rg.ID,
|
||||
TrackID: track.ID,
|
||||
})
|
||||
}
|
||||
|
||||
if opts.IsNowPlaying {
|
||||
if track.Duration == 0 {
|
||||
memkv.Store.Set(strconv.Itoa(int(opts.UserID)), track.ID)
|
||||
|
|
|
|||
9
internal/catalog/lookup_key.go
Normal file
9
internal/catalog/lookup_key.go
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
package catalog
|
||||
|
||||
import "strings"
|
||||
|
||||
// TrackLookupKey builds a normalized cache key for entity resolution.
|
||||
// Uses null-byte separators to avoid collisions between field values.
|
||||
func TrackLookupKey(artist, track, album string) string {
|
||||
return strings.ToLower(artist) + "\x00" + strings.ToLower(track) + "\x00" + strings.ToLower(album)
|
||||
}
|
||||
|
|
@ -101,6 +101,16 @@ type DB interface {
|
|||
MergeAlbums(ctx context.Context, fromId, toId int32, replaceImage bool) error
|
||||
MergeArtists(ctx context.Context, fromId, toId int32, replaceImage bool) error
|
||||
|
||||
// Track Lookup Cache
|
||||
|
||||
GetTrackLookup(ctx context.Context, key string) (*TrackLookupResult, error)
|
||||
SaveTrackLookup(ctx context.Context, opts SaveTrackLookupOpts) error
|
||||
InvalidateTrackLookup(ctx context.Context, opts InvalidateTrackLookupOpts) error
|
||||
|
||||
// Batch
|
||||
|
||||
SaveListensBatch(ctx context.Context, opts []SaveListenOpts) (int64, error)
|
||||
|
||||
// Etc
|
||||
|
||||
ImageHasAssociation(ctx context.Context, image uuid.UUID) (bool, error)
|
||||
|
|
|
|||
|
|
@ -160,3 +160,22 @@ type GetInterestOpts struct {
|
|||
ArtistID int32
|
||||
TrackID int32
|
||||
}
|
||||
|
||||
// TrackLookupResult holds the entity IDs cached for one normalized
// (artist, track, album) lookup key, as returned by DB.GetTrackLookup.
type TrackLookupResult struct {
	ArtistID int32
	AlbumID  int32
	TrackID  int32
}
|
||||
|
||||
// SaveTrackLookupOpts carries the normalized key and resolved entity IDs
// to store in the track lookup cache via DB.SaveTrackLookup.
type SaveTrackLookupOpts struct {
	Key      string // normalized lookup key (built by catalog.TrackLookupKey)
	ArtistID int32
	AlbumID  int32
	TrackID  int32
}
|
||||
|
||||
// InvalidateTrackLookupOpts selects which cached lookups to purge.
// A zero ID field means no invalidation on that dimension.
type InvalidateTrackLookupOpts struct {
	ArtistID int32
	AlbumID  int32
	TrackID  int32
}
|
||||
|
|
|
|||
|
|
@ -338,6 +338,7 @@ func (d *Psql) SaveAlbumAliases(ctx context.Context, id int32, aliases []string,
|
|||
}
|
||||
|
||||
// DeleteAlbum deletes the release with the given id, first purging any
// track_lookup cache rows that reference the album so stale cache hits
// cannot resolve to a deleted release.
func (d *Psql) DeleteAlbum(ctx context.Context, id int32) error {
	// Error from the cache purge is discarded — presumably best-effort so
	// the delete itself can still proceed; TODO confirm intent.
	d.q.DeleteTrackLookupByAlbum(ctx, id)
	return d.q.DeleteRelease(ctx, id)
}
|
||||
func (d *Psql) DeleteAlbumAlias(ctx context.Context, id int32, alias string) error {
|
||||
|
|
|
|||
|
|
@ -119,6 +119,7 @@ func (d *Psql) SaveArtistAliases(ctx context.Context, id int32, aliases []string
|
|||
}
|
||||
|
||||
// DeleteArtist deletes the artist with the given id, first purging any
// track_lookup cache rows that reference the artist so stale cache hits
// cannot resolve to a deleted artist.
func (d *Psql) DeleteArtist(ctx context.Context, id int32) error {
	// Error from the cache purge is discarded — presumably best-effort so
	// the delete itself can still proceed; TODO confirm intent.
	d.q.DeleteTrackLookupByArtist(ctx, id)
	return d.q.DeleteArtist(ctx, id)
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import (
|
|||
"github.com/gabehf/koito/internal/logger"
|
||||
"github.com/gabehf/koito/internal/models"
|
||||
"github.com/gabehf/koito/internal/repository"
|
||||
"github.com/jackc/pgx/v5"
|
||||
)
|
||||
|
||||
func (d *Psql) GetListensPaginated(ctx context.Context, opts db.GetItemsOpts) (*db.PaginatedResponse[*models.Listen], error) {
|
||||
|
|
@ -197,6 +198,67 @@ func (d *Psql) SaveListen(ctx context.Context, opts db.SaveListenOpts) error {
|
|||
})
|
||||
}
|
||||
|
||||
// SaveListensBatch inserts the given listens in a single transaction.
// Rows are streamed with the COPY protocol into a transaction-scoped
// staging table, then moved into listens with ON CONFLICT DO NOTHING so
// rows that already exist are silently skipped. Returns the number of
// rows actually inserted (duplicates excluded).
func (d *Psql) SaveListensBatch(ctx context.Context, opts []db.SaveListenOpts) (int64, error) {
	if len(opts) == 0 {
		return 0, nil
	}

	tx, err := d.conn.BeginTx(ctx, pgx.TxOptions{})
	if err != nil {
		return 0, fmt.Errorf("SaveListensBatch: BeginTx: %w", err)
	}
	// Rolls back on any early return; a no-op after a successful Commit.
	defer tx.Rollback(ctx)

	// Staging table lives only for this transaction (ON COMMIT DROP), so
	// concurrent calls on other connections cannot collide on the name.
	_, err = tx.Exec(ctx, `
		CREATE TEMP TABLE tmp_listens (
			track_id INT,
			listened_at TIMESTAMPTZ,
			user_id INT,
			client TEXT
		) ON COMMIT DROP
	`)
	if err != nil {
		return 0, fmt.Errorf("SaveListensBatch: create temp table: %w", err)
	}

	// Convert opts into CopyFrom rows: an empty client becomes SQL NULL
	// (nil interface value) and a zero time defaults to now.
	rows := make([][]interface{}, len(opts))
	for i, o := range opts {
		var client interface{}
		if o.Client != "" {
			client = o.Client
		}
		t := o.Time
		if t.IsZero() {
			t = time.Now()
		}
		rows[i] = []interface{}{o.TrackID, t, o.UserID, client}
	}

	// Bulk-load the staging table via the COPY protocol.
	_, err = tx.CopyFrom(ctx,
		pgx.Identifier{"tmp_listens"},
		[]string{"track_id", "listened_at", "user_id", "client"},
		pgx.CopyFromRows(rows),
	)
	if err != nil {
		return 0, fmt.Errorf("SaveListensBatch: CopyFrom: %w", err)
	}

	// Move staged rows into listens; conflicting rows (per the listens
	// table's unique constraint — schema not visible here, confirm) are
	// dropped by ON CONFLICT DO NOTHING.
	tag, err := tx.Exec(ctx, `
		INSERT INTO listens (track_id, listened_at, user_id, client)
		SELECT track_id, listened_at, user_id, client FROM tmp_listens
		ON CONFLICT DO NOTHING
	`)
	if err != nil {
		return 0, fmt.Errorf("SaveListensBatch: insert: %w", err)
	}

	if err := tx.Commit(ctx); err != nil {
		return 0, fmt.Errorf("SaveListensBatch: Commit: %w", err)
	}

	// RowsAffected counts only rows actually inserted, excluding conflicts.
	return tag.RowsAffected(), nil
}
|
||||
|
||||
func (d *Psql) DeleteListen(ctx context.Context, trackId int32, listenedAt time.Time) error {
|
||||
l := logger.FromContext(ctx)
|
||||
if trackId == 0 {
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ import (
|
|||
func (d *Psql) MergeTracks(ctx context.Context, fromId, toId int32) error {
|
||||
l := logger.FromContext(ctx)
|
||||
l.Info().Msgf("Merging track %d into track %d", fromId, toId)
|
||||
d.q.DeleteTrackLookupByTrack(ctx, fromId)
|
||||
d.q.DeleteTrackLookupByTrack(ctx, toId)
|
||||
tx, err := d.conn.BeginTx(ctx, pgx.TxOptions{})
|
||||
if err != nil {
|
||||
l.Err(err).Msg("Failed to begin transaction")
|
||||
|
|
@ -61,6 +63,8 @@ func (d *Psql) MergeTracks(ctx context.Context, fromId, toId int32) error {
|
|||
func (d *Psql) MergeAlbums(ctx context.Context, fromId, toId int32, replaceImage bool) error {
|
||||
l := logger.FromContext(ctx)
|
||||
l.Info().Msgf("Merging album %d into album %d", fromId, toId)
|
||||
d.q.DeleteTrackLookupByAlbum(ctx, fromId)
|
||||
d.q.DeleteTrackLookupByAlbum(ctx, toId)
|
||||
tx, err := d.conn.BeginTx(ctx, pgx.TxOptions{})
|
||||
if err != nil {
|
||||
l.Err(err).Msg("Failed to begin transaction")
|
||||
|
|
@ -117,6 +121,8 @@ func (d *Psql) MergeAlbums(ctx context.Context, fromId, toId int32, replaceImage
|
|||
func (d *Psql) MergeArtists(ctx context.Context, fromId, toId int32, replaceImage bool) error {
|
||||
l := logger.FromContext(ctx)
|
||||
l.Info().Msgf("Merging artist %d into artist %d", fromId, toId)
|
||||
d.q.DeleteTrackLookupByArtist(ctx, fromId)
|
||||
d.q.DeleteTrackLookupByArtist(ctx, toId)
|
||||
tx, err := d.conn.BeginTx(ctx, pgx.TxOptions{})
|
||||
if err != nil {
|
||||
l.Err(err).Msg("Failed to begin transaction")
|
||||
|
|
|
|||
|
|
@ -241,6 +241,9 @@ func (d *Psql) SaveTrackAliases(ctx context.Context, id int32, aliases []string,
|
|||
|
||||
func (d *Psql) DeleteTrack(ctx context.Context, id int32) error {
|
||||
l := logger.FromContext(ctx)
|
||||
|
||||
d.q.DeleteTrackLookupByTrack(ctx, id)
|
||||
|
||||
tx, err := d.conn.BeginTx(ctx, pgx.TxOptions{})
|
||||
if err != nil {
|
||||
l.Err(err).Msg("Failed to begin transaction")
|
||||
|
|
|
|||
52
internal/db/psql/track_lookup.go
Normal file
52
internal/db/psql/track_lookup.go
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
package psql
|
||||
|
||||
import (
	"context"
	"errors"

	"github.com/gabehf/koito/internal/db"
	"github.com/gabehf/koito/internal/repository"
	"github.com/jackc/pgx/v5"
)
||||
|
||||
func (d *Psql) GetTrackLookup(ctx context.Context, key string) (*db.TrackLookupResult, error) {
|
||||
row, err := d.q.GetTrackLookup(ctx, key)
|
||||
if err != nil {
|
||||
if err == pgx.ErrNoRows {
|
||||
return nil, err
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &db.TrackLookupResult{
|
||||
ArtistID: row.ArtistID,
|
||||
AlbumID: row.AlbumID,
|
||||
TrackID: row.TrackID,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Psql) SaveTrackLookup(ctx context.Context, opts db.SaveTrackLookupOpts) error {
|
||||
return d.q.InsertTrackLookup(ctx, repository.InsertTrackLookupParams{
|
||||
LookupKey: opts.Key,
|
||||
ArtistID: opts.ArtistID,
|
||||
AlbumID: opts.AlbumID,
|
||||
TrackID: opts.TrackID,
|
||||
})
|
||||
}
|
||||
|
||||
func (d *Psql) InvalidateTrackLookup(ctx context.Context, opts db.InvalidateTrackLookupOpts) error {
|
||||
if opts.ArtistID != 0 {
|
||||
if err := d.q.DeleteTrackLookupByArtist(ctx, opts.ArtistID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if opts.AlbumID != 0 {
|
||||
if err := d.q.DeleteTrackLookupByAlbum(ctx, opts.AlbumID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if opts.TrackID != 0 {
|
||||
if err := d.q.DeleteTrackLookupByTrack(ctx, opts.TrackID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
157
internal/importer/bulk.go
Normal file
157
internal/importer/bulk.go
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
package importer
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/gabehf/koito/internal/catalog"
|
||||
"github.com/gabehf/koito/internal/db"
|
||||
"github.com/gabehf/koito/internal/logger"
|
||||
"github.com/gabehf/koito/internal/mbz"
|
||||
)
|
||||
|
||||
// BulkSubmitter is a reusable import accelerator. It pre-deduplicates scrobbles
// in memory, resolves entities via the track_lookup cache (falling back to
// SubmitListen on cache miss with a worker pool for parallelism), and batch-inserts
// listens via SaveListensBatch.
//
// A BulkSubmitter is not safe for concurrent use: Accept appends to the
// buffer and Flush reads it without any synchronization.
type BulkSubmitter struct {
	store db.DB
	// NOTE(review): mbzc is stored but not referenced by Accept/Flush in
	// this file; scrobbles carry their own MbzCaller — confirm it is needed.
	mbzc mbz.MusicBrainzCaller
	// NOTE(review): storing a context in a struct is discouraged in Go
	// (contexts should flow through call parameters); kept here so Flush's
	// signature stays parameter-free.
	ctx context.Context
	// buffer accumulates scrobbles from Accept until Flush drains them.
	buffer []catalog.SubmitListenOpts
	// workers bounds the number of concurrent entity-resolution goroutines
	// spawned by Flush on cache misses.
	workers int
}
|
||||
|
||||
// BulkSubmitterOpts configures NewBulkSubmitter.
type BulkSubmitterOpts struct {
	Store   db.DB                 // destination database
	Mbzc    mbz.MusicBrainzCaller // MusicBrainz client retained by the submitter
	Workers int                   // default 4
}
|
||||
|
||||
func NewBulkSubmitter(ctx context.Context, opts BulkSubmitterOpts) *BulkSubmitter {
|
||||
workers := opts.Workers
|
||||
if workers <= 0 {
|
||||
workers = 4
|
||||
}
|
||||
return &BulkSubmitter{
|
||||
store: opts.Store,
|
||||
mbzc: opts.Mbzc,
|
||||
ctx: ctx,
|
||||
workers: workers,
|
||||
}
|
||||
}
|
||||
|
||||
// Accept buffers a scrobble for later batch processing.
// It only appends to the in-memory buffer; nothing reaches the database
// until Flush is called. Not synchronized — call from a single goroutine.
func (bs *BulkSubmitter) Accept(opts catalog.SubmitListenOpts) {
	bs.buffer = append(bs.buffer, opts)
}
|
||||
|
||||
// Flush processes all buffered scrobbles: deduplicates, resolves entities, and batch-inserts listens.
// Returns the number of listens successfully inserted.
//
// Processing happens in four phases:
//   - A: collapse the buffer to unique (artist, track, album) keys;
//   - B: resolve each key to a track ID via the track_lookup cache, falling
//     back to catalog.SubmitListen (entities only) on a miss, with at most
//     bs.workers goroutines in flight;
//   - C: rebuild the full listen list from the resolved keys;
//   - D: insert listens in chunks through SaveListensBatch.
//
// Scrobbles whose entities could not be resolved are logged and skipped,
// not treated as fatal.
func (bs *BulkSubmitter) Flush() (int, error) {
	l := logger.FromContext(bs.ctx)
	if len(bs.buffer) == 0 {
		return 0, nil
	}

	l.Info().Msgf("BulkSubmitter: Processing %d scrobbles", len(bs.buffer))

	// Phase A: Deduplicate — find unique (artist, track, album) tuples.
	// First occurrence wins; later duplicates reuse the same resolution.
	unique := make(map[string]catalog.SubmitListenOpts)
	for _, opts := range bs.buffer {
		key := catalog.TrackLookupKey(opts.Artist, opts.TrackTitle, opts.ReleaseTitle)
		if _, exists := unique[key]; !exists {
			unique[key] = opts
		}
	}
	l.Info().Msgf("BulkSubmitter: %d unique entity combos from %d scrobbles", len(unique), len(bs.buffer))

	// Phase B: Resolve entities — check cache, create on miss.
	// mu guards resolved (and cacheHits) against the worker goroutines;
	// sem bounds concurrency to bs.workers. Cache lookups themselves run
	// serially on this goroutine before any worker is spawned.
	resolved := make(map[string]int32) // key → trackID
	var mu sync.Mutex
	var wg sync.WaitGroup
	sem := make(chan struct{}, bs.workers)
	cacheHits := 0

	for key, opts := range unique {
		// Check track_lookup cache first
		cached, err := bs.store.GetTrackLookup(bs.ctx, key)
		if err == nil && cached != nil {
			mu.Lock()
			resolved[key] = cached.TrackID
			cacheHits++
			mu.Unlock()
			continue
		}

		// Cache miss — create entities via SubmitListen (with worker pool).
		// key/opts are passed as arguments so each goroutine gets its own
		// copies regardless of Go version loop-variable semantics.
		wg.Add(1)
		sem <- struct{}{} // acquire worker slot
		go func(k string, o catalog.SubmitListenOpts) {
			defer wg.Done()
			defer func() { <-sem }() // release worker slot

			// Entities only: skip the listen row (batched later in Phase D)
			// and skip image fetching for speed.
			o.SkipSaveListen = true
			o.SkipCacheImage = true
			err := catalog.SubmitListen(bs.ctx, bs.store, o)
			if err != nil {
				// Non-fatal: the combo stays unresolved and its scrobbles
				// are counted as skipped in Phase C.
				l.Err(err).Msgf("BulkSubmitter: Failed to create entities for '%s' by '%s'", o.TrackTitle, o.Artist)
				return
			}

			// Re-check cache (SubmitListen populates it via Phase 1's integration)
			cached, err := bs.store.GetTrackLookup(bs.ctx, k)
			if err == nil && cached != nil {
				mu.Lock()
				resolved[k] = cached.TrackID
				mu.Unlock()
			}
		}(key, opts)
	}
	wg.Wait()

	l.Info().Msgf("BulkSubmitter: Resolved %d/%d entity combos (%d cache hits)",
		len(resolved), len(unique), cacheHits)

	// Phase C: Build listen batch — map every buffered scrobble back to its
	// resolved track ID; unresolved combos are dropped (and counted).
	batch := make([]db.SaveListenOpts, 0, len(bs.buffer))
	skipped := 0
	for _, opts := range bs.buffer {
		key := catalog.TrackLookupKey(opts.Artist, opts.TrackTitle, opts.ReleaseTitle)
		trackID, ok := resolved[key]
		if !ok {
			skipped++
			continue
		}
		batch = append(batch, db.SaveListenOpts{
			TrackID: trackID,
			// Truncate mirrors SubmitListen's sub-second precision bandaid.
			Time:   opts.Time.Truncate(time.Second),
			UserID: opts.UserID,
			Client: opts.Client,
		})
	}
	if skipped > 0 {
		l.Warn().Msgf("BulkSubmitter: Skipped %d scrobbles with unresolved entities", skipped)
	}

	// Phase D: Batch insert listens (in chunks to avoid huge transactions)
	const chunkSize = 5000
	var totalInserted int64
	for i := 0; i < len(batch); i += chunkSize {
		end := i + chunkSize
		if end > len(batch) {
			end = len(batch)
		}
		inserted, err := bs.store.SaveListensBatch(bs.ctx, batch[i:end])
		if err != nil {
			// Partial progress is reported alongside the error.
			return int(totalInserted), fmt.Errorf("BulkSubmitter: SaveListensBatch: %w", err)
		}
		totalInserted += inserted
	}

	l.Info().Msgf("BulkSubmitter: Inserted %d listens (%d duplicates skipped)",
		totalInserted, int64(len(batch))-totalInserted)
	return int(totalInserted), nil
}
|
||||
|
|
@ -50,18 +50,17 @@ func ImportLastFMFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
|
|||
return fmt.Errorf("ImportLastFMFile: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
var throttleFunc = func() {}
|
||||
if ms := cfg.ThrottleImportMs(); ms > 0 {
|
||||
throttleFunc = func() {
|
||||
time.Sleep(time.Duration(ms) * time.Millisecond)
|
||||
}
|
||||
}
|
||||
export := make([]LastFMExportPage, 0)
|
||||
err = json.NewDecoder(file).Decode(&export)
|
||||
if err != nil {
|
||||
return fmt.Errorf("ImportLastFMFile: %w", err)
|
||||
}
|
||||
count := 0
|
||||
|
||||
bs := NewBulkSubmitter(ctx, BulkSubmitterOpts{
|
||||
Store: store,
|
||||
Mbzc: mbzc,
|
||||
})
|
||||
|
||||
for _, item := range export {
|
||||
for _, track := range item.Track {
|
||||
album := track.Album.Text
|
||||
|
|
@ -96,7 +95,6 @@ func ImportLastFMFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
|
|||
ts = time.Unix(unix, 0).UTC()
|
||||
}
|
||||
if !inImportTimeWindow(ts) {
|
||||
l.Debug().Msgf("Skipping import due to import time rules")
|
||||
continue
|
||||
}
|
||||
|
||||
|
|
@ -105,7 +103,7 @@ func ImportLastFMFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
|
|||
artistMbidMap = append(artistMbidMap, catalog.ArtistMbidMap{Artist: track.Artist.Text, Mbid: artistMbzID})
|
||||
}
|
||||
|
||||
opts := catalog.SubmitListenOpts{
|
||||
bs.Accept(catalog.SubmitListenOpts{
|
||||
MbzCaller: mbzc,
|
||||
Artist: track.Artist.Text,
|
||||
ArtistNames: []string{track.Artist.Text},
|
||||
|
|
@ -118,16 +116,14 @@ func ImportLastFMFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
|
|||
Client: "lastfm",
|
||||
Time: ts,
|
||||
UserID: 1,
|
||||
SkipCacheImage: !cfg.FetchImagesDuringImport(),
|
||||
}
|
||||
err = catalog.SubmitListen(ctx, store, opts)
|
||||
if err != nil {
|
||||
l.Err(err).Msg("Failed to import LastFM playback item")
|
||||
return fmt.Errorf("ImportLastFMFile: %w", err)
|
||||
}
|
||||
count++
|
||||
throttleFunc()
|
||||
SkipCacheImage: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
count, err := bs.Flush()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ImportLastFMFile: %w", err)
|
||||
}
|
||||
return finishImport(ctx, filename, count)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,13 +63,11 @@ func ImportListenBrainzFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrai
|
|||
|
||||
scanner := bufio.NewScanner(r)
|
||||
|
||||
var throttleFunc = func() {}
|
||||
if ms := cfg.ThrottleImportMs(); ms > 0 {
|
||||
throttleFunc = func() {
|
||||
time.Sleep(time.Duration(ms) * time.Millisecond)
|
||||
}
|
||||
}
|
||||
count := 0
|
||||
bs := NewBulkSubmitter(ctx, BulkSubmitterOpts{
|
||||
Store: store,
|
||||
Mbzc: mbzc,
|
||||
})
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Bytes()
|
||||
payload := new(handlers.LbzSubmitListenPayload)
|
||||
|
|
@ -80,7 +78,6 @@ func ImportListenBrainzFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrai
|
|||
}
|
||||
ts := time.Unix(payload.ListenedAt, 0)
|
||||
if !inImportTimeWindow(ts) {
|
||||
l.Debug().Msgf("Skipping import due to import time rules")
|
||||
continue
|
||||
}
|
||||
artistMbzIDs, err := utils.ParseUUIDSlice(payload.TrackMeta.AdditionalInfo.ArtistMBIDs)
|
||||
|
|
@ -139,7 +136,7 @@ func ImportListenBrainzFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrai
|
|||
artistMbidMap = append(artistMbidMap, catalog.ArtistMbidMap{Artist: a.ArtistName, Mbid: mbid})
|
||||
}
|
||||
|
||||
opts := catalog.SubmitListenOpts{
|
||||
bs.Accept(catalog.SubmitListenOpts{
|
||||
MbzCaller: mbzc,
|
||||
ArtistNames: payload.TrackMeta.AdditionalInfo.ArtistNames,
|
||||
Artist: payload.TrackMeta.ArtistName,
|
||||
|
|
@ -154,15 +151,13 @@ func ImportListenBrainzFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrai
|
|||
Time: ts,
|
||||
UserID: 1,
|
||||
Client: client,
|
||||
SkipCacheImage: !cfg.FetchImagesDuringImport(),
|
||||
}
|
||||
err = catalog.SubmitListen(ctx, store, opts)
|
||||
if err != nil {
|
||||
l.Err(err).Msg("Failed to import LastFM playback item")
|
||||
return fmt.Errorf("ImportListenBrainzFile: %w", err)
|
||||
}
|
||||
count++
|
||||
throttleFunc()
|
||||
SkipCacheImage: true,
|
||||
})
|
||||
}
|
||||
|
||||
count, err := bs.Flush()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ImportListenBrainzFile: %w", err)
|
||||
}
|
||||
l.Info().Msgf("Finished importing %s; imported %d items", filename, count)
|
||||
return nil
|
||||
|
|
|
|||
|
|
@ -41,21 +41,19 @@ func ImportMalojaFile(ctx context.Context, store db.DB, filename string) error {
|
|||
return fmt.Errorf("ImportMalojaFile: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
var throttleFunc = func() {}
|
||||
if ms := cfg.ThrottleImportMs(); ms > 0 {
|
||||
throttleFunc = func() {
|
||||
time.Sleep(time.Duration(ms) * time.Millisecond)
|
||||
}
|
||||
}
|
||||
export := new(MalojaExport)
|
||||
err = json.NewDecoder(file).Decode(&export)
|
||||
if err != nil {
|
||||
return fmt.Errorf("ImportMalojaFile: %w", err)
|
||||
}
|
||||
|
||||
bs := NewBulkSubmitter(ctx, BulkSubmitterOpts{
|
||||
Store: store,
|
||||
Mbzc: &mbz.MbzErrorCaller{},
|
||||
})
|
||||
|
||||
for _, item := range export.Scrobbles {
|
||||
martists := make([]string, 0)
|
||||
// Maloja has a tendency to have the the artist order ['feature', 'main \u2022 feature'], so
|
||||
// here we try to turn that artist array into ['main', 'feature']
|
||||
item.Track.Artists = utils.MoveFirstMatchToFront(item.Track.Artists, " \u2022 ")
|
||||
for _, an := range item.Track.Artists {
|
||||
ans := strings.Split(an, " \u2022 ")
|
||||
|
|
@ -68,11 +66,10 @@ func ImportMalojaFile(ctx context.Context, store db.DB, filename string) error {
|
|||
}
|
||||
ts := time.Unix(item.Time, 0)
|
||||
if !inImportTimeWindow(ts) {
|
||||
l.Debug().Msgf("Skipping import due to import time rules")
|
||||
continue
|
||||
}
|
||||
opts := catalog.SubmitListenOpts{
|
||||
MbzCaller: &mbz.MusicBrainzClient{},
|
||||
bs.Accept(catalog.SubmitListenOpts{
|
||||
MbzCaller: &mbz.MbzErrorCaller{},
|
||||
Artist: item.Track.Artists[0],
|
||||
ArtistNames: artists,
|
||||
TrackTitle: item.Track.Title,
|
||||
|
|
@ -80,14 +77,13 @@ func ImportMalojaFile(ctx context.Context, store db.DB, filename string) error {
|
|||
Time: ts.Local(),
|
||||
Client: "maloja",
|
||||
UserID: 1,
|
||||
SkipCacheImage: !cfg.FetchImagesDuringImport(),
|
||||
}
|
||||
err = catalog.SubmitListen(ctx, store, opts)
|
||||
if err != nil {
|
||||
l.Err(err).Msg("Failed to import maloja playback item")
|
||||
return fmt.Errorf("ImportMalojaFile: %w", err)
|
||||
}
|
||||
throttleFunc()
|
||||
SkipCacheImage: true,
|
||||
})
|
||||
}
|
||||
return finishImport(ctx, filename, len(export.Scrobbles))
|
||||
|
||||
count, err := bs.Flush()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ImportMalojaFile: %w", err)
|
||||
}
|
||||
return finishImport(ctx, filename, count)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,48 +33,44 @@ func ImportSpotifyFile(ctx context.Context, store db.DB, filename string) error
|
|||
return fmt.Errorf("ImportSpotifyFile: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
var throttleFunc = func() {}
|
||||
if ms := cfg.ThrottleImportMs(); ms > 0 {
|
||||
throttleFunc = func() {
|
||||
time.Sleep(time.Duration(ms) * time.Millisecond)
|
||||
}
|
||||
}
|
||||
export := make([]SpotifyExportItem, 0)
|
||||
err = json.NewDecoder(file).Decode(&export)
|
||||
if err != nil {
|
||||
return fmt.Errorf("ImportSpotifyFile: %w", err)
|
||||
}
|
||||
|
||||
bs := NewBulkSubmitter(ctx, BulkSubmitterOpts{
|
||||
Store: store,
|
||||
Mbzc: &mbz.MbzErrorCaller{},
|
||||
})
|
||||
|
||||
for _, item := range export {
|
||||
if item.ReasonEnd != "trackdone" {
|
||||
continue
|
||||
}
|
||||
if !inImportTimeWindow(item.Timestamp) {
|
||||
l.Debug().Msgf("Skipping import due to import time rules")
|
||||
continue
|
||||
}
|
||||
dur := item.MsPlayed
|
||||
if item.TrackName == "" || item.ArtistName == "" {
|
||||
l.Debug().Msg("Skipping non-track item")
|
||||
continue
|
||||
}
|
||||
opts := catalog.SubmitListenOpts{
|
||||
MbzCaller: &mbz.MusicBrainzClient{},
|
||||
bs.Accept(catalog.SubmitListenOpts{
|
||||
MbzCaller: &mbz.MbzErrorCaller{},
|
||||
Artist: item.ArtistName,
|
||||
TrackTitle: item.TrackName,
|
||||
ReleaseTitle: item.AlbumName,
|
||||
Duration: dur / 1000,
|
||||
Duration: item.MsPlayed / 1000,
|
||||
Time: item.Timestamp,
|
||||
Client: "spotify",
|
||||
UserID: 1,
|
||||
SkipCacheImage: !cfg.FetchImagesDuringImport(),
|
||||
}
|
||||
err = catalog.SubmitListen(ctx, store, opts)
|
||||
if err != nil {
|
||||
l.Err(err).Msg("Failed to import spotify playback item")
|
||||
return fmt.Errorf("ImportSpotifyFile: %w", err)
|
||||
}
|
||||
throttleFunc()
|
||||
SkipCacheImage: true,
|
||||
})
|
||||
}
|
||||
return finishImport(ctx, filename, len(export))
|
||||
|
||||
count, err := bs.Flush()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ImportSpotifyFile: %w", err)
|
||||
}
|
||||
return finishImport(ctx, filename, count)
|
||||
}
|
||||
|
|
|
|||
82
internal/repository/track_lookup.sql.go
Normal file
82
internal/repository/track_lookup.sql.go
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
// Code generated by sqlc. DO NOT EDIT.
// versions:
//   sqlc v1.28.0
// source: track_lookup.sql

package repository

import (
	"context"
)

const deleteTrackLookupByAlbum = `-- name: DeleteTrackLookupByAlbum :exec
DELETE FROM track_lookup WHERE album_id = $1
`

// DeleteTrackLookupByAlbum removes all lookup-cache rows for the given album.
func (q *Queries) DeleteTrackLookupByAlbum(ctx context.Context, albumID int32) error {
	_, err := q.db.Exec(ctx, deleteTrackLookupByAlbum, albumID)
	return err
}

const deleteTrackLookupByArtist = `-- name: DeleteTrackLookupByArtist :exec
DELETE FROM track_lookup WHERE artist_id = $1
`

// DeleteTrackLookupByArtist removes all lookup-cache rows for the given artist.
func (q *Queries) DeleteTrackLookupByArtist(ctx context.Context, artistID int32) error {
	_, err := q.db.Exec(ctx, deleteTrackLookupByArtist, artistID)
	return err
}

const deleteTrackLookupByTrack = `-- name: DeleteTrackLookupByTrack :exec
DELETE FROM track_lookup WHERE track_id = $1
`

// DeleteTrackLookupByTrack removes all lookup-cache rows for the given track.
func (q *Queries) DeleteTrackLookupByTrack(ctx context.Context, trackID int32) error {
	_, err := q.db.Exec(ctx, deleteTrackLookupByTrack, trackID)
	return err
}

const getTrackLookup = `-- name: GetTrackLookup :one
SELECT artist_id, album_id, track_id
FROM track_lookup
WHERE lookup_key = $1
`

// GetTrackLookupRow is the result row for GetTrackLookup.
type GetTrackLookupRow struct {
	ArtistID int32
	AlbumID  int32
	TrackID  int32
}

// GetTrackLookup fetches the cached entity IDs for a lookup key; pgx
// surfaces a miss as pgx.ErrNoRows from Scan.
func (q *Queries) GetTrackLookup(ctx context.Context, lookupKey string) (GetTrackLookupRow, error) {
	row := q.db.QueryRow(ctx, getTrackLookup, lookupKey)
	var i GetTrackLookupRow
	err := row.Scan(&i.ArtistID, &i.AlbumID, &i.TrackID)
	return i, err
}

const insertTrackLookup = `-- name: InsertTrackLookup :exec
INSERT INTO track_lookup (lookup_key, artist_id, album_id, track_id)
VALUES ($1, $2, $3, $4)
ON CONFLICT (lookup_key) DO UPDATE SET
    artist_id = EXCLUDED.artist_id,
    album_id = EXCLUDED.album_id,
    track_id = EXCLUDED.track_id
`

// InsertTrackLookupParams are the arguments for InsertTrackLookup.
type InsertTrackLookupParams struct {
	LookupKey string
	ArtistID  int32
	AlbumID   int32
	TrackID   int32
}

// InsertTrackLookup upserts a lookup-cache row keyed by lookup_key.
func (q *Queries) InsertTrackLookup(ctx context.Context, arg InsertTrackLookupParams) error {
	_, err := q.db.Exec(ctx, insertTrackLookup,
		arg.LookupKey,
		arg.ArtistID,
		arg.AlbumID,
		arg.TrackID,
	)
	return err
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue