Add bulk import optimization: track_lookup cache, batch inserts, BulkSubmitter

Adopts ListenBrainz-inspired patterns to speed up imports from ~24h to
under 30 minutes for 49k scrobbles.

Phase 1 - track_lookup cache table:
- New migration (000006) adds persistent entity lookup cache
- Maps normalized (artist, track, album) → (artist_id, album_id, track_id)
- SubmitListen fast path: a cache hit reduces the work from 18 DB queries to 2
- Cache populated after entity resolution, invalidated on merge/delete
- Benefits both live scrobbles and imports

Phase 2 - SaveListensBatch:
- New batch listen insert using pgx CopyFrom → temp table → INSERT ON CONFLICT
- Thousands of inserts per second vs one-at-a-time

Phase 3 - BulkSubmitter:
- Reusable import accelerator for all importers
- Pre-deduplicates scrobbles by (artist, track, album) in memory
- Worker pool (4 goroutines) for parallel entity creation on cache miss
- Batch listen insertion via SaveListensBatch

Phase 4 - Migrate importers:
- Maloja, Spotify, LastFM, ListenBrainz importers use BulkSubmitter
- Koito importer left as-is (already fast with pre-resolved IDs)

Phase 5 - Skip image lookups during import:
- GetArtistImage/GetAlbumImage calls fully skipped when SkipCacheImage=true
- Background tasks (FetchMissingArtistImages/FetchMissingAlbumImages) backfill the missing images afterwards

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
safierinx-a 2026-03-25 04:17:50 +05:30
parent c92e93484e
commit 8ce6ec494d
21 changed files with 1294 additions and 129 deletions

View file

@ -44,12 +44,6 @@ func ImportMalojaFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
return fmt.Errorf("ImportMalojaFile: %w", err)
}
defer file.Close()
var throttleFunc = func() {}
if ms := cfg.ThrottleImportMs(); ms > 0 {
throttleFunc = func() {
time.Sleep(time.Duration(ms) * time.Millisecond)
}
}
export := new(MalojaFile)
err = json.NewDecoder(file).Decode(&export)
if err != nil {
@ -59,12 +53,14 @@ func ImportMalojaFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
if len(items) == 0 {
items = export.List
}
count := 0
total := len(items)
for i, item := range items {
bs := NewBulkSubmitter(ctx, BulkSubmitterOpts{
Store: store,
Mbzc: mbzc,
})
for _, item := range items {
martists := make([]string, 0)
// Maloja has a tendency to have the artist order ['feature', 'main ● feature'], so
// here we try to turn that artist array into ['main', 'feature']
item.Track.Artists = utils.MoveFirstMatchToFront(item.Track.Artists, " \u2022 ")
for _, an := range item.Track.Artists {
ans := strings.Split(an, " \u2022 ")
@ -77,14 +73,13 @@ func ImportMalojaFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
}
ts := time.Unix(item.Time, 0)
if !inImportTimeWindow(ts) {
l.Debug().Msgf("Skipping import due to import time rules")
continue
}
releaseTitle := ""
if item.Track.Album != nil {
releaseTitle = item.Track.Album.Title
}
opts := catalog.SubmitListenOpts{
bs.Accept(catalog.SubmitListenOpts{
MbzCaller: mbzc,
Artist: item.Track.Artists[0],
ArtistNames: artists,
@ -93,18 +88,13 @@ func ImportMalojaFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
Time: ts.Local(),
Client: "maloja",
UserID: 1,
SkipCacheImage: !cfg.FetchImagesDuringImport(),
}
err = catalog.SubmitListen(ctx, store, opts)
if err != nil {
l.Err(err).Msgf("Failed to import maloja item %d/%d", i+1, total)
continue
}
count++
if count%500 == 0 {
l.Info().Msgf("Maloja import progress: %d/%d", count, total)
}
throttleFunc()
SkipCacheImage: true,
})
}
count, err := bs.Flush()
if err != nil {
return fmt.Errorf("ImportMalojaFile: %w", err)
}
return finishImport(ctx, filename, count)
}