Add bulk import optimization: track_lookup cache, batch inserts, BulkSubmitter

Adopts ListenBrainz-inspired patterns to speed up imports from ~24h to
under 30 minutes for 49k scrobbles.

Phase 1 - track_lookup cache table:
- New migration (000006) adds persistent entity lookup cache
- Maps normalized (artist, track, album) → (artist_id, album_id, track_id)
- SubmitListen fast path: a cache hit reduces 18 DB queries to 2
- Cache populated after entity resolution, invalidated on merge/delete
- Benefits both live scrobbles and imports

Phase 2 - SaveListensBatch:
- New batch listen insert using pgx CopyFrom → temp table → INSERT ON CONFLICT
- Thousands of inserts per second vs one-at-a-time

Phase 3 - BulkSubmitter:
- Reusable import accelerator for all importers
- Pre-deduplicates scrobbles by (artist, track, album) in memory
- Worker pool (4 goroutines) for parallel entity creation on cache miss
- Batch listen insertion via SaveListensBatch

Phase 4 - Migrate importers:
- Maloja, Spotify, LastFM, ListenBrainz importers use BulkSubmitter
- Koito importer left as-is (already fast with pre-resolved IDs)

Phase 5 - Skip image lookups during import:
- GetArtistImage/GetAlbumImage calls fully skipped when SkipCacheImage=true
- Background tasks (FetchMissingArtistImages/FetchMissingAlbumImages) backfill the skipped images

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
safierinx-a 2026-03-25 04:17:50 +05:30
parent c92e93484e
commit 8ce6ec494d
21 changed files with 1294 additions and 129 deletions

View file

@ -127,12 +127,14 @@ func matchArtistsByMBIDMappings(ctx context.Context, d db.DB, opts AssociateArti
l.Warn().AnErr("error", err).Msg("matchArtistsByMBIDMappings: MusicBrainz unreachable, creating new artist with provided MusicBrainz ID mapping")
var imgid uuid.UUID
imgUrl, imgErr := images.GetArtistImage(ctx, images.ArtistImageOpts{
Aliases: []string{a.Artist},
})
if imgErr == nil && imgUrl != "" {
imgid = uuid.New()
if !opts.SkipCacheImage {
var imgUrl string
if !opts.SkipCacheImage {
var imgErr error
imgUrl, imgErr = images.GetArtistImage(ctx, images.ArtistImageOpts{
Aliases: []string{a.Artist},
})
if imgErr == nil && imgUrl != "" {
imgid = uuid.New()
var size ImageSize
if cfg.FullImageCacheEnabled() {
size = ImageSizeFull
@ -144,9 +146,9 @@ func matchArtistsByMBIDMappings(ctx context.Context, d db.DB, opts AssociateArti
if err != nil {
l.Err(err).Msg("Failed to cache image")
}
} else if imgErr != nil {
l.Err(imgErr).Msgf("matchArtistsByMBIDMappings: Failed to get artist image for artist '%s'", a.Artist)
}
} else {
l.Err(imgErr).Msgf("matchArtistsByMBIDMappings: Failed to get artist image for artist '%s'", a.Artist)
}
artist, err = d.SaveArtist(ctx, db.SaveArtistOpts{
@ -246,12 +248,13 @@ func resolveAliasOrCreateArtist(ctx context.Context, mbzID uuid.UUID, names []st
}
var imgid uuid.UUID
imgUrl, err := images.GetArtistImage(ctx, images.ArtistImageOpts{
Aliases: aliases,
})
if err == nil && imgUrl != "" {
imgid = uuid.New()
if !opts.SkipCacheImage {
var imgUrl string
if !opts.SkipCacheImage {
imgUrl, err = images.GetArtistImage(ctx, images.ArtistImageOpts{
Aliases: aliases,
})
if err == nil && imgUrl != "" {
imgid = uuid.New()
var size ImageSize
if cfg.FullImageCacheEnabled() {
size = ImageSizeFull
@ -263,9 +266,9 @@ func resolveAliasOrCreateArtist(ctx context.Context, mbzID uuid.UUID, names []st
if err != nil {
l.Err(err).Msg("Failed to cache image")
}
} else if err != nil {
l.Warn().AnErr("error", err).Msg("Failed to get artist image from ImageSrc")
}
} else if err != nil {
l.Warn().AnErr("error", err).Msg("Failed to get artist image from ImageSrc")
}
u, err := d.SaveArtist(ctx, db.SaveArtistOpts{
@ -301,12 +304,13 @@ func matchArtistsByNames(ctx context.Context, names []string, existing []*models
}
if errors.Is(err, pgx.ErrNoRows) {
var imgid uuid.UUID
imgUrl, err := images.GetArtistImage(ctx, images.ArtistImageOpts{
Aliases: []string{name},
})
if err == nil && imgUrl != "" {
imgid = uuid.New()
if !opts.SkipCacheImage {
var imgUrl string
if !opts.SkipCacheImage {
imgUrl, err = images.GetArtistImage(ctx, images.ArtistImageOpts{
Aliases: []string{name},
})
if err == nil && imgUrl != "" {
imgid = uuid.New()
var size ImageSize
if cfg.FullImageCacheEnabled() {
size = ImageSizeFull
@ -318,9 +322,9 @@ func matchArtistsByNames(ctx context.Context, names []string, existing []*models
if err != nil {
l.Err(err).Msg("Failed to cache image")
}
} else if err != nil {
l.Debug().AnErr("error", err).Msgf("Failed to get artist images for %s", name)
}
} else if err != nil {
l.Debug().AnErr("error", err).Msgf("Failed to get artist images for %s", name)
}
a, err = d.SaveArtist(ctx, db.SaveArtistOpts{Name: name, Image: imgid, ImageSrc: imgUrl})
if err != nil {