Add bulk import optimization: track_lookup cache, batch inserts, BulkSubmitter

Adopts ListenBrainz-inspired patterns to speed up imports from ~24h to
under 30 minutes for 49k scrobbles.

Phase 1 - track_lookup cache table:
- New migration (000006) adds persistent entity lookup cache
- Maps normalized (artist, track, album) → (artist_id, album_id, track_id)
- SubmitListen fast path: on a cache hit, 18 DB queries drop to 2
- Cache populated after entity resolution, invalidated on merge/delete
- Benefits both live scrobbles and imports

Phase 2 - SaveListensBatch:
- New batch listen insert using pgx CopyFrom → temp table → INSERT ON CONFLICT
- Thousands of inserts per second vs one-at-a-time

Phase 3 - BulkSubmitter:
- Reusable import accelerator for all importers
- Pre-deduplicates scrobbles by (artist, track, album) in memory
- Worker pool (4 goroutines) for parallel entity creation on cache miss
- Batch listen insertion via SaveListensBatch

Phase 4 - Migrate importers:
- Maloja, Spotify, LastFM, ListenBrainz importers use BulkSubmitter
- Koito importer left as-is (already fast with pre-resolved IDs)

Phase 5 - Skip image lookups during import:
- GetArtistImage/GetAlbumImage calls fully skipped when SkipCacheImage=true
- Background tasks (FetchMissingArtistImages/FetchMissingAlbumImages) backfill the skipped images

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
safierinx-a 2026-03-25 04:17:50 +05:30
parent c92e93484e
commit 8ce6ec494d
21 changed files with 1294 additions and 129 deletions

View file

@ -0,0 +1,82 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.28.0
// source: track_lookup.sql
package repository
import (
"context"
)
// deleteTrackLookupByAlbum is the SQL statement run by
// DeleteTrackLookupByAlbum.
const deleteTrackLookupByAlbum = `-- name: DeleteTrackLookupByAlbum :exec
DELETE FROM track_lookup WHERE album_id = $1
`

// DeleteTrackLookupByAlbum deletes every track_lookup row whose album_id
// equals albumID. It returns whatever error the underlying Exec call reports.
func (q *Queries) DeleteTrackLookupByAlbum(ctx context.Context, albumID int32) error {
	if _, execErr := q.db.Exec(ctx, deleteTrackLookupByAlbum, albumID); execErr != nil {
		return execErr
	}
	return nil
}
// deleteTrackLookupByArtist is the SQL statement run by
// DeleteTrackLookupByArtist.
const deleteTrackLookupByArtist = `-- name: DeleteTrackLookupByArtist :exec
DELETE FROM track_lookup WHERE artist_id = $1
`

// DeleteTrackLookupByArtist deletes every track_lookup row whose artist_id
// equals artistID. It returns whatever error the underlying Exec call reports.
func (q *Queries) DeleteTrackLookupByArtist(ctx context.Context, artistID int32) error {
	if _, execErr := q.db.Exec(ctx, deleteTrackLookupByArtist, artistID); execErr != nil {
		return execErr
	}
	return nil
}
// deleteTrackLookupByTrack is the SQL statement run by
// DeleteTrackLookupByTrack.
const deleteTrackLookupByTrack = `-- name: DeleteTrackLookupByTrack :exec
DELETE FROM track_lookup WHERE track_id = $1
`

// DeleteTrackLookupByTrack deletes every track_lookup row whose track_id
// equals trackID. It returns whatever error the underlying Exec call reports.
func (q *Queries) DeleteTrackLookupByTrack(ctx context.Context, trackID int32) error {
	if _, execErr := q.db.Exec(ctx, deleteTrackLookupByTrack, trackID); execErr != nil {
		return execErr
	}
	return nil
}
// getTrackLookup is the SQL statement run by GetTrackLookup.
const getTrackLookup = `-- name: GetTrackLookup :one
SELECT artist_id, album_id, track_id
FROM track_lookup
WHERE lookup_key = $1
`

// GetTrackLookupRow holds the three ID columns selected by GetTrackLookup.
type GetTrackLookupRow struct {
	ArtistID int32
	AlbumID  int32
	TrackID  int32
}

// GetTrackLookup selects the artist, album and track IDs stored in
// track_lookup for lookupKey and scans them into a GetTrackLookupRow.
//
// The row value is returned together with the Scan error, so callers must
// check the error before trusting the IDs.
func (q *Queries) GetTrackLookup(ctx context.Context, lookupKey string) (GetTrackLookupRow, error) {
	var found GetTrackLookupRow
	scanErr := q.db.QueryRow(ctx, getTrackLookup, lookupKey).Scan(
		&found.ArtistID,
		&found.AlbumID,
		&found.TrackID,
	)
	return found, scanErr
}
// insertTrackLookup is the SQL statement run by InsertTrackLookup. On a
// lookup_key conflict it overwrites the stored IDs with the new ones.
const insertTrackLookup = `-- name: InsertTrackLookup :exec
INSERT INTO track_lookup (lookup_key, artist_id, album_id, track_id)
VALUES ($1, $2, $3, $4)
ON CONFLICT (lookup_key) DO UPDATE SET
artist_id = EXCLUDED.artist_id,
album_id = EXCLUDED.album_id,
track_id = EXCLUDED.track_id
`

// InsertTrackLookupParams carries the positional arguments ($1..$4) of the
// insertTrackLookup statement, in field order.
type InsertTrackLookupParams struct {
	LookupKey string
	ArtistID  int32
	AlbumID   int32
	TrackID   int32
}

// InsertTrackLookup upserts a track_lookup row: it inserts the mapping from
// arg.LookupKey to the given artist, album and track IDs, or replaces the IDs
// when a row with the same lookup_key already exists. It returns whatever
// error the underlying Exec call reports.
func (q *Queries) InsertTrackLookup(ctx context.Context, arg InsertTrackLookupParams) error {
	if _, execErr := q.db.Exec(ctx, insertTrackLookup, arg.LookupKey, arg.ArtistID, arg.AlbumID, arg.TrackID); execErr != nil {
		return execErr
	}
	return nil
}