mirror of
https://github.com/gabehf/Koito.git
synced 2026-04-22 12:01:52 -07:00
Add bulk import optimization: track_lookup cache, batch inserts, BulkSubmitter
Adopts ListenBrainz-inspired patterns to speed up imports from ~24h to under 30 minutes for 49k scrobbles.

Phase 1 - track_lookup cache table:
- New migration (000006) adds persistent entity lookup cache
- Maps normalized (artist, track, album) → (artist_id, album_id, track_id)
- SubmitListen fast path: cache hit skips 18 DB queries → 2 queries
- Cache populated after entity resolution, invalidated on merge/delete
- Benefits both live scrobbles and imports

Phase 2 - SaveListensBatch:
- New batch listen insert using pgx CopyFrom → temp table → INSERT ON CONFLICT
- Thousands of inserts per second vs one-at-a-time

Phase 3 - BulkSubmitter:
- Reusable import accelerator for all importers
- Pre-deduplicates scrobbles by (artist, track, album) in memory
- Worker pool (4 goroutines) for parallel entity creation on cache miss
- Batch listen insertion via SaveListensBatch

Phase 4 - Migrate importers:
- Maloja, Spotify, LastFM, ListenBrainz importers use BulkSubmitter
- Koito importer left as-is (already fast with pre-resolved IDs)

Phase 5 - Skip image lookups during import:
- GetArtistImage/GetAlbumImage calls fully skipped when SkipCacheImage=true
- Background tasks (FetchMissingArtistImages/FetchMissingAlbumImages) backfill

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c92e93484e
commit
8ce6ec494d
21 changed files with 1294 additions and 129 deletions
|
|
@ -44,12 +44,6 @@ func ImportMalojaFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
|
|||
return fmt.Errorf("ImportMalojaFile: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
var throttleFunc = func() {}
|
||||
if ms := cfg.ThrottleImportMs(); ms > 0 {
|
||||
throttleFunc = func() {
|
||||
time.Sleep(time.Duration(ms) * time.Millisecond)
|
||||
}
|
||||
}
|
||||
export := new(MalojaFile)
|
||||
err = json.NewDecoder(file).Decode(&export)
|
||||
if err != nil {
|
||||
|
|
@ -59,12 +53,14 @@ func ImportMalojaFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
|
|||
if len(items) == 0 {
|
||||
items = export.List
|
||||
}
|
||||
count := 0
|
||||
total := len(items)
|
||||
for i, item := range items {
|
||||
|
||||
bs := NewBulkSubmitter(ctx, BulkSubmitterOpts{
|
||||
Store: store,
|
||||
Mbzc: mbzc,
|
||||
})
|
||||
|
||||
for _, item := range items {
|
||||
martists := make([]string, 0)
|
||||
// Maloja has a tendency to have the artist order ['feature', 'main ● feature'], so
|
||||
// here we try to turn that artist array into ['main', 'feature']
|
||||
item.Track.Artists = utils.MoveFirstMatchToFront(item.Track.Artists, " \u2022 ")
|
||||
for _, an := range item.Track.Artists {
|
||||
ans := strings.Split(an, " \u2022 ")
|
||||
|
|
@ -77,14 +73,13 @@ func ImportMalojaFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
|
|||
}
|
||||
ts := time.Unix(item.Time, 0)
|
||||
if !inImportTimeWindow(ts) {
|
||||
l.Debug().Msgf("Skipping import due to import time rules")
|
||||
continue
|
||||
}
|
||||
releaseTitle := ""
|
||||
if item.Track.Album != nil {
|
||||
releaseTitle = item.Track.Album.Title
|
||||
}
|
||||
opts := catalog.SubmitListenOpts{
|
||||
bs.Accept(catalog.SubmitListenOpts{
|
||||
MbzCaller: mbzc,
|
||||
Artist: item.Track.Artists[0],
|
||||
ArtistNames: artists,
|
||||
|
|
@ -93,18 +88,13 @@ func ImportMalojaFile(ctx context.Context, store db.DB, mbzc mbz.MusicBrainzCall
|
|||
Time: ts.Local(),
|
||||
Client: "maloja",
|
||||
UserID: 1,
|
||||
SkipCacheImage: !cfg.FetchImagesDuringImport(),
|
||||
}
|
||||
err = catalog.SubmitListen(ctx, store, opts)
|
||||
if err != nil {
|
||||
l.Err(err).Msgf("Failed to import maloja item %d/%d", i+1, total)
|
||||
continue
|
||||
}
|
||||
count++
|
||||
if count%500 == 0 {
|
||||
l.Info().Msgf("Maloja import progress: %d/%d", count, total)
|
||||
}
|
||||
throttleFunc()
|
||||
SkipCacheImage: true,
|
||||
})
|
||||
}
|
||||
|
||||
count, err := bs.Flush()
|
||||
if err != nil {
|
||||
return fmt.Errorf("ImportMalojaFile: %w", err)
|
||||
}
|
||||
return finishImport(ctx, filename, count)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue