mirror of
https://github.com/gabehf/Koito.git
synced 2026-04-22 12:01:52 -07:00
Add bulk import optimization: track_lookup cache, batch inserts, BulkSubmitter
Adopts ListenBrainz-inspired patterns to speed up imports from ~24h to under 30 minutes for 49k scrobbles.

Phase 1 - track_lookup cache table:
- New migration (000006) adds persistent entity lookup cache
- Maps normalized (artist, track, album) → (artist_id, album_id, track_id)
- SubmitListen fast path: cache hit skips 18 DB queries → 2 queries
- Cache populated after entity resolution, invalidated on merge/delete
- Benefits both live scrobbles and imports

Phase 2 - SaveListensBatch:
- New batch listen insert using pgx CopyFrom → temp table → INSERT ON CONFLICT
- Thousands of inserts per second vs one-at-a-time

Phase 3 - BulkSubmitter:
- Reusable import accelerator for all importers
- Pre-deduplicates scrobbles by (artist, track, album) in memory
- Worker pool (4 goroutines) for parallel entity creation on cache miss
- Batch listen insertion via SaveListensBatch

Phase 4 - Migrate importers:
- Maloja, Spotify, LastFM, ListenBrainz importers use BulkSubmitter
- Koito importer left as-is (already fast with pre-resolved IDs)

Phase 5 - Skip image lookups during import:
- GetArtistImage/GetAlbumImage calls fully skipped when SkipCacheImage=true
- Background tasks (FetchMissingArtistImages/FetchMissingAlbumImages) backfill

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c92e93484e
commit
8ce6ec494d
21 changed files with 1294 additions and 129 deletions
52
internal/db/psql/track_lookup.go
Normal file
52
internal/db/psql/track_lookup.go
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
package psql
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/gabehf/koito/internal/db"
|
||||
"github.com/gabehf/koito/internal/repository"
|
||||
"github.com/jackc/pgx/v5"
|
||||
)
|
||||
|
||||
func (d *Psql) GetTrackLookup(ctx context.Context, key string) (*db.TrackLookupResult, error) {
|
||||
row, err := d.q.GetTrackLookup(ctx, key)
|
||||
if err != nil {
|
||||
if err == pgx.ErrNoRows {
|
||||
return nil, err
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &db.TrackLookupResult{
|
||||
ArtistID: row.ArtistID,
|
||||
AlbumID: row.AlbumID,
|
||||
TrackID: row.TrackID,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (d *Psql) SaveTrackLookup(ctx context.Context, opts db.SaveTrackLookupOpts) error {
|
||||
return d.q.InsertTrackLookup(ctx, repository.InsertTrackLookupParams{
|
||||
LookupKey: opts.Key,
|
||||
ArtistID: opts.ArtistID,
|
||||
AlbumID: opts.AlbumID,
|
||||
TrackID: opts.TrackID,
|
||||
})
|
||||
}
|
||||
|
||||
func (d *Psql) InvalidateTrackLookup(ctx context.Context, opts db.InvalidateTrackLookupOpts) error {
|
||||
if opts.ArtistID != 0 {
|
||||
if err := d.q.DeleteTrackLookupByArtist(ctx, opts.ArtistID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if opts.AlbumID != 0 {
|
||||
if err := d.q.DeleteTrackLookupByAlbum(ctx, opts.AlbumID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if opts.TrackID != 0 {
|
||||
if err := d.q.DeleteTrackLookupByTrack(ctx, opts.TrackID); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue