mirror of
https://github.com/gabehf/Koito.git
synced 2026-04-22 12:01:52 -07:00
Add bulk import optimization: track_lookup cache, batch inserts, BulkSubmitter
Adopts ListenBrainz-inspired patterns to speed up imports from ~24h to under 30 minutes for 49k scrobbles.

Phase 1 - track_lookup cache table:
- New migration (000006) adds a persistent entity lookup cache
- Maps normalized (artist, track, album) → (artist_id, album_id, track_id)
- SubmitListen fast path: a cache hit reduces 18 DB queries to 2 queries
- Cache populated after entity resolution, invalidated on merge/delete
- Benefits both live scrobbles and imports

Phase 2 - SaveListensBatch:
- New batch listen insert using pgx CopyFrom → temp table → INSERT ON CONFLICT
- Thousands of inserts per second vs. one-at-a-time

Phase 3 - BulkSubmitter:
- Reusable import accelerator for all importers
- Pre-deduplicates scrobbles by (artist, track, album) in memory
- Worker pool (4 goroutines) for parallel entity creation on cache miss
- Batch listen insertion via SaveListensBatch

Phase 4 - Migrate importers:
- Maloja, Spotify, LastFM, ListenBrainz importers use BulkSubmitter
- Koito importer left as-is (already fast with pre-resolved IDs)

Phase 5 - Skip image lookups during import:
- GetArtistImage/GetAlbumImage calls fully skipped when SkipCacheImage=true
- Background tasks (FetchMissingArtistImages/FetchMissingAlbumImages) backfill the missing images

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c92e93484e
commit
8ce6ec494d
21 changed files with 1294 additions and 129 deletions
|
|
@ -122,17 +122,18 @@ func createOrUpdateAlbumWithMbzReleaseID(ctx context.Context, d db.DB, opts Asso
|
|||
}
|
||||
}
|
||||
|
||||
l.Debug().Msg("Searching for album images...")
|
||||
var imgid uuid.UUID
|
||||
imgUrl, err := images.GetAlbumImage(ctx, images.AlbumImageOpts{
|
||||
Artists: utils.UniqueIgnoringCase(slices.Concat(utils.FlattenMbzArtistCreditNames(release.ArtistCredit), utils.FlattenArtistNames(opts.Artists))),
|
||||
Album: release.Title,
|
||||
ReleaseMbzID: &opts.ReleaseMbzID,
|
||||
})
|
||||
var imgUrl string
|
||||
if !opts.SkipCacheImage {
|
||||
l.Debug().Msg("Searching for album images...")
|
||||
imgUrl, err = images.GetAlbumImage(ctx, images.AlbumImageOpts{
|
||||
Artists: utils.UniqueIgnoringCase(slices.Concat(utils.FlattenMbzArtistCreditNames(release.ArtistCredit), utils.FlattenArtistNames(opts.Artists))),
|
||||
Album: release.Title,
|
||||
ReleaseMbzID: &opts.ReleaseMbzID,
|
||||
})
|
||||
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
if !opts.SkipCacheImage {
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
var size ImageSize
|
||||
if cfg.FullImageCacheEnabled() {
|
||||
size = ImageSizeFull
|
||||
|
|
@ -144,13 +145,11 @@ func createOrUpdateAlbumWithMbzReleaseID(ctx context.Context, d db.DB, opts Asso
|
|||
if err != nil {
|
||||
l.Err(err).Msg("createOrUpdateAlbumWithMbzReleaseID: failed to cache image")
|
||||
}
|
||||
} else if err != nil {
|
||||
l.Debug().Msgf("createOrUpdateAlbumWithMbzReleaseID: failed to get album images for %s: %s", release.Title, err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
l.Debug().Msgf("createOrUpdateAlbumWithMbzReleaseID: failed to get album images for %s: %s", release.Title, err.Error())
|
||||
}
|
||||
|
||||
album, err = d.SaveAlbum(ctx, db.SaveAlbumOpts{
|
||||
Title: release.Title,
|
||||
MusicBrainzID: opts.ReleaseMbzID,
|
||||
|
|
@ -217,14 +216,15 @@ func matchAlbumByTitle(ctx context.Context, d db.DB, opts AssociateAlbumOpts) (*
|
|||
return nil, fmt.Errorf("matchAlbumByTitle: %w", err)
|
||||
} else {
|
||||
var imgid uuid.UUID
|
||||
imgUrl, err := images.GetAlbumImage(ctx, images.AlbumImageOpts{
|
||||
Artists: utils.FlattenArtistNames(opts.Artists),
|
||||
Album: opts.ReleaseName,
|
||||
ReleaseMbzID: &opts.ReleaseMbzID,
|
||||
})
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
if !opts.SkipCacheImage {
|
||||
var imgUrl string
|
||||
if !opts.SkipCacheImage {
|
||||
imgUrl, err = images.GetAlbumImage(ctx, images.AlbumImageOpts{
|
||||
Artists: utils.FlattenArtistNames(opts.Artists),
|
||||
Album: opts.ReleaseName,
|
||||
ReleaseMbzID: &opts.ReleaseMbzID,
|
||||
})
|
||||
if err == nil && imgUrl != "" {
|
||||
imgid = uuid.New()
|
||||
var size ImageSize
|
||||
if cfg.FullImageCacheEnabled() {
|
||||
size = ImageSizeFull
|
||||
|
|
@ -234,13 +234,12 @@ func matchAlbumByTitle(ctx context.Context, d db.DB, opts AssociateAlbumOpts) (*
|
|||
l.Debug().Msg("Downloading album image from source...")
|
||||
err = DownloadAndCacheImage(ctx, imgid, imgUrl, size)
|
||||
if err != nil {
|
||||
l.Err(err).Msg("createOrUpdateAlbumWithMbzReleaseID: failed to cache image")
|
||||
l.Err(err).Msg("matchAlbumByTitle: failed to cache image")
|
||||
}
|
||||
} else if err != nil {
|
||||
l.Debug().AnErr("error", err).Msgf("matchAlbumByTitle: failed to get album images for %s", opts.ReleaseName)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
l.Debug().AnErr("error", err).Msgf("matchAlbumByTitle: failed to get album images for %s", opts.ReleaseName)
|
||||
}
|
||||
|
||||
a, err = d.SaveAlbum(ctx, db.SaveAlbumOpts{
|
||||
Title: releaseName,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue