mirror of
https://github.com/gabehf/Koito.git
synced 2026-04-22 12:01:52 -07:00
Adopts ListenBrainz-inspired patterns to speed up imports from ~24h to under 30 minutes for 49k scrobbles.

Phase 1 - track_lookup cache table:
- New migration (000006) adds persistent entity lookup cache
- Maps normalized (artist, track, album) → (artist_id, album_id, track_id)
- SubmitListen fast path: cache hit skips 18 DB queries → 2 queries
- Cache populated after entity resolution, invalidated on merge/delete
- Benefits both live scrobbles and imports

Phase 2 - SaveListensBatch:
- New batch listen insert using pgx CopyFrom → temp table → INSERT ON CONFLICT
- Thousands of inserts per second vs one-at-a-time

Phase 3 - BulkSubmitter:
- Reusable import accelerator for all importers
- Pre-deduplicates scrobbles by (artist, track, album) in memory
- Worker pool (4 goroutines) for parallel entity creation on cache miss
- Batch listen insertion via SaveListensBatch

Phase 4 - Migrate importers:
- Maloja, Spotify, LastFM, ListenBrainz importers use BulkSubmitter
- Koito importer left as-is (already fast with pre-resolved IDs)

Phase 5 - Skip image lookups during import:
- GetArtistImage/GetAlbumImage calls fully skipped when SkipCacheImage=true
- Background tasks (FetchMissingArtistImages/FetchMissingAlbumImages) backfill

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
185 lines
5.3 KiB
Go
185 lines
5.3 KiB
Go
package psql
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
|
|
"github.com/gabehf/koito/internal/logger"
|
|
"github.com/gabehf/koito/internal/repository"
|
|
"github.com/jackc/pgx/v5"
|
|
)
|
|
|
|
func (d *Psql) MergeTracks(ctx context.Context, fromId, toId int32) error {
|
|
l := logger.FromContext(ctx)
|
|
l.Info().Msgf("Merging track %d into track %d", fromId, toId)
|
|
d.q.DeleteTrackLookupByTrack(ctx, fromId)
|
|
d.q.DeleteTrackLookupByTrack(ctx, toId)
|
|
tx, err := d.conn.BeginTx(ctx, pgx.TxOptions{})
|
|
if err != nil {
|
|
l.Err(err).Msg("Failed to begin transaction")
|
|
return fmt.Errorf("MergeTracks: %w", err)
|
|
}
|
|
defer tx.Rollback(ctx)
|
|
qtx := d.q.WithTx(tx)
|
|
from, err := qtx.GetTrack(ctx, fromId)
|
|
if err != nil {
|
|
return fmt.Errorf("MergeTracks: GetTrack: %w", err)
|
|
}
|
|
to, err := qtx.GetTrack(ctx, toId)
|
|
if err != nil {
|
|
return fmt.Errorf("MergeTracks: GetTrack: %w", err)
|
|
}
|
|
err = qtx.UpdateTrackIdForListens(ctx, repository.UpdateTrackIdForListensParams{
|
|
TrackID: fromId,
|
|
TrackID_2: toId,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("MergeTracks: UpdateTrackIdForListens: %w", err)
|
|
}
|
|
if from.ReleaseID != to.ReleaseID {
|
|
// tracks are from different releases, track artist should be associated with to.release
|
|
artists, err := qtx.GetTrackArtists(ctx, fromId)
|
|
if err != nil {
|
|
return fmt.Errorf("MergeTracks: GetTrackArtists: %w", err)
|
|
}
|
|
for _, artist := range artists {
|
|
err = qtx.AssociateArtistToRelease(ctx, repository.AssociateArtistToReleaseParams{
|
|
ArtistID: artist.ID,
|
|
ReleaseID: to.ReleaseID,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("MergeTracks: AssociateArtistToRelease: %w", err)
|
|
}
|
|
}
|
|
}
|
|
err = qtx.CleanOrphanedEntries(ctx)
|
|
if err != nil {
|
|
l.Err(err).Msg("MergeTracks: Failed to clean orphaned entries")
|
|
return err
|
|
}
|
|
return tx.Commit(ctx)
|
|
}
|
|
|
|
func (d *Psql) MergeAlbums(ctx context.Context, fromId, toId int32, replaceImage bool) error {
|
|
l := logger.FromContext(ctx)
|
|
l.Info().Msgf("Merging album %d into album %d", fromId, toId)
|
|
d.q.DeleteTrackLookupByAlbum(ctx, fromId)
|
|
d.q.DeleteTrackLookupByAlbum(ctx, toId)
|
|
tx, err := d.conn.BeginTx(ctx, pgx.TxOptions{})
|
|
if err != nil {
|
|
l.Err(err).Msg("Failed to begin transaction")
|
|
return fmt.Errorf("MergeAlbums: %w", err)
|
|
}
|
|
defer tx.Rollback(ctx)
|
|
qtx := d.q.WithTx(tx)
|
|
|
|
fromArtists, err := qtx.GetReleaseArtists(ctx, fromId)
|
|
if err != nil {
|
|
return fmt.Errorf("MergeAlbums: GetReleaseArtists: %w", err)
|
|
}
|
|
|
|
err = qtx.UpdateReleaseForAll(ctx, repository.UpdateReleaseForAllParams{
|
|
ReleaseID: fromId,
|
|
ReleaseID_2: toId,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("MergeAlbums: %w", err)
|
|
}
|
|
if replaceImage {
|
|
old, err := qtx.GetRelease(ctx, fromId)
|
|
if err != nil {
|
|
return fmt.Errorf("MergeAlbums: %w", err)
|
|
}
|
|
err = qtx.UpdateReleaseImage(ctx, repository.UpdateReleaseImageParams{
|
|
ID: toId,
|
|
Image: old.Image,
|
|
ImageSource: old.ImageSource,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("MergeAlbums: %w", err)
|
|
}
|
|
}
|
|
|
|
for _, artist := range fromArtists {
|
|
err = qtx.AssociateArtistToRelease(ctx, repository.AssociateArtistToReleaseParams{
|
|
ArtistID: artist.ID,
|
|
ReleaseID: toId,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("MergeAlbums: AssociateArtistToRelease: %w", err)
|
|
}
|
|
}
|
|
|
|
err = qtx.CleanOrphanedEntries(ctx)
|
|
if err != nil {
|
|
l.Err(err).Msg("Failed to clean orphaned entries")
|
|
return fmt.Errorf("MergeAlbums: CleanOrphanedEntries: %w", err)
|
|
}
|
|
return tx.Commit(ctx)
|
|
}
|
|
|
|
func (d *Psql) MergeArtists(ctx context.Context, fromId, toId int32, replaceImage bool) error {
|
|
l := logger.FromContext(ctx)
|
|
l.Info().Msgf("Merging artist %d into artist %d", fromId, toId)
|
|
d.q.DeleteTrackLookupByArtist(ctx, fromId)
|
|
d.q.DeleteTrackLookupByArtist(ctx, toId)
|
|
tx, err := d.conn.BeginTx(ctx, pgx.TxOptions{})
|
|
if err != nil {
|
|
l.Err(err).Msg("Failed to begin transaction")
|
|
return fmt.Errorf("MergeArtists: %w", err)
|
|
}
|
|
defer tx.Rollback(ctx)
|
|
qtx := d.q.WithTx(tx)
|
|
err = qtx.DeleteConflictingArtistTracks(ctx, repository.DeleteConflictingArtistTracksParams{
|
|
ArtistID: fromId,
|
|
ArtistID_2: toId,
|
|
})
|
|
if err != nil {
|
|
l.Err(err).Msg("Failed to delete conflicting artist tracks")
|
|
return fmt.Errorf("MergeArtists: %w", err)
|
|
}
|
|
err = qtx.DeleteConflictingArtistReleases(ctx, repository.DeleteConflictingArtistReleasesParams{
|
|
ArtistID: fromId,
|
|
ArtistID_2: toId,
|
|
})
|
|
if err != nil {
|
|
l.Err(err).Msg("Failed to delete conflicting artist releases")
|
|
return fmt.Errorf("MergeArtists: %w", err)
|
|
}
|
|
err = qtx.UpdateArtistTracks(ctx, repository.UpdateArtistTracksParams{
|
|
ArtistID: fromId,
|
|
ArtistID_2: toId,
|
|
})
|
|
if err != nil {
|
|
l.Err(err).Msg("Failed to update artist tracks")
|
|
return fmt.Errorf("MergeArtists: %w", err)
|
|
}
|
|
err = qtx.UpdateArtistReleases(ctx, repository.UpdateArtistReleasesParams{
|
|
ArtistID: fromId,
|
|
ArtistID_2: toId,
|
|
})
|
|
if err != nil {
|
|
l.Err(err).Msg("Failed to update artist releases")
|
|
return fmt.Errorf("MergeArtists: %w", err)
|
|
}
|
|
if replaceImage {
|
|
old, err := qtx.GetArtist(ctx, fromId)
|
|
if err != nil {
|
|
return fmt.Errorf("MergeAlbums: %w", err)
|
|
}
|
|
err = qtx.UpdateArtistImage(ctx, repository.UpdateArtistImageParams{
|
|
ID: toId,
|
|
Image: old.Image,
|
|
ImageSource: old.ImageSource,
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("MergeAlbums: %w", err)
|
|
}
|
|
}
|
|
err = qtx.CleanOrphanedEntries(ctx)
|
|
if err != nil {
|
|
l.Err(err).Msg("Failed to clean orphaned entries")
|
|
return fmt.Errorf("MergeArtists: %w", err)
|
|
}
|
|
return tx.Commit(ctx)
|
|
}
|