From e45099c71a7bd1a17c36ee187b1578dbc1f35507 Mon Sep 17 00:00:00 2001 From: Gabe Farrell <90876006+gabehf@users.noreply.github.com> Date: Mon, 12 Jan 2026 13:03:04 -0500 Subject: [PATCH] fix: improve matching with identically named albums (#126) * fix: improve matching with identically named albums * fix: incorrect sql query --- db/queries/release.sql | 13 +++++++ internal/catalog/associate_album.go | 27 ++++++-------- internal/catalog/submit_listen_test.go | 6 ++++ internal/db/db.go | 1 + internal/db/psql/album.go | 50 ++++++++++++++++++++++++++ internal/repository/release.sql.go | 33 +++++++++++++++++ 6 files changed, 113 insertions(+), 17 deletions(-) diff --git a/db/queries/release.sql b/db/queries/release.sql index 86727f4..9f54291 100644 --- a/db/queries/release.sql +++ b/db/queries/release.sql @@ -32,6 +32,19 @@ JOIN artist_releases ar ON r.id = ar.release_id WHERE r.title = ANY ($1::TEXT[]) AND ar.artist_id = $2 LIMIT 1; +-- name: GetReleaseByArtistAndTitlesNoMbzID :one +SELECT r.* +FROM releases_with_title r +JOIN artist_releases ar ON r.id = ar.release_id +WHERE r.title = ANY ($1::TEXT[]) + AND ar.artist_id = $2 + AND EXISTS ( + SELECT 1 + FROM releases r2 + WHERE r2.id = r.id + AND r2.musicbrainz_id IS NULL + ); + -- name: GetTopReleasesFromArtist :many SELECT r.*, diff --git a/internal/catalog/associate_album.go b/internal/catalog/associate_album.go index dd97244..3a63c58 100644 --- a/internal/catalog/associate_album.go +++ b/internal/catalog/associate_album.go @@ -82,26 +82,19 @@ func createOrUpdateAlbumWithMbzReleaseID(ctx context.Context, d db.DB, opts Asso titles := []string{release.Title, opts.ReleaseName} utils.Unique(&titles) - l.Debug().Msgf("Searching for albums '%v' from artist id %d in DB", titles, opts.Artists[0].ID) - album, err = d.GetAlbum(ctx, db.GetAlbumOpts{ - ArtistID: opts.Artists[0].ID, - Titles: titles, - }) + l.Debug().Msgf("Searching for albums '%v' from artist id %d and no associated MusicBrainz ID in DB", titles, opts.Artists[0].ID) + album, err = d.GetAlbumWithNoMbzIDByTitles(ctx, opts.Artists[0].ID, titles) if err == nil { l.Debug().Msgf("Found album %s, updating with MusicBrainz Release ID...", album.Title) - if album.MbzID == nil { - err := d.UpdateAlbum(ctx, db.UpdateAlbumOpts{ - ID: album.ID, - MusicBrainzID: opts.ReleaseMbzID, - }) - if err != nil { - l.Err(err).Msg("createOrUpdateAlbumWithMbzReleaseID: failed to update album with MusicBrainz Release ID") - return nil, fmt.Errorf("createOrUpdateAlbumWithMbzReleaseID: %w", err) - } - l.Debug().Msgf("Updated album '%s' with MusicBrainz Release ID", album.Title) - } else { - l.Warn().Msgf("Attempted to update album %s with MusicBrainz ID, but an existing ID was already found", album.Title) + err := d.UpdateAlbum(ctx, db.UpdateAlbumOpts{ + ID: album.ID, + MusicBrainzID: opts.ReleaseMbzID, + }) + if err != nil { + l.Err(err).Msg("createOrUpdateAlbumWithMbzReleaseID: failed to update album with MusicBrainz Release ID") + return nil, fmt.Errorf("createOrUpdateAlbumWithMbzReleaseID: %w", err) } + l.Debug().Msgf("Updated album '%s' with MusicBrainz Release ID", album.Title) if opts.ReleaseGroupMbzID != uuid.Nil { aliases, err := opts.Mbzc.GetReleaseTitles(ctx, opts.ReleaseGroupMbzID) diff --git a/internal/catalog/submit_listen_test.go b/internal/catalog/submit_listen_test.go index 34a6038..1548776 100644 --- a/internal/catalog/submit_listen_test.go +++ b/internal/catalog/submit_listen_test.go @@ -297,6 +297,7 @@ func TestSubmitListen_DoNotOverwriteMbzIDs(t *testing.T) { } artistMbzID := uuid.MustParse("10000000-0000-0000-0000-000000000000") releaseMbzID := uuid.MustParse("01000000-0000-0000-0000-000000000000") + existingReleaseMbzID := uuid.MustParse("00000000-0000-0000-0000-000000000101") trackMbzID := uuid.MustParse("00100000-0000-0000-0000-000000000000") opts := catalog.SubmitListenOpts{ MbzCaller: mbzc, @@ -337,6 +338,11 @@ func TestSubmitListen_DoNotOverwriteMbzIDs(t *testing.T) { require.NoError(t, err) assert.Equal(t, 0, count, "duplicate release group created") count, err = store.Count(ctx, ` + SELECT COUNT(*) FROM releases_with_title WHERE musicbrainz_id = $1 + `, existingReleaseMbzID) + require.NoError(t, err) + assert.Equal(t, 1, count, "existing release group should not be overwritten") + count, err = store.Count(ctx, ` SELECT COUNT(*) FROM artists_with_name WHERE musicbrainz_id = $1 `, artistMbzID) require.NoError(t, err) diff --git a/internal/db/db.go b/internal/db/db.go index a4f1b43..f2364be 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -14,6 +14,7 @@ type DB interface { GetArtist(ctx context.Context, opts GetArtistOpts) (*models.Artist, error) GetAlbum(ctx context.Context, opts GetAlbumOpts) (*models.Album, error) + GetAlbumWithNoMbzIDByTitles(ctx context.Context, artistId int32, titles []string) (*models.Album, error) GetTrack(ctx context.Context, opts GetTrackOpts) (*models.Track, error) GetArtistsForAlbum(ctx context.Context, id int32) ([]*models.Artist, error) GetArtistsForTrack(ctx context.Context, id int32) ([]*models.Artist, error) diff --git a/internal/db/psql/album.go b/internal/db/psql/album.go index 5343e08..630cf1f 100644 --- a/internal/db/psql/album.go +++ b/internal/db/psql/album.go @@ -110,6 +110,56 @@ func (d *Psql) GetAlbum(ctx context.Context, opts db.GetAlbumOpts) (*models.Albu return ret, nil } +func (d *Psql) GetAlbumWithNoMbzIDByTitles(ctx context.Context, artistId int32, titles []string) (*models.Album, error) { + l := logger.FromContext(ctx) + ret := new(models.Album) + + if artistId != 0 && len(titles) > 0 { + l.Debug().Msgf("GetAlbumWithNoMbzIDByTitles: Fetching release group from DB with artist_id %d and titles %v and no associated MusicBrainz ID", artistId, titles) + row, err := d.q.GetReleaseByArtistAndTitlesNoMbzID(ctx, repository.GetReleaseByArtistAndTitlesNoMbzIDParams{ + ArtistID: artistId, + Column1: titles, + }) + if err != nil { + return nil, fmt.Errorf("GetAlbum: %w", err) + } + ret.ID = row.ID + ret.MbzID = row.MusicBrainzID + ret.Title = row.Title + ret.Image = row.Image + ret.VariousArtists = row.VariousArtists + } else { + return nil, errors.New("GetAlbumWithNoMbzIDByTitles: insufficient information to get album") + } + count, err := d.q.CountListensFromRelease(ctx, repository.CountListensFromReleaseParams{ + ListenedAt: time.Unix(0, 0), + ListenedAt_2: time.Now(), + ReleaseID: ret.ID, + }) + if err != nil { + return nil, fmt.Errorf("GetAlbumWithNoMbzIDByTitles: CountListensFromRelease: %w", err) + } + + seconds, err := d.CountTimeListenedToItem(ctx, db.TimeListenedOpts{ + Timeframe: db.Timeframe{Period: db.PeriodAllTime}, + AlbumID: ret.ID, + }) + if err != nil { + return nil, fmt.Errorf("GetAlbumWithNoMbzIDByTitles: CountTimeListenedToItem: %w", err) + } + + firstListen, err := d.q.GetFirstListenFromRelease(ctx, ret.ID) + if err != nil && !errors.Is(err, pgx.ErrNoRows) { + return nil, fmt.Errorf("GetAlbumWithNoMbzIDByTitles: GetFirstListenFromRelease: %w", err) + } + + ret.ListenCount = count + ret.TimeListened = seconds + ret.FirstListen = firstListen.ListenedAt.Unix() + + return ret, nil +} + func (d *Psql) SaveAlbum(ctx context.Context, opts db.SaveAlbumOpts) (*models.Album, error) { l := logger.FromContext(ctx) var insertMbzID *uuid.UUID diff --git a/internal/repository/release.sql.go b/internal/repository/release.sql.go index aa791e6..3d77eef 100644 --- a/internal/repository/release.sql.go +++ b/internal/repository/release.sql.go @@ -195,6 +195,39 @@ func (q *Queries) GetReleaseByArtistAndTitles(ctx context.Context, arg GetReleas return i, err } +const getReleaseByArtistAndTitlesNoMbzID = `-- name: GetReleaseByArtistAndTitlesNoMbzID :one +SELECT r.id, r.musicbrainz_id, r.image, r.various_artists, r.image_source, r.title +FROM releases_with_title r +JOIN artist_releases ar ON r.id = ar.release_id +WHERE r.title = ANY ($1::TEXT[]) + AND ar.artist_id = $2 + AND EXISTS ( + SELECT 1 + FROM releases r2 + WHERE r2.id = r.id + AND r2.musicbrainz_id IS NULL + ) +` + +type GetReleaseByArtistAndTitlesNoMbzIDParams struct { + Column1 []string + ArtistID int32 +} + +func (q *Queries) GetReleaseByArtistAndTitlesNoMbzID(ctx context.Context, arg GetReleaseByArtistAndTitlesNoMbzIDParams) (ReleasesWithTitle, error) { + row := q.db.QueryRow(ctx, getReleaseByArtistAndTitlesNoMbzID, arg.Column1, arg.ArtistID) + var i ReleasesWithTitle + err := row.Scan( + &i.ID, + &i.MusicBrainzID, + &i.Image, + &i.VariousArtists, + &i.ImageSource, + &i.Title, + ) + return i, err +} + const getReleaseByImageID = `-- name: GetReleaseByImageID :one SELECT id, musicbrainz_id, image, various_artists, image_source FROM releases WHERE image = $1 LIMIT 1