From 2c2949940368ba7dfd09111c9effadb10fa0e52c Mon Sep 17 00:00:00 2001 From: safierinx-a Date: Wed, 25 Mar 2026 00:01:24 +0530 Subject: [PATCH] Add MusicBrainz search-by-name enrichment for scrobbles without IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a listen arrives with no MBZ IDs and no album title (the common multi-scrobbler/Last.fm case), search MusicBrainz by artist+track name to resolve recording, release, and release group IDs. This unlocks CoverArtArchive album art, proper album association, and duration data. New file: internal/mbz/search.go - SearchRecording() method with Lucene query escaping - Confidence filter: case-insensitive exact match on title + artist credit - Release selection: prefer Official status, then first available - Uses existing rate-limited queue (1 req/sec) Integration in catalog.go: - Only triggers when RecordingMbzID, ReleaseMbzID, AND ReleaseTitle are all missing — no impact on scrobbles that already have MBZ data - Soft failure — search errors don't block the listen - KOITO_DISABLE_MUSICBRAINZ handled automatically (MbzErrorCaller returns error) Interface + mocks updated: - SearchRecording added to MusicBrainzCaller interface - MbzMockCaller: SearchResults map for test data - MbzErrorCaller: returns error (existing pattern) New tests: - TestSubmitListen_SearchByName — mock search, verify album+duration resolved - TestSubmitListen_SearchByNameNoMatch — verify graceful fallback Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/catalog/catalog.go | 15 ++ internal/catalog/submit_listen_test.go | 92 +++++++++++++ internal/mbz/mbz.go | 1 + internal/mbz/mock.go | 13 ++ internal/mbz/search.go | 183 +++++++++++++++++++++++++ 5 files changed, 304 insertions(+) create mode 100644 internal/mbz/search.go diff --git a/internal/catalog/catalog.go b/internal/catalog/catalog.go index e94db03..efc63dc 100644 --- a/internal/catalog/catalog.go +++ b/internal/catalog/catalog.go @@ -102,6 +102,21 @@ func SubmitListen(ctx context.Context, store db.DB, opts SubmitListenOpts) error artistIDs[i] = artist.ID l.Debug().Any("artist", artist).Msg("Matched listen to artist") } + + // Search MusicBrainz by name when no MBZ IDs or album title are available + if opts.RecordingMbzID == uuid.Nil && opts.ReleaseMbzID == uuid.Nil && opts.ReleaseTitle == "" { + result, err := opts.MbzCaller.SearchRecording(ctx, opts.Artist, opts.TrackTitle) + if err == nil && result != nil { + opts.RecordingMbzID = result.RecordingID + opts.ReleaseMbzID = result.ReleaseID + opts.ReleaseGroupMbzID = result.ReleaseGroupID + opts.ReleaseTitle = result.ReleaseTitle + if opts.Duration == 0 && result.DurationMs > 0 { + opts.Duration = int32(result.DurationMs / 1000) + } + } + } + rg, err := AssociateAlbum(ctx, store, AssociateAlbumOpts{ ReleaseMbzID: opts.ReleaseMbzID, ReleaseGroupMbzID: opts.ReleaseGroupMbzID, diff --git a/internal/catalog/submit_listen_test.go b/internal/catalog/submit_listen_test.go index 1548776..143d7f5 100644 --- a/internal/catalog/submit_listen_test.go +++ b/internal/catalog/submit_listen_test.go @@ -1037,3 +1037,95 @@ func TestSubmitListen_MusicBrainzUnreachableMBIDMappings(t *testing.T) { require.NoError(t, err) assert.True(t, exists, "expected artist to have correct musicbrainz id") } + +func TestSubmitListen_SearchByName(t *testing.T) { + truncateTestData(t) + + // When no MBZ IDs and no release title are provided, + // SearchRecording should be called to resolve them + + ctx := context.Background() + recordingID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000001") + releaseID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000002") + releaseGroupID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000003") + + mbzc := &mbz.MbzMockCaller{ + SearchResults: map[string]*mbz.MusicBrainzSearchResult{ + "Some Artist\x00Some Track": { + RecordingID: recordingID, + ReleaseID: releaseID, + ReleaseGroupID: releaseGroupID, + ReleaseTitle: "Resolved Album", + DurationMs: 240000, + }, + }, + Releases: map[uuid.UUID]*mbz.MusicBrainzRelease{ + releaseID: { + Title: "Resolved Album", + ID: releaseID.String(), + Status: "Official", + }, + }, + } + + opts := catalog.SubmitListenOpts{ + MbzCaller: mbzc, + ArtistNames: []string{"Some Artist"}, + Artist: "Some Artist", + TrackTitle: "Some Track", + Time: time.Now(), + UserID: 1, + } + + err := catalog.SubmitListen(ctx, store, opts) + require.NoError(t, err) + + // Verify that the album was resolved from search + album, err := store.GetAlbum(ctx, db.GetAlbumOpts{MusicBrainzID: releaseID}) + require.NoError(t, err) + assert.Equal(t, "Resolved Album", album.Title) + + // Verify the track was created with duration from search + track, err := store.GetTrack(ctx, db.GetTrackOpts{MusicBrainzID: recordingID}) + require.NoError(t, err) + assert.Equal(t, "Some Track", track.Title) + assert.EqualValues(t, 240, track.Duration) +} + +func TestSubmitListen_SearchByNameNoMatch(t *testing.T) { + truncateTestData(t) + + // When search returns no match, the listen should still be created + // with a fallback album title + + ctx := context.Background() + mbzc := &mbz.MbzMockCaller{ + SearchResults: map[string]*mbz.MusicBrainzSearchResult{}, + } + + opts := catalog.SubmitListenOpts{ + MbzCaller: mbzc, + ArtistNames: []string{"Unknown Artist"}, + Artist: "Unknown Artist", + TrackTitle: "Unknown Track", + Time: time.Now(), + UserID: 1, + } + + err := catalog.SubmitListen(ctx, store, opts) + require.NoError(t, err) + + // Verify the listen was saved even without search results + exists, err := store.RowExists(ctx, ` + SELECT EXISTS ( + SELECT 1 FROM listens + WHERE track_id = $1 + )`, 1) + require.NoError(t, err) + assert.True(t, exists, "expected listen row to exist") + + // Artist should still be created + artist, err := store.GetArtist(ctx, db.GetArtistOpts{Name: "Unknown Artist"}) + require.NoError(t, err) + assert.Equal(t, "Unknown Artist", artist.Name) +} diff --git a/internal/mbz/mbz.go b/internal/mbz/mbz.go index 9e3f52e..a90bb8d 100644 --- a/internal/mbz/mbz.go +++ b/internal/mbz/mbz.go @@ -31,6 +31,7 @@ type MusicBrainzCaller interface { GetTrack(ctx context.Context, id uuid.UUID) (*MusicBrainzTrack, error) GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error) GetRelease(ctx context.Context, id uuid.UUID) (*MusicBrainzRelease, error) + SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) Shutdown() } diff --git a/internal/mbz/mock.go b/internal/mbz/mock.go index 2b5d7d8..1e72bef 100644 --- a/internal/mbz/mock.go +++ b/internal/mbz/mock.go @@ -15,6 +15,7 @@ type MbzMockCaller struct { ReleaseGroups map[uuid.UUID]*MusicBrainzReleaseGroup Releases map[uuid.UUID]*MusicBrainzRelease Tracks map[uuid.UUID]*MusicBrainzTrack + SearchResults map[string]*MusicBrainzSearchResult } func (m *MbzMockCaller) GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error) { @@ -70,6 +71,14 @@ func (m *MbzMockCaller) GetArtistPrimaryAliases(ctx context.Context, id uuid.UUI return ss, nil } +func (m *MbzMockCaller) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) { + key := artist + "\x00" + track + if result, exists := m.SearchResults[key]; exists { + return result, nil + } + return nil, nil +} + func (m *MbzMockCaller) Shutdown() {} type MbzErrorCaller struct{} @@ -94,4 +103,8 @@ func (m *MbzErrorCaller) GetArtistPrimaryAliases(ctx context.Context, id uuid.UU return nil, fmt.Errorf("error: GetArtistPrimaryAliases not implemented") } +func (m *MbzErrorCaller) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) { + return nil, fmt.Errorf("error: SearchRecording not implemented") +} + func (m *MbzErrorCaller) Shutdown() {} diff --git a/internal/mbz/search.go b/internal/mbz/search.go new file mode 100644 index 0000000..8cc9ed9 --- /dev/null +++ b/internal/mbz/search.go @@ -0,0 +1,183 @@ +package mbz + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "strings" + + "github.com/gabehf/koito/internal/logger" + "github.com/google/uuid" +) + +// MusicBrainz search API response types + +type musicBrainzSearchResponse struct { + Recordings []musicBrainzSearchRecording `json:"recordings"` +} + +type musicBrainzSearchRecording struct { + ID string `json:"id"` + Title string `json:"title"` + Length int `json:"length"` + ArtistCredit []MusicBrainzArtistCredit `json:"artist-credit"` + Releases []musicBrainzSearchRelease `json:"releases"` +} + +type musicBrainzSearchRelease struct { + ID string `json:"id"` + Title string `json:"title"` + Status string `json:"status"` + ReleaseGroup struct { + ID string `json:"id"` + } `json:"release-group"` +} + +// MusicBrainzSearchResult holds the resolved IDs from a search-by-name query. +type MusicBrainzSearchResult struct { + RecordingID uuid.UUID + ReleaseID uuid.UUID + ReleaseGroupID uuid.UUID + ReleaseTitle string + DurationMs int +} + +// SearchRecording searches MusicBrainz for a recording by artist and track name. +// It returns the best match that passes a confidence filter (case-insensitive exact +// match on title and at least one artist credit), or nil if no confident match is found. +func (c *MusicBrainzClient) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) { + l := logger.FromContext(ctx) + + query := fmt.Sprintf("artist:\"%s\" AND recording:\"%s\"", + escapeLucene(artist), escapeLucene(track)) + url := fmt.Sprintf("%s/ws/2/recording/?query=%s&limit=5&fmt=json", + c.url, queryEscape(query)) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("SearchRecording: %w", err) + } + + body, err := c.queue(ctx, req) + if err != nil { + return nil, fmt.Errorf("SearchRecording: %w", err) + } + + var resp musicBrainzSearchResponse + if err := json.Unmarshal(body, &resp); err != nil { + l.Err(err).Str("body", string(body)).Msg("Failed to unmarshal MusicBrainz search response") + return nil, fmt.Errorf("SearchRecording: %w", err) + } + + for _, rec := range resp.Recordings { + if !strings.EqualFold(rec.Title, track) { + continue + } + if !hasArtistCredit(rec.ArtistCredit, artist) { + continue + } + + recordingID, err := uuid.Parse(rec.ID) + if err != nil { + continue + } + + release := pickBestRelease(rec.Releases) + if release == nil { + continue + } + + releaseID, err := uuid.Parse(release.ID) + if err != nil { + continue + } + releaseGroupID, err := uuid.Parse(release.ReleaseGroup.ID) + if err != nil { + continue + } + + l.Debug().Msgf("MBZ search matched: '%s' by '%s' -> recording=%s release=%s", + track, artist, recordingID, releaseID) + + return &MusicBrainzSearchResult{ + RecordingID: recordingID, + ReleaseID: releaseID, + ReleaseGroupID: releaseGroupID, + ReleaseTitle: release.Title, + DurationMs: rec.Length, + }, nil + } + + l.Debug().Msgf("MBZ search: no confident match for '%s' by '%s'", track, artist) + return nil, nil +} + +// hasArtistCredit checks whether at least one artist credit name matches (case-insensitive). +func hasArtistCredit(credits []MusicBrainzArtistCredit, artist string) bool { + for _, ac := range credits { + if strings.EqualFold(ac.Name, artist) || strings.EqualFold(ac.Artist.Name, artist) { + return true + } + } + return false +} + +// pickBestRelease prefers an Official release, then falls back to the first available. +func pickBestRelease(releases []musicBrainzSearchRelease) *musicBrainzSearchRelease { + if len(releases) == 0 { + return nil + } + for i := range releases { + if releases[i].Status == "Official" { + return &releases[i] + } + } + return &releases[0] +} + +// escapeLucene escapes special characters in Lucene query syntax. +func escapeLucene(s string) string { + replacer := strings.NewReplacer( + `\`, `\\`, + `+`, `\+`, + `-`, `\-`, + `!`, `\!`, + `(`, `\(`, + `)`, `\)`, + `{`, `\{`, + `}`, `\}`, + `[`, `\[`, + `]`, `\]`, + `^`, `\^`, + `"`, `\"`, + `~`, `\~`, + `*`, `\*`, + `?`, `\?`, + `:`, `\:`, + `/`, `\/`, + ) + return replacer.Replace(s) +} + +// queryEscape percent-encodes a query string value for use in a URL. +// We use a simple implementation to avoid importing net/url just for this. +func queryEscape(s string) string { + var b strings.Builder + for i := 0; i < len(s); i++ { + c := s[i] + if isUnreserved(c) { + b.WriteByte(c) + } else { + fmt.Fprintf(&b, "%%%02X", c) + } + } + return b.String() +} + +func isUnreserved(c byte) bool { + return (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || + c == '-' || c == '_' || c == '.' || c == '~' +}