Add MusicBrainz search-by-name enrichment for scrobbles without IDs

When a listen arrives with no MBZ IDs and no album title (the common
multi-scrobbler/Last.fm case), search MusicBrainz by artist+track name
to resolve recording, release, and release group IDs. This unlocks
CoverArtArchive album art, proper album association, and duration data.

New file: internal/mbz/search.go
- SearchRecording() method with Lucene query escaping
- Confidence filter: case-insensitive exact match on title + artist credit
- Release selection: prefer Official status, then first available
- Uses existing rate-limited queue (1 req/sec)

Integration in catalog.go:
- Only triggers when RecordingMbzID, ReleaseMbzID, AND ReleaseTitle are
  all missing — no impact on scrobbles that already have MBZ data
- Soft failure — search errors don't block the listen
- KOITO_DISABLE_MUSICBRAINZ handled automatically (MbzErrorCaller returns error)

Interface + mocks updated:
- SearchRecording added to MusicBrainzCaller interface
- MbzMockCaller: SearchResults map for test data
- MbzErrorCaller: returns error (existing pattern)

New tests:
- TestSubmitListen_SearchByName — mock search, verify album+duration resolved
- TestSubmitListen_SearchByNameNoMatch — verify graceful fallback

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
safierinx-a 2026-03-25 00:01:24 +05:30
parent c2d393aa03
commit 0855fb4bd7
5 changed files with 304 additions and 0 deletions

View file

@ -102,6 +102,21 @@ func SubmitListen(ctx context.Context, store db.DB, opts SubmitListenOpts) error
artistIDs[i] = artist.ID artistIDs[i] = artist.ID
l.Debug().Any("artist", artist).Msg("Matched listen to artist") l.Debug().Any("artist", artist).Msg("Matched listen to artist")
} }
// Search MusicBrainz by name when no MBZ IDs or album title are available
if opts.RecordingMbzID == uuid.Nil && opts.ReleaseMbzID == uuid.Nil && opts.ReleaseTitle == "" {
result, err := opts.MbzCaller.SearchRecording(ctx, opts.Artist, opts.TrackTitle)
if err == nil && result != nil {
opts.RecordingMbzID = result.RecordingID
opts.ReleaseMbzID = result.ReleaseID
opts.ReleaseGroupMbzID = result.ReleaseGroupID
opts.ReleaseTitle = result.ReleaseTitle
if opts.Duration == 0 && result.DurationMs > 0 {
opts.Duration = int32(result.DurationMs / 1000)
}
}
}
rg, err := AssociateAlbum(ctx, store, AssociateAlbumOpts{ rg, err := AssociateAlbum(ctx, store, AssociateAlbumOpts{
ReleaseMbzID: opts.ReleaseMbzID, ReleaseMbzID: opts.ReleaseMbzID,
ReleaseGroupMbzID: opts.ReleaseGroupMbzID, ReleaseGroupMbzID: opts.ReleaseGroupMbzID,

View file

@ -1037,3 +1037,95 @@ func TestSubmitListen_MusicBrainzUnreachableMBIDMappings(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
assert.True(t, exists, "expected artist to have correct musicbrainz id") assert.True(t, exists, "expected artist to have correct musicbrainz id")
} }
func TestSubmitListen_SearchByName(t *testing.T) {
truncateTestData(t)
// When no MBZ IDs and no release title are provided,
// SearchRecording should be called to resolve them
ctx := context.Background()
recordingID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000001")
releaseID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000002")
releaseGroupID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000003")
mbzc := &mbz.MbzMockCaller{
SearchResults: map[string]*mbz.MusicBrainzSearchResult{
"Some Artist\x00Some Track": {
RecordingID: recordingID,
ReleaseID: releaseID,
ReleaseGroupID: releaseGroupID,
ReleaseTitle: "Resolved Album",
DurationMs: 240000,
},
},
Releases: map[uuid.UUID]*mbz.MusicBrainzRelease{
releaseID: {
Title: "Resolved Album",
ID: releaseID.String(),
Status: "Official",
},
},
}
opts := catalog.SubmitListenOpts{
MbzCaller: mbzc,
ArtistNames: []string{"Some Artist"},
Artist: "Some Artist",
TrackTitle: "Some Track",
Time: time.Now(),
UserID: 1,
}
err := catalog.SubmitListen(ctx, store, opts)
require.NoError(t, err)
// Verify that the album was resolved from search
album, err := store.GetAlbum(ctx, db.GetAlbumOpts{MusicBrainzID: releaseID})
require.NoError(t, err)
assert.Equal(t, "Resolved Album", album.Title)
// Verify the track was created with duration from search
track, err := store.GetTrack(ctx, db.GetTrackOpts{MusicBrainzID: recordingID})
require.NoError(t, err)
assert.Equal(t, "Some Track", track.Title)
assert.EqualValues(t, 240, track.Duration)
}
func TestSubmitListen_SearchByNameNoMatch(t *testing.T) {
truncateTestData(t)
// When search returns no match, the listen should still be created
// with a fallback album title
ctx := context.Background()
mbzc := &mbz.MbzMockCaller{
SearchResults: map[string]*mbz.MusicBrainzSearchResult{},
}
opts := catalog.SubmitListenOpts{
MbzCaller: mbzc,
ArtistNames: []string{"Unknown Artist"},
Artist: "Unknown Artist",
TrackTitle: "Unknown Track",
Time: time.Now(),
UserID: 1,
}
err := catalog.SubmitListen(ctx, store, opts)
require.NoError(t, err)
// Verify the listen was saved even without search results
exists, err := store.RowExists(ctx, `
SELECT EXISTS (
SELECT 1 FROM listens
WHERE track_id = $1
)`, 1)
require.NoError(t, err)
assert.True(t, exists, "expected listen row to exist")
// Artist should still be created
artist, err := store.GetArtist(ctx, db.GetArtistOpts{Name: "Unknown Artist"})
require.NoError(t, err)
assert.Equal(t, "Unknown Artist", artist.Name)
}

View file

@ -31,6 +31,7 @@ type MusicBrainzCaller interface {
GetTrack(ctx context.Context, id uuid.UUID) (*MusicBrainzTrack, error) GetTrack(ctx context.Context, id uuid.UUID) (*MusicBrainzTrack, error)
GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error) GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error)
GetRelease(ctx context.Context, id uuid.UUID) (*MusicBrainzRelease, error) GetRelease(ctx context.Context, id uuid.UUID) (*MusicBrainzRelease, error)
SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error)
Shutdown() Shutdown()
} }

View file

@ -15,6 +15,7 @@ type MbzMockCaller struct {
ReleaseGroups map[uuid.UUID]*MusicBrainzReleaseGroup ReleaseGroups map[uuid.UUID]*MusicBrainzReleaseGroup
Releases map[uuid.UUID]*MusicBrainzRelease Releases map[uuid.UUID]*MusicBrainzRelease
Tracks map[uuid.UUID]*MusicBrainzTrack Tracks map[uuid.UUID]*MusicBrainzTrack
SearchResults map[string]*MusicBrainzSearchResult
} }
func (m *MbzMockCaller) GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error) { func (m *MbzMockCaller) GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error) {
@ -70,6 +71,14 @@ func (m *MbzMockCaller) GetArtistPrimaryAliases(ctx context.Context, id uuid.UUI
return ss, nil return ss, nil
} }
func (m *MbzMockCaller) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) {
key := artist + "\x00" + track
if result, exists := m.SearchResults[key]; exists {
return result, nil
}
return nil, nil
}
func (m *MbzMockCaller) Shutdown() {} func (m *MbzMockCaller) Shutdown() {}
type MbzErrorCaller struct{} type MbzErrorCaller struct{}
@ -94,4 +103,8 @@ func (m *MbzErrorCaller) GetArtistPrimaryAliases(ctx context.Context, id uuid.UU
return nil, fmt.Errorf("error: GetArtistPrimaryAliases not implemented") return nil, fmt.Errorf("error: GetArtistPrimaryAliases not implemented")
} }
func (m *MbzErrorCaller) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) {
return nil, fmt.Errorf("error: SearchRecording not implemented")
}
func (m *MbzErrorCaller) Shutdown() {} func (m *MbzErrorCaller) Shutdown() {}

183
internal/mbz/search.go Normal file
View file

@ -0,0 +1,183 @@
package mbz
import (
"context"
"encoding/json"
"fmt"
"net/http"
"strings"
"github.com/gabehf/koito/internal/logger"
"github.com/google/uuid"
)
// MusicBrainz search API response types
type musicBrainzSearchResponse struct {
Recordings []musicBrainzSearchRecording `json:"recordings"`
}
type musicBrainzSearchRecording struct {
ID string `json:"id"`
Title string `json:"title"`
Length int `json:"length"`
ArtistCredit []MusicBrainzArtistCredit `json:"artist-credit"`
Releases []musicBrainzSearchRelease `json:"releases"`
}
type musicBrainzSearchRelease struct {
ID string `json:"id"`
Title string `json:"title"`
Status string `json:"status"`
ReleaseGroup struct {
ID string `json:"id"`
} `json:"release-group"`
}
// MusicBrainzSearchResult holds the resolved IDs from a search-by-name query.
type MusicBrainzSearchResult struct {
RecordingID uuid.UUID
ReleaseID uuid.UUID
ReleaseGroupID uuid.UUID
ReleaseTitle string
DurationMs int
}
// SearchRecording searches MusicBrainz for a recording by artist and track name.
// It returns the best match that passes a confidence filter (case-insensitive exact
// match on title and at least one artist credit), or nil if no confident match is found.
func (c *MusicBrainzClient) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) {
l := logger.FromContext(ctx)
query := fmt.Sprintf("artist:\"%s\" AND recording:\"%s\"",
escapeLucene(artist), escapeLucene(track))
url := fmt.Sprintf("%s/ws/2/recording/?query=%s&limit=5&fmt=json",
c.url, queryEscape(query))
req, err := http.NewRequest("GET", url, nil)
if err != nil {
return nil, fmt.Errorf("SearchRecording: %w", err)
}
body, err := c.queue(ctx, req)
if err != nil {
return nil, fmt.Errorf("SearchRecording: %w", err)
}
var resp musicBrainzSearchResponse
if err := json.Unmarshal(body, &resp); err != nil {
l.Err(err).Str("body", string(body)).Msg("Failed to unmarshal MusicBrainz search response")
return nil, fmt.Errorf("SearchRecording: %w", err)
}
for _, rec := range resp.Recordings {
if !strings.EqualFold(rec.Title, track) {
continue
}
if !hasArtistCredit(rec.ArtistCredit, artist) {
continue
}
recordingID, err := uuid.Parse(rec.ID)
if err != nil {
continue
}
release := pickBestRelease(rec.Releases)
if release == nil {
continue
}
releaseID, err := uuid.Parse(release.ID)
if err != nil {
continue
}
releaseGroupID, err := uuid.Parse(release.ReleaseGroup.ID)
if err != nil {
continue
}
l.Debug().Msgf("MBZ search matched: '%s' by '%s' -> recording=%s release=%s",
track, artist, recordingID, releaseID)
return &MusicBrainzSearchResult{
RecordingID: recordingID,
ReleaseID: releaseID,
ReleaseGroupID: releaseGroupID,
ReleaseTitle: release.Title,
DurationMs: rec.Length,
}, nil
}
l.Debug().Msgf("MBZ search: no confident match for '%s' by '%s'", track, artist)
return nil, nil
}
// hasArtistCredit checks whether at least one artist credit name matches (case-insensitive).
func hasArtistCredit(credits []MusicBrainzArtistCredit, artist string) bool {
for _, ac := range credits {
if strings.EqualFold(ac.Name, artist) || strings.EqualFold(ac.Artist.Name, artist) {
return true
}
}
return false
}
// pickBestRelease prefers an Official release, then falls back to the first available.
func pickBestRelease(releases []musicBrainzSearchRelease) *musicBrainzSearchRelease {
if len(releases) == 0 {
return nil
}
for i := range releases {
if releases[i].Status == "Official" {
return &releases[i]
}
}
return &releases[0]
}
// escapeLucene escapes special characters in Lucene query syntax.
func escapeLucene(s string) string {
replacer := strings.NewReplacer(
`\`, `\\`,
`+`, `\+`,
`-`, `\-`,
`!`, `\!`,
`(`, `\(`,
`)`, `\)`,
`{`, `\{`,
`}`, `\}`,
`[`, `\[`,
`]`, `\]`,
`^`, `\^`,
`"`, `\"`,
`~`, `\~`,
`*`, `\*`,
`?`, `\?`,
`:`, `\:`,
`/`, `\/`,
)
return replacer.Replace(s)
}
// queryEscape percent-encodes a query string value for use in a URL.
// We use a simple implementation to avoid importing net/url just for this.
func queryEscape(s string) string {
var b strings.Builder
for i := 0; i < len(s); i++ {
c := s[i]
if isUnreserved(c) {
b.WriteByte(c)
} else {
fmt.Fprintf(&b, "%%%02X", c)
}
}
return b.String()
}
func isUnreserved(c byte) bool {
return (c >= 'A' && c <= 'Z') ||
(c >= 'a' && c <= 'z') ||
(c >= '0' && c <= '9') ||
c == '-' || c == '_' || c == '.' || c == '~'
}