mirror of
https://github.com/gabehf/Koito.git
synced 2026-04-22 12:01:52 -07:00
Add MusicBrainz search-by-name enrichment for scrobbles without IDs
When a listen arrives with no MBZ IDs and no album title (the common multi-scrobbler/Last.fm case), search MusicBrainz by artist+track name to resolve recording, release, and release group IDs. This unlocks CoverArtArchive album art, proper album association, and duration data.

New file: internal/mbz/search.go
- SearchRecording() method with Lucene query escaping
- Confidence filter: case-insensitive exact match on title + artist credit
- Release selection: prefer Official status, then first available
- Uses existing rate-limited queue (1 req/sec)

Integration in catalog.go:
- Only triggers when RecordingMbzID, ReleaseMbzID, AND ReleaseTitle are all missing — no impact on scrobbles that already have MBZ data
- Soft failure — search errors don't block the listen
- KOITO_DISABLE_MUSICBRAINZ handled automatically (MbzErrorCaller returns error)

Interface + mocks updated:
- SearchRecording added to MusicBrainzCaller interface
- MbzMockCaller: SearchResults map for test data
- MbzErrorCaller: returns error (existing pattern)

New tests:
- TestSubmitListen_SearchByName — mock search, verify album+duration resolved
- TestSubmitListen_SearchByNameNoMatch — verify graceful fallback

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0ec7b458cc
commit
2c29499403
5 changed files with 304 additions and 0 deletions
|
|
@ -102,6 +102,21 @@ func SubmitListen(ctx context.Context, store db.DB, opts SubmitListenOpts) error
|
|||
artistIDs[i] = artist.ID
|
||||
l.Debug().Any("artist", artist).Msg("Matched listen to artist")
|
||||
}
|
||||
|
||||
// Search MusicBrainz by name when no MBZ IDs or album title are available
|
||||
if opts.RecordingMbzID == uuid.Nil && opts.ReleaseMbzID == uuid.Nil && opts.ReleaseTitle == "" {
|
||||
result, err := opts.MbzCaller.SearchRecording(ctx, opts.Artist, opts.TrackTitle)
|
||||
if err == nil && result != nil {
|
||||
opts.RecordingMbzID = result.RecordingID
|
||||
opts.ReleaseMbzID = result.ReleaseID
|
||||
opts.ReleaseGroupMbzID = result.ReleaseGroupID
|
||||
opts.ReleaseTitle = result.ReleaseTitle
|
||||
if opts.Duration == 0 && result.DurationMs > 0 {
|
||||
opts.Duration = int32(result.DurationMs / 1000)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rg, err := AssociateAlbum(ctx, store, AssociateAlbumOpts{
|
||||
ReleaseMbzID: opts.ReleaseMbzID,
|
||||
ReleaseGroupMbzID: opts.ReleaseGroupMbzID,
|
||||
|
|
|
|||
|
|
@ -1037,3 +1037,95 @@ func TestSubmitListen_MusicBrainzUnreachableMBIDMappings(t *testing.T) {
|
|||
require.NoError(t, err)
|
||||
assert.True(t, exists, "expected artist to have correct musicbrainz id")
|
||||
}
|
||||
|
||||
func TestSubmitListen_SearchByName(t *testing.T) {
|
||||
truncateTestData(t)
|
||||
|
||||
// When no MBZ IDs and no release title are provided,
|
||||
// SearchRecording should be called to resolve them
|
||||
|
||||
ctx := context.Background()
|
||||
recordingID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000001")
|
||||
releaseID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000002")
|
||||
releaseGroupID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000003")
|
||||
|
||||
mbzc := &mbz.MbzMockCaller{
|
||||
SearchResults: map[string]*mbz.MusicBrainzSearchResult{
|
||||
"Some Artist\x00Some Track": {
|
||||
RecordingID: recordingID,
|
||||
ReleaseID: releaseID,
|
||||
ReleaseGroupID: releaseGroupID,
|
||||
ReleaseTitle: "Resolved Album",
|
||||
DurationMs: 240000,
|
||||
},
|
||||
},
|
||||
Releases: map[uuid.UUID]*mbz.MusicBrainzRelease{
|
||||
releaseID: {
|
||||
Title: "Resolved Album",
|
||||
ID: releaseID.String(),
|
||||
Status: "Official",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
opts := catalog.SubmitListenOpts{
|
||||
MbzCaller: mbzc,
|
||||
ArtistNames: []string{"Some Artist"},
|
||||
Artist: "Some Artist",
|
||||
TrackTitle: "Some Track",
|
||||
Time: time.Now(),
|
||||
UserID: 1,
|
||||
}
|
||||
|
||||
err := catalog.SubmitListen(ctx, store, opts)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify that the album was resolved from search
|
||||
album, err := store.GetAlbum(ctx, db.GetAlbumOpts{MusicBrainzID: releaseID})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "Resolved Album", album.Title)
|
||||
|
||||
// Verify the track was created with duration from search
|
||||
track, err := store.GetTrack(ctx, db.GetTrackOpts{MusicBrainzID: recordingID})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "Some Track", track.Title)
|
||||
assert.EqualValues(t, 240, track.Duration)
|
||||
}
|
||||
|
||||
func TestSubmitListen_SearchByNameNoMatch(t *testing.T) {
|
||||
truncateTestData(t)
|
||||
|
||||
// When search returns no match, the listen should still be created
|
||||
// with a fallback album title
|
||||
|
||||
ctx := context.Background()
|
||||
mbzc := &mbz.MbzMockCaller{
|
||||
SearchResults: map[string]*mbz.MusicBrainzSearchResult{},
|
||||
}
|
||||
|
||||
opts := catalog.SubmitListenOpts{
|
||||
MbzCaller: mbzc,
|
||||
ArtistNames: []string{"Unknown Artist"},
|
||||
Artist: "Unknown Artist",
|
||||
TrackTitle: "Unknown Track",
|
||||
Time: time.Now(),
|
||||
UserID: 1,
|
||||
}
|
||||
|
||||
err := catalog.SubmitListen(ctx, store, opts)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Verify the listen was saved even without search results
|
||||
exists, err := store.RowExists(ctx, `
|
||||
SELECT EXISTS (
|
||||
SELECT 1 FROM listens
|
||||
WHERE track_id = $1
|
||||
)`, 1)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, exists, "expected listen row to exist")
|
||||
|
||||
// Artist should still be created
|
||||
artist, err := store.GetArtist(ctx, db.GetArtistOpts{Name: "Unknown Artist"})
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "Unknown Artist", artist.Name)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ type MusicBrainzCaller interface {
|
|||
GetTrack(ctx context.Context, id uuid.UUID) (*MusicBrainzTrack, error)
|
||||
GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error)
|
||||
GetRelease(ctx context.Context, id uuid.UUID) (*MusicBrainzRelease, error)
|
||||
SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error)
|
||||
Shutdown()
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ type MbzMockCaller struct {
|
|||
ReleaseGroups map[uuid.UUID]*MusicBrainzReleaseGroup
|
||||
Releases map[uuid.UUID]*MusicBrainzRelease
|
||||
Tracks map[uuid.UUID]*MusicBrainzTrack
|
||||
SearchResults map[string]*MusicBrainzSearchResult
|
||||
}
|
||||
|
||||
func (m *MbzMockCaller) GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error) {
|
||||
|
|
@ -70,6 +71,14 @@ func (m *MbzMockCaller) GetArtistPrimaryAliases(ctx context.Context, id uuid.UUI
|
|||
return ss, nil
|
||||
}
|
||||
|
||||
func (m *MbzMockCaller) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) {
|
||||
key := artist + "\x00" + track
|
||||
if result, exists := m.SearchResults[key]; exists {
|
||||
return result, nil
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *MbzMockCaller) Shutdown() {}
|
||||
|
||||
type MbzErrorCaller struct{}
|
||||
|
|
@ -94,4 +103,8 @@ func (m *MbzErrorCaller) GetArtistPrimaryAliases(ctx context.Context, id uuid.UU
|
|||
return nil, fmt.Errorf("error: GetArtistPrimaryAliases not implemented")
|
||||
}
|
||||
|
||||
func (m *MbzErrorCaller) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) {
|
||||
return nil, fmt.Errorf("error: SearchRecording not implemented")
|
||||
}
|
||||
|
||||
func (m *MbzErrorCaller) Shutdown() {}
|
||||
|
|
|
|||
183
internal/mbz/search.go
Normal file
183
internal/mbz/search.go
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
package mbz
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/gabehf/koito/internal/logger"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// MusicBrainz search API response types
|
||||
|
||||
// musicBrainzSearchResponse is the top-level JSON object decoded from a
// recording search response. Only the "recordings" array is read.
type musicBrainzSearchResponse struct {
	Recordings []musicBrainzSearchRecording `json:"recordings"`
}
|
||||
|
||||
// musicBrainzSearchRecording is one candidate recording in a search response.
type musicBrainzSearchRecording struct {
	// ID is the recording identifier as a string; SearchRecording parses it as a UUID.
	ID string `json:"id"`
	// Title is compared case-insensitively against the requested track name.
	Title string `json:"title"`
	// Length is copied unchanged into MusicBrainzSearchResult.DurationMs,
	// so it is treated as milliseconds.
	Length int `json:"length"`
	// ArtistCredit lists the credited artists checked by the confidence filter.
	ArtistCredit []MusicBrainzArtistCredit `json:"artist-credit"`
	// Releases lists the releases from which one is picked for the result.
	Releases []musicBrainzSearchRelease `json:"releases"`
}
|
||||
|
||||
// musicBrainzSearchRelease is one release attached to a search candidate.
type musicBrainzSearchRelease struct {
	// ID is the release identifier as a string; SearchRecording parses it as a UUID.
	ID string `json:"id"`
	// Title becomes the ReleaseTitle of the search result.
	Title string `json:"title"`
	// Status is compared against "Official" when choosing a release.
	Status string `json:"status"`
	// ReleaseGroup carries the identifier of the group this release belongs to.
	ReleaseGroup struct {
		ID string `json:"id"`
	} `json:"release-group"`
}
|
||||
|
||||
// MusicBrainzSearchResult holds the resolved IDs from a search-by-name query.
type MusicBrainzSearchResult struct {
	// RecordingID is the ID of the matched recording.
	RecordingID uuid.UUID
	// ReleaseID is the ID of the release chosen for that recording.
	ReleaseID uuid.UUID
	// ReleaseGroupID is the ID of the chosen release's release group.
	ReleaseGroupID uuid.UUID
	// ReleaseTitle is the title of the chosen release.
	ReleaseTitle string
	// DurationMs is the recording length in milliseconds, taken from the
	// search response's "length" field.
	DurationMs int
}
|
||||
|
||||
// SearchRecording searches MusicBrainz for a recording by artist and track name.
|
||||
// It returns the best match that passes a confidence filter (case-insensitive exact
|
||||
// match on title and at least one artist credit), or nil if no confident match is found.
|
||||
func (c *MusicBrainzClient) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) {
|
||||
l := logger.FromContext(ctx)
|
||||
|
||||
query := fmt.Sprintf("artist:\"%s\" AND recording:\"%s\"",
|
||||
escapeLucene(artist), escapeLucene(track))
|
||||
url := fmt.Sprintf("%s/ws/2/recording/?query=%s&limit=5&fmt=json",
|
||||
c.url, queryEscape(query))
|
||||
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("SearchRecording: %w", err)
|
||||
}
|
||||
|
||||
body, err := c.queue(ctx, req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("SearchRecording: %w", err)
|
||||
}
|
||||
|
||||
var resp musicBrainzSearchResponse
|
||||
if err := json.Unmarshal(body, &resp); err != nil {
|
||||
l.Err(err).Str("body", string(body)).Msg("Failed to unmarshal MusicBrainz search response")
|
||||
return nil, fmt.Errorf("SearchRecording: %w", err)
|
||||
}
|
||||
|
||||
for _, rec := range resp.Recordings {
|
||||
if !strings.EqualFold(rec.Title, track) {
|
||||
continue
|
||||
}
|
||||
if !hasArtistCredit(rec.ArtistCredit, artist) {
|
||||
continue
|
||||
}
|
||||
|
||||
recordingID, err := uuid.Parse(rec.ID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
release := pickBestRelease(rec.Releases)
|
||||
if release == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
releaseID, err := uuid.Parse(release.ID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
releaseGroupID, err := uuid.Parse(release.ReleaseGroup.ID)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
l.Debug().Msgf("MBZ search matched: '%s' by '%s' -> recording=%s release=%s",
|
||||
track, artist, recordingID, releaseID)
|
||||
|
||||
return &MusicBrainzSearchResult{
|
||||
RecordingID: recordingID,
|
||||
ReleaseID: releaseID,
|
||||
ReleaseGroupID: releaseGroupID,
|
||||
ReleaseTitle: release.Title,
|
||||
DurationMs: rec.Length,
|
||||
}, nil
|
||||
}
|
||||
|
||||
l.Debug().Msgf("MBZ search: no confident match for '%s' by '%s'", track, artist)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// hasArtistCredit checks whether at least one artist credit name matches (case-insensitive).
|
||||
func hasArtistCredit(credits []MusicBrainzArtistCredit, artist string) bool {
|
||||
for _, ac := range credits {
|
||||
if strings.EqualFold(ac.Name, artist) || strings.EqualFold(ac.Artist.Name, artist) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// pickBestRelease prefers an Official release, then falls back to the first available.
|
||||
func pickBestRelease(releases []musicBrainzSearchRelease) *musicBrainzSearchRelease {
|
||||
if len(releases) == 0 {
|
||||
return nil
|
||||
}
|
||||
for i := range releases {
|
||||
if releases[i].Status == "Official" {
|
||||
return &releases[i]
|
||||
}
|
||||
}
|
||||
return &releases[0]
|
||||
}
|
||||
|
||||
// luceneEscaper backslash-escapes each character that Lucene's classic query
// parser treats as syntax: \ + - ! ( ) { } [ ] ^ " ~ * ? : / & |
// It is built once at package scope; a strings.Replacer is safe for concurrent use.
var luceneEscaper = strings.NewReplacer(
	`\`, `\\`,
	`+`, `\+`,
	`-`, `\-`,
	`!`, `\!`,
	`(`, `\(`,
	`)`, `\)`,
	`{`, `\{`,
	`}`, `\}`,
	`[`, `\[`,
	`]`, `\]`,
	`^`, `\^`,
	`"`, `\"`,
	`~`, `\~`,
	`*`, `\*`,
	`?`, `\?`,
	`:`, `\:`,
	`/`, `\/`,
	`&`, `\&`,
	`|`, `\|`,
)

// escapeLucene escapes special characters in Lucene query syntax.
//
// Every special character in s is prefixed with a backslash in one pass, so a
// backslash already present in s is escaped exactly once. This includes '&'
// and '|', the characters that make up the && and || operators.
func escapeLucene(s string) string {
	return luceneEscaper.Replace(s)
}
|
||||
|
||||
// queryEscape percent-encodes s for use as a URL query value. Every byte that
// is not an unreserved character is written as %XX with uppercase hex digits,
// so a space becomes %20. It is hand-rolled so this file does not need net/url.
func queryEscape(s string) string {
	var out strings.Builder
	for _, ch := range []byte(s) {
		if !isUnreserved(ch) {
			fmt.Fprintf(&out, "%%%02X", ch)
			continue
		}
		out.WriteByte(ch)
	}
	return out.String()
}

// isUnreserved reports whether c is an ASCII letter, an ASCII digit, or one
// of '-', '_', '.', '~'.
func isUnreserved(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z', '0' <= c && c <= '9':
		return true
	}
	switch c {
	case '-', '_', '.', '~':
		return true
	}
	return false
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue