mirror of
https://github.com/gabehf/Koito.git
synced 2026-04-22 12:01:52 -07:00
Add MusicBrainz search-by-name enrichment for scrobbles without IDs
When a listen arrives with no MBZ IDs and no album title (the common multi-scrobbler/Last.fm case), search MusicBrainz by artist+track name to resolve recording, release, and release group IDs. This unlocks CoverArtArchive album art, proper album association, and duration data. New file: internal/mbz/search.go - SearchRecording() method with Lucene query escaping - Confidence filter: case-insensitive exact match on title + artist credit - Release selection: prefer Official status, then first available - Uses existing rate-limited queue (1 req/sec) Integration in catalog.go: - Only triggers when RecordingMbzID, ReleaseMbzID, AND ReleaseTitle are all missing — no impact on scrobbles that already have MBZ data - Soft failure — search errors don't block the listen - KOITO_DISABLE_MUSICBRAINZ handled automatically (MbzErrorCaller returns error) Interface + mocks updated: - SearchRecording added to MusicBrainzCaller interface - MbzMockCaller: SearchResults map for test data - MbzErrorCaller: returns error (existing pattern) New tests: - TestSubmitListen_SearchByName — mock search, verify album+duration resolved - TestSubmitListen_SearchByNameNoMatch — verify graceful fallback Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
0ec7b458cc
commit
2c29499403
5 changed files with 304 additions and 0 deletions
|
|
@ -102,6 +102,21 @@ func SubmitListen(ctx context.Context, store db.DB, opts SubmitListenOpts) error
|
||||||
artistIDs[i] = artist.ID
|
artistIDs[i] = artist.ID
|
||||||
l.Debug().Any("artist", artist).Msg("Matched listen to artist")
|
l.Debug().Any("artist", artist).Msg("Matched listen to artist")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Search MusicBrainz by name when no MBZ IDs or album title are available
|
||||||
|
if opts.RecordingMbzID == uuid.Nil && opts.ReleaseMbzID == uuid.Nil && opts.ReleaseTitle == "" {
|
||||||
|
result, err := opts.MbzCaller.SearchRecording(ctx, opts.Artist, opts.TrackTitle)
|
||||||
|
if err == nil && result != nil {
|
||||||
|
opts.RecordingMbzID = result.RecordingID
|
||||||
|
opts.ReleaseMbzID = result.ReleaseID
|
||||||
|
opts.ReleaseGroupMbzID = result.ReleaseGroupID
|
||||||
|
opts.ReleaseTitle = result.ReleaseTitle
|
||||||
|
if opts.Duration == 0 && result.DurationMs > 0 {
|
||||||
|
opts.Duration = int32(result.DurationMs / 1000)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
rg, err := AssociateAlbum(ctx, store, AssociateAlbumOpts{
|
rg, err := AssociateAlbum(ctx, store, AssociateAlbumOpts{
|
||||||
ReleaseMbzID: opts.ReleaseMbzID,
|
ReleaseMbzID: opts.ReleaseMbzID,
|
||||||
ReleaseGroupMbzID: opts.ReleaseGroupMbzID,
|
ReleaseGroupMbzID: opts.ReleaseGroupMbzID,
|
||||||
|
|
|
||||||
|
|
@ -1037,3 +1037,95 @@ func TestSubmitListen_MusicBrainzUnreachableMBIDMappings(t *testing.T) {
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.True(t, exists, "expected artist to have correct musicbrainz id")
|
assert.True(t, exists, "expected artist to have correct musicbrainz id")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSubmitListen_SearchByName(t *testing.T) {
|
||||||
|
truncateTestData(t)
|
||||||
|
|
||||||
|
// When no MBZ IDs and no release title are provided,
|
||||||
|
// SearchRecording should be called to resolve them
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
recordingID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000001")
|
||||||
|
releaseID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000002")
|
||||||
|
releaseGroupID := uuid.MustParse("aaaaaaaa-0000-0000-0000-000000000003")
|
||||||
|
|
||||||
|
mbzc := &mbz.MbzMockCaller{
|
||||||
|
SearchResults: map[string]*mbz.MusicBrainzSearchResult{
|
||||||
|
"Some Artist\x00Some Track": {
|
||||||
|
RecordingID: recordingID,
|
||||||
|
ReleaseID: releaseID,
|
||||||
|
ReleaseGroupID: releaseGroupID,
|
||||||
|
ReleaseTitle: "Resolved Album",
|
||||||
|
DurationMs: 240000,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Releases: map[uuid.UUID]*mbz.MusicBrainzRelease{
|
||||||
|
releaseID: {
|
||||||
|
Title: "Resolved Album",
|
||||||
|
ID: releaseID.String(),
|
||||||
|
Status: "Official",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := catalog.SubmitListenOpts{
|
||||||
|
MbzCaller: mbzc,
|
||||||
|
ArtistNames: []string{"Some Artist"},
|
||||||
|
Artist: "Some Artist",
|
||||||
|
TrackTitle: "Some Track",
|
||||||
|
Time: time.Now(),
|
||||||
|
UserID: 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := catalog.SubmitListen(ctx, store, opts)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Verify that the album was resolved from search
|
||||||
|
album, err := store.GetAlbum(ctx, db.GetAlbumOpts{MusicBrainzID: releaseID})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "Resolved Album", album.Title)
|
||||||
|
|
||||||
|
// Verify the track was created with duration from search
|
||||||
|
track, err := store.GetTrack(ctx, db.GetTrackOpts{MusicBrainzID: recordingID})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "Some Track", track.Title)
|
||||||
|
assert.EqualValues(t, 240, track.Duration)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSubmitListen_SearchByNameNoMatch(t *testing.T) {
|
||||||
|
truncateTestData(t)
|
||||||
|
|
||||||
|
// When search returns no match, the listen should still be created
|
||||||
|
// with a fallback album title
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
mbzc := &mbz.MbzMockCaller{
|
||||||
|
SearchResults: map[string]*mbz.MusicBrainzSearchResult{},
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := catalog.SubmitListenOpts{
|
||||||
|
MbzCaller: mbzc,
|
||||||
|
ArtistNames: []string{"Unknown Artist"},
|
||||||
|
Artist: "Unknown Artist",
|
||||||
|
TrackTitle: "Unknown Track",
|
||||||
|
Time: time.Now(),
|
||||||
|
UserID: 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := catalog.SubmitListen(ctx, store, opts)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// Verify the listen was saved even without search results
|
||||||
|
exists, err := store.RowExists(ctx, `
|
||||||
|
SELECT EXISTS (
|
||||||
|
SELECT 1 FROM listens
|
||||||
|
WHERE track_id = $1
|
||||||
|
)`, 1)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.True(t, exists, "expected listen row to exist")
|
||||||
|
|
||||||
|
// Artist should still be created
|
||||||
|
artist, err := store.GetArtist(ctx, db.GetArtistOpts{Name: "Unknown Artist"})
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, "Unknown Artist", artist.Name)
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,7 @@ type MusicBrainzCaller interface {
|
||||||
GetTrack(ctx context.Context, id uuid.UUID) (*MusicBrainzTrack, error)
|
GetTrack(ctx context.Context, id uuid.UUID) (*MusicBrainzTrack, error)
|
||||||
GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error)
|
GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error)
|
||||||
GetRelease(ctx context.Context, id uuid.UUID) (*MusicBrainzRelease, error)
|
GetRelease(ctx context.Context, id uuid.UUID) (*MusicBrainzRelease, error)
|
||||||
|
SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error)
|
||||||
Shutdown()
|
Shutdown()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,7 @@ type MbzMockCaller struct {
|
||||||
ReleaseGroups map[uuid.UUID]*MusicBrainzReleaseGroup
|
ReleaseGroups map[uuid.UUID]*MusicBrainzReleaseGroup
|
||||||
Releases map[uuid.UUID]*MusicBrainzRelease
|
Releases map[uuid.UUID]*MusicBrainzRelease
|
||||||
Tracks map[uuid.UUID]*MusicBrainzTrack
|
Tracks map[uuid.UUID]*MusicBrainzTrack
|
||||||
|
SearchResults map[string]*MusicBrainzSearchResult
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *MbzMockCaller) GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error) {
|
func (m *MbzMockCaller) GetReleaseGroup(ctx context.Context, id uuid.UUID) (*MusicBrainzReleaseGroup, error) {
|
||||||
|
|
@ -70,6 +71,14 @@ func (m *MbzMockCaller) GetArtistPrimaryAliases(ctx context.Context, id uuid.UUI
|
||||||
return ss, nil
|
return ss, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *MbzMockCaller) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) {
|
||||||
|
key := artist + "\x00" + track
|
||||||
|
if result, exists := m.SearchResults[key]; exists {
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (m *MbzMockCaller) Shutdown() {}
|
func (m *MbzMockCaller) Shutdown() {}
|
||||||
|
|
||||||
type MbzErrorCaller struct{}
|
type MbzErrorCaller struct{}
|
||||||
|
|
@ -94,4 +103,8 @@ func (m *MbzErrorCaller) GetArtistPrimaryAliases(ctx context.Context, id uuid.UU
|
||||||
return nil, fmt.Errorf("error: GetArtistPrimaryAliases not implemented")
|
return nil, fmt.Errorf("error: GetArtistPrimaryAliases not implemented")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *MbzErrorCaller) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) {
|
||||||
|
return nil, fmt.Errorf("error: SearchRecording not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
func (m *MbzErrorCaller) Shutdown() {}
|
func (m *MbzErrorCaller) Shutdown() {}
|
||||||
|
|
|
||||||
183
internal/mbz/search.go
Normal file
183
internal/mbz/search.go
Normal file
|
|
@ -0,0 +1,183 @@
|
||||||
|
package mbz
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/gabehf/koito/internal/logger"
|
||||||
|
"github.com/google/uuid"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MusicBrainz search API response types
|
||||||
|
|
||||||
|
type musicBrainzSearchResponse struct {
|
||||||
|
Recordings []musicBrainzSearchRecording `json:"recordings"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type musicBrainzSearchRecording struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Title string `json:"title"`
|
||||||
|
Length int `json:"length"`
|
||||||
|
ArtistCredit []MusicBrainzArtistCredit `json:"artist-credit"`
|
||||||
|
Releases []musicBrainzSearchRelease `json:"releases"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type musicBrainzSearchRelease struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
Title string `json:"title"`
|
||||||
|
Status string `json:"status"`
|
||||||
|
ReleaseGroup struct {
|
||||||
|
ID string `json:"id"`
|
||||||
|
} `json:"release-group"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// MusicBrainzSearchResult holds the resolved IDs from a search-by-name query.
|
||||||
|
type MusicBrainzSearchResult struct {
|
||||||
|
RecordingID uuid.UUID
|
||||||
|
ReleaseID uuid.UUID
|
||||||
|
ReleaseGroupID uuid.UUID
|
||||||
|
ReleaseTitle string
|
||||||
|
DurationMs int
|
||||||
|
}
|
||||||
|
|
||||||
|
// SearchRecording searches MusicBrainz for a recording by artist and track name.
|
||||||
|
// It returns the best match that passes a confidence filter (case-insensitive exact
|
||||||
|
// match on title and at least one artist credit), or nil if no confident match is found.
|
||||||
|
func (c *MusicBrainzClient) SearchRecording(ctx context.Context, artist string, track string) (*MusicBrainzSearchResult, error) {
|
||||||
|
l := logger.FromContext(ctx)
|
||||||
|
|
||||||
|
query := fmt.Sprintf("artist:\"%s\" AND recording:\"%s\"",
|
||||||
|
escapeLucene(artist), escapeLucene(track))
|
||||||
|
url := fmt.Sprintf("%s/ws/2/recording/?query=%s&limit=5&fmt=json",
|
||||||
|
c.url, queryEscape(query))
|
||||||
|
|
||||||
|
req, err := http.NewRequest("GET", url, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("SearchRecording: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := c.queue(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("SearchRecording: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var resp musicBrainzSearchResponse
|
||||||
|
if err := json.Unmarshal(body, &resp); err != nil {
|
||||||
|
l.Err(err).Str("body", string(body)).Msg("Failed to unmarshal MusicBrainz search response")
|
||||||
|
return nil, fmt.Errorf("SearchRecording: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, rec := range resp.Recordings {
|
||||||
|
if !strings.EqualFold(rec.Title, track) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if !hasArtistCredit(rec.ArtistCredit, artist) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
recordingID, err := uuid.Parse(rec.ID)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
release := pickBestRelease(rec.Releases)
|
||||||
|
if release == nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
releaseID, err := uuid.Parse(release.ID)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
releaseGroupID, err := uuid.Parse(release.ReleaseGroup.ID)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
l.Debug().Msgf("MBZ search matched: '%s' by '%s' -> recording=%s release=%s",
|
||||||
|
track, artist, recordingID, releaseID)
|
||||||
|
|
||||||
|
return &MusicBrainzSearchResult{
|
||||||
|
RecordingID: recordingID,
|
||||||
|
ReleaseID: releaseID,
|
||||||
|
ReleaseGroupID: releaseGroupID,
|
||||||
|
ReleaseTitle: release.Title,
|
||||||
|
DurationMs: rec.Length,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
l.Debug().Msgf("MBZ search: no confident match for '%s' by '%s'", track, artist)
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// hasArtistCredit checks whether at least one artist credit name matches (case-insensitive).
|
||||||
|
func hasArtistCredit(credits []MusicBrainzArtistCredit, artist string) bool {
|
||||||
|
for _, ac := range credits {
|
||||||
|
if strings.EqualFold(ac.Name, artist) || strings.EqualFold(ac.Artist.Name, artist) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// pickBestRelease prefers an Official release, then falls back to the first available.
|
||||||
|
func pickBestRelease(releases []musicBrainzSearchRelease) *musicBrainzSearchRelease {
|
||||||
|
if len(releases) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
for i := range releases {
|
||||||
|
if releases[i].Status == "Official" {
|
||||||
|
return &releases[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &releases[0]
|
||||||
|
}
|
||||||
|
|
||||||
|
// escapeLucene escapes special characters in Lucene query syntax.
|
||||||
|
func escapeLucene(s string) string {
|
||||||
|
replacer := strings.NewReplacer(
|
||||||
|
`\`, `\\`,
|
||||||
|
`+`, `\+`,
|
||||||
|
`-`, `\-`,
|
||||||
|
`!`, `\!`,
|
||||||
|
`(`, `\(`,
|
||||||
|
`)`, `\)`,
|
||||||
|
`{`, `\{`,
|
||||||
|
`}`, `\}`,
|
||||||
|
`[`, `\[`,
|
||||||
|
`]`, `\]`,
|
||||||
|
`^`, `\^`,
|
||||||
|
`"`, `\"`,
|
||||||
|
`~`, `\~`,
|
||||||
|
`*`, `\*`,
|
||||||
|
`?`, `\?`,
|
||||||
|
`:`, `\:`,
|
||||||
|
`/`, `\/`,
|
||||||
|
)
|
||||||
|
return replacer.Replace(s)
|
||||||
|
}
|
||||||
|
|
||||||
|
// queryEscape percent-encodes a query string value for use in a URL.
|
||||||
|
// We use a simple implementation to avoid importing net/url just for this.
|
||||||
|
func queryEscape(s string) string {
|
||||||
|
var b strings.Builder
|
||||||
|
for i := 0; i < len(s); i++ {
|
||||||
|
c := s[i]
|
||||||
|
if isUnreserved(c) {
|
||||||
|
b.WriteByte(c)
|
||||||
|
} else {
|
||||||
|
fmt.Fprintf(&b, "%%%02X", c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
func isUnreserved(c byte) bool {
|
||||||
|
return (c >= 'A' && c <= 'Z') ||
|
||||||
|
(c >= 'a' && c <= 'z') ||
|
||||||
|
(c >= '0' && c <= '9') ||
|
||||||
|
c == '-' || c == '_' || c == '.' || c == '~'
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue