diff --git a/docs/src/content/docs/reference/configuration.md b/docs/src/content/docs/reference/configuration.md index bf9437a..e22398f 100644 --- a/docs/src/content/docs/reference/configuration.md +++ b/docs/src/content/docs/reference/configuration.md @@ -40,6 +40,9 @@ If the environment variable is defined without **and** with the suffix at the sa ##### KOITO_LOG_LEVEL - Default: `info` - Description: One of `debug | info | warn | error | fatal` +##### KOITO_ARTIST_SEPARATORS_REGEX +- Default: `\s+·\s+` +- Description: A list of regular expressions that Koito uses to split an artist string into individual artists. Delimit multiple patterns with two semicolons (`;;`), e.g. `\s+·\s+;;\s*//\s*`. Setting this variable replaces the default pattern. ##### KOITO_MUSICBRAINZ_URL - Default: `https://musicbrainz.org` - Description: The URL Koito will use to contact MusicBrainz. Replace this value if you have your own MusicBrainz mirror. diff --git a/internal/catalog/associate_artists.go b/internal/catalog/associate_artists.go index 232cac7..6387d4b 100644 --- a/internal/catalog/associate_artists.go +++ b/internal/catalog/associate_artists.go @@ -62,7 +62,7 @@ func AssociateArtists(ctx context.Context, d db.DB, opts AssociateArtistsOpts) ( } if len(result) < 1 { - allArtists := slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle)) + allArtists := slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle, cfg.ArtistSeparators())) l.Debug().Msgf("Associating artists by artist name(s) %v and track title '%s'", allArtists, opts.TrackTitle) fallbackMatches, err := matchArtistsByNames(ctx, allArtists, nil, d, opts) if err != nil { @@ -180,7 +180,7 @@ func matchArtistsByMBID(ctx context.Context, d db.DB, opts AssociateArtistsOpts, } if len(opts.ArtistNames) < 1 { - opts.ArtistNames = slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle)) + opts.ArtistNames = slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle, cfg.ArtistSeparators())) } a, err = resolveAliasOrCreateArtist(ctx, id, opts.ArtistNames, d, 
opts) diff --git a/internal/catalog/catalog.go b/internal/catalog/catalog.go index 44cf235..e94db03 100644 --- a/internal/catalog/catalog.go +++ b/internal/catalog/catalog.go @@ -201,21 +201,18 @@ func buildArtistStr(artists []*models.Artist) string { var ( // Bracketed feat patterns bracketFeatPatterns = []*regexp.Regexp{ - regexp.MustCompile(`(?i)\(feat\. ([^)]*)\)`), - regexp.MustCompile(`(?i)\[feat\. ([^\]]*)\]`), + regexp.MustCompile(`(?i)\([fF]eat\. ([^)]*)\)`), + regexp.MustCompile(`(?i)\[[fF]eat\. ([^\]]*)\]`), } // Inline feat (not in brackets) - inlineFeatPattern = regexp.MustCompile(`(?i)feat\. ([^()\[\]]+)$`) + inlineFeatPattern = regexp.MustCompile(`(?i)[fF]eat\. ([^()\[\]]+)$`) // Delimiters only used inside feat. sections featSplitDelimiters = regexp.MustCompile(`(?i)\s*(?:,|&|and|·)\s*`) - - // Delimiter for separating artists in main string (rare but real usage) - mainArtistDotSplitter = regexp.MustCompile(`\s+·\s+`) ) // ParseArtists extracts all contributing artist names from the artist and title strings -func ParseArtists(artist string, title string) []string { +func ParseArtists(artist string, title string, addlSeparators []*regexp.Regexp) []string { seen := make(map[string]struct{}) var out []string @@ -230,12 +227,9 @@ func ParseArtists(artist string, title string) []string { } } - foundFeat := false - // Extract bracketed features from artist for _, re := range bracketFeatPatterns { if matches := re.FindStringSubmatch(artist); matches != nil { - foundFeat = true artist = strings.Replace(artist, matches[0], "", 1) for _, name := range featSplitDelimiters.Split(matches[1], -1) { add(name) @@ -244,7 +238,6 @@ func ParseArtists(artist string, title string) []string { } // Extract inline feat. 
from artist if matches := inlineFeatPattern.FindStringSubmatch(artist); matches != nil { - foundFeat = true artist = strings.Replace(artist, matches[0], "", 1) for _, name := range featSplitDelimiters.Split(matches[1], -1) { add(name) @@ -252,14 +245,19 @@ func ParseArtists(artist string, title string) []string { } // Add base artist(s) - if foundFeat { - add(strings.TrimSpace(artist)) - } else { - // Only split on " · " in base artist string - for _, name := range mainArtistDotSplitter.Split(artist, -1) { + l1 := len(out) + for _, re := range addlSeparators { + for _, name := range re.Split(artist, -1) { + if name == artist { + continue + } add(name) } } + // Only add the full artist string if no splitters were matched + if l1 == len(out) { + add(artist) + } // Extract features from title for _, re := range bracketFeatPatterns { diff --git a/internal/catalog/catalog_test.go b/internal/catalog/catalog_test.go index e50435c..c56ba47 100644 --- a/internal/catalog/catalog_test.go +++ b/internal/catalog/catalog_test.go @@ -5,6 +5,7 @@ import ( "fmt" "log" "os" + "regexp" "testing" "time" @@ -167,15 +168,15 @@ func getTestGetenv(resource *dockertest.Resource) func(string) string { func truncateTestData(t *testing.T) { err := store.Exec(context.Background(), - `TRUNCATE - artists, + `TRUNCATE + artists, artist_aliases, - tracks, - artist_tracks, - releases, - artist_releases, + tracks, + artist_tracks, + releases, + artist_releases, release_aliases, - listens + listens RESTART IDENTITY CASCADE`) require.NoError(t, err) } @@ -184,23 +185,23 @@ func setupTestDataWithMbzIDs(t *testing.T) { truncateTestData(t) err := store.Exec(context.Background(), - `INSERT INTO artists (musicbrainz_id) + `INSERT INTO artists (musicbrainz_id) VALUES ('00000000-0000-0000-0000-000000000001')`) require.NoError(t, err) err = store.Exec(context.Background(), - `INSERT INTO artist_aliases (artist_id, alias, source, is_primary) + `INSERT INTO artist_aliases (artist_id, alias, source, is_primary) 
VALUES (1, 'ATARASHII GAKKO!', 'Testing', true)`) require.NoError(t, err) err = store.Exec(context.Background(), - `INSERT INTO releases (musicbrainz_id) + `INSERT INTO releases (musicbrainz_id) VALUES ('00000000-0000-0000-0000-000000000101')`) require.NoError(t, err) err = store.Exec(context.Background(), - `INSERT INTO release_aliases (release_id, alias, source, is_primary) + `INSERT INTO release_aliases (release_id, alias, source, is_primary) VALUES (1, 'AG! Calling', 'Testing', true)`) require.NoError(t, err) err = store.Exec(context.Background(), - `INSERT INTO artist_releases (artist_id, release_id) + `INSERT INTO artist_releases (artist_id, release_id) VALUES (1, 1)`) require.NoError(t, err) err = store.Exec(context.Background(), @@ -221,23 +222,23 @@ func setupTestDataSansMbzIDs(t *testing.T) { truncateTestData(t) err := store.Exec(context.Background(), - `INSERT INTO artists (musicbrainz_id) + `INSERT INTO artists (musicbrainz_id) VALUES (NULL)`) require.NoError(t, err) err = store.Exec(context.Background(), - `INSERT INTO artist_aliases (artist_id, alias, source, is_primary) + `INSERT INTO artist_aliases (artist_id, alias, source, is_primary) VALUES (1, 'ATARASHII GAKKO!', 'Testing', true)`) require.NoError(t, err) err = store.Exec(context.Background(), - `INSERT INTO releases (musicbrainz_id) + `INSERT INTO releases (musicbrainz_id) VALUES (NULL)`) require.NoError(t, err) err = store.Exec(context.Background(), - `INSERT INTO release_aliases (release_id, alias, source, is_primary) + `INSERT INTO release_aliases (release_id, alias, source, is_primary) VALUES (1, 'AG! Calling', 'Testing', true)`) require.NoError(t, err) err = store.Exec(context.Background(), - `INSERT INTO artist_releases (artist_id, release_id) + `INSERT INTO artist_releases (artist_id, release_id) VALUES (1, 1)`) require.NoError(t, err) err = store.Exec(context.Background(), @@ -358,10 +359,16 @@ func TestArtistStringParse(t *testing.T) { // artists in both {"Daft Punk feat. 
Julian Casablancas", "Instant Crush (feat. Julian Casablancas)"}: {"Daft Punk", "Julian Casablancas"}, {"Paramore (feat. Joy Williams)", "Hate to See Your Heart Break feat. Joy Williams"}: {"Paramore", "Joy Williams"}, + {"MINSU", "오해 금지 (Feat. BIG Naughty)"}: {"MINSU", "BIG Naughty"}, + {"MINSU", "오해 금지 [Feat. BIG Naughty]"}: {"MINSU", "BIG Naughty"}, + {"MINSU", "오해 금지 Feat. BIG Naughty"}: {"MINSU", "BIG Naughty"}, + + // custom separator + {"MIMiNARI//楠木ともり", "眠れない"}: {"MIMiNARI", "楠木ともり"}, } for in, out := range cases { - artists := catalog.ParseArtists(in.Name, in.Title) + artists := catalog.ParseArtists(in.Name, in.Title, []*regexp.Regexp{regexp.MustCompile(`\s*//\s*`), regexp.MustCompile(`\s+·\s+`)}) assert.ElementsMatch(t, out, artists) } } diff --git a/internal/cfg/cfg.go b/internal/cfg/cfg.go index b5d945e..8f40a36 100644 --- a/internal/cfg/cfg.go +++ b/internal/cfg/cfg.go @@ -3,6 +3,7 @@ package cfg import ( "errors" "fmt" + "regexp" "strconv" "strings" "sync" @@ -45,6 +46,7 @@ const ( IMPORT_BEFORE_UNIX_ENV = "KOITO_IMPORT_BEFORE_UNIX" IMPORT_AFTER_UNIX_ENV = "KOITO_IMPORT_AFTER_UNIX" FETCH_IMAGES_DURING_IMPORT_ENV = "KOITO_FETCH_IMAGES_DURING_IMPORT" + ARTIST_SEPARATORS_ENV = "KOITO_ARTIST_SEPARATORS_REGEX" ) type config struct { @@ -80,6 +82,7 @@ type config struct { userAgent string importBefore time.Time importAfter time.Time + artistSeparators []*regexp.Regexp } var ( @@ -189,6 +192,18 @@ func loadConfig(getenv func(string) string, version string) (*config, error) { rawCors := getenv(CORS_ORIGINS_ENV) cfg.allowedOrigins = strings.Split(rawCors, ",") + if getenv(ARTIST_SEPARATORS_ENV) != "" { + for pattern := range strings.SplitSeq(getenv(ARTIST_SEPARATORS_ENV), ";;") { + regex, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("failed to compile regex pattern %s", pattern) + } + cfg.artistSeparators = append(cfg.artistSeparators, regex) + } + } else { + cfg.artistSeparators = []*regexp.Regexp{regexp.MustCompile(`\s+·\s+`)} + 
} + switch strings.ToLower(getenv(LOG_LEVEL_ENV)) { case "debug": cfg.logLevel = 0 @@ -388,3 +403,9 @@ func FetchImagesDuringImport() bool { defer lock.RUnlock() return globalConfig.fetchImageDuringImport } + +func ArtistSeparators() []*regexp.Regexp { + lock.RLock() + defer lock.RUnlock() + return globalConfig.artistSeparators +}