mirror of
https://github.com/gabehf/Koito.git
synced 2026-03-11 00:10:38 -07:00
feat: custom artist separator regex
This commit is contained in:
parent
164a9dc56f
commit
acb362e6ad
5 changed files with 65 additions and 36 deletions
|
|
@ -40,6 +40,9 @@ If the environment variable is defined without **and** with the suffix at the sa
|
|||
##### KOITO_LOG_LEVEL
|
||||
- Default: `info`
|
||||
- Description: One of `debug | info | warn | error | fatal`
|
||||
##### KOITO_ARTIST_SEPARATORS_REGEX
|
||||
- Default: `\s+·\s+`
|
||||
- Description: One or more regex patterns that Koito uses to split an artist string into individual artists. Separate multiple patterns with two semicolons (`;;`). Setting this variable replaces the default pattern.
|
||||
##### KOITO_MUSICBRAINZ_URL
|
||||
- Default: `https://musicbrainz.org`
|
||||
- Description: The URL Koito will use to contact MusicBrainz. Replace this value if you have your own MusicBrainz mirror.
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ func AssociateArtists(ctx context.Context, d db.DB, opts AssociateArtistsOpts) (
|
|||
}
|
||||
|
||||
if len(result) < 1 {
|
||||
allArtists := slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle))
|
||||
allArtists := slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle, cfg.ArtistSeparators()))
|
||||
l.Debug().Msgf("Associating artists by artist name(s) %v and track title '%s'", allArtists, opts.TrackTitle)
|
||||
fallbackMatches, err := matchArtistsByNames(ctx, allArtists, nil, d, opts)
|
||||
if err != nil {
|
||||
|
|
@ -180,7 +180,7 @@ func matchArtistsByMBID(ctx context.Context, d db.DB, opts AssociateArtistsOpts,
|
|||
}
|
||||
|
||||
if len(opts.ArtistNames) < 1 {
|
||||
opts.ArtistNames = slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle))
|
||||
opts.ArtistNames = slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle, cfg.ArtistSeparators()))
|
||||
}
|
||||
|
||||
a, err = resolveAliasOrCreateArtist(ctx, id, opts.ArtistNames, d, opts)
|
||||
|
|
|
|||
|
|
@ -201,21 +201,18 @@ func buildArtistStr(artists []*models.Artist) string {
|
|||
var (
|
||||
// Bracketed feat patterns
|
||||
bracketFeatPatterns = []*regexp.Regexp{
|
||||
regexp.MustCompile(`(?i)\(feat\. ([^)]*)\)`),
|
||||
regexp.MustCompile(`(?i)\[feat\. ([^\]]*)\]`),
|
||||
regexp.MustCompile(`(?i)\([fF]eat\. ([^)]*)\)`),
|
||||
regexp.MustCompile(`(?i)\[[fF]eat\. ([^\]]*)\]`),
|
||||
}
|
||||
// Inline feat (not in brackets)
|
||||
inlineFeatPattern = regexp.MustCompile(`(?i)feat\. ([^()\[\]]+)$`)
|
||||
inlineFeatPattern = regexp.MustCompile(`(?i)[fF]eat\. ([^()\[\]]+)$`)
|
||||
|
||||
// Delimiters only used inside feat. sections
|
||||
featSplitDelimiters = regexp.MustCompile(`(?i)\s*(?:,|&|and|·)\s*`)
|
||||
|
||||
// Delimiter for separating artists in main string (rare but real usage)
|
||||
mainArtistDotSplitter = regexp.MustCompile(`\s+·\s+`)
|
||||
)
|
||||
|
||||
// ParseArtists extracts all contributing artist names from the artist and title strings
|
||||
func ParseArtists(artist string, title string) []string {
|
||||
func ParseArtists(artist string, title string, addlSeparators []*regexp.Regexp) []string {
|
||||
seen := make(map[string]struct{})
|
||||
var out []string
|
||||
|
||||
|
|
@ -230,12 +227,9 @@ func ParseArtists(artist string, title string) []string {
|
|||
}
|
||||
}
|
||||
|
||||
foundFeat := false
|
||||
|
||||
// Extract bracketed features from artist
|
||||
for _, re := range bracketFeatPatterns {
|
||||
if matches := re.FindStringSubmatch(artist); matches != nil {
|
||||
foundFeat = true
|
||||
artist = strings.Replace(artist, matches[0], "", 1)
|
||||
for _, name := range featSplitDelimiters.Split(matches[1], -1) {
|
||||
add(name)
|
||||
|
|
@ -244,7 +238,6 @@ func ParseArtists(artist string, title string) []string {
|
|||
}
|
||||
// Extract inline feat. from artist
|
||||
if matches := inlineFeatPattern.FindStringSubmatch(artist); matches != nil {
|
||||
foundFeat = true
|
||||
artist = strings.Replace(artist, matches[0], "", 1)
|
||||
for _, name := range featSplitDelimiters.Split(matches[1], -1) {
|
||||
add(name)
|
||||
|
|
@ -252,14 +245,19 @@ func ParseArtists(artist string, title string) []string {
|
|||
}
|
||||
|
||||
// Add base artist(s)
|
||||
if foundFeat {
|
||||
add(strings.TrimSpace(artist))
|
||||
} else {
|
||||
// Only split on " · " in base artist string
|
||||
for _, name := range mainArtistDotSplitter.Split(artist, -1) {
|
||||
l1 := len(out)
|
||||
for _, re := range addlSeparators {
|
||||
for _, name := range re.Split(artist, -1) {
|
||||
if name == artist {
|
||||
continue
|
||||
}
|
||||
add(name)
|
||||
}
|
||||
}
|
||||
// Only add the full artist string if no splitters were matched
|
||||
if l1 == len(out) {
|
||||
add(artist)
|
||||
}
|
||||
|
||||
// Extract features from title
|
||||
for _, re := range bracketFeatPatterns {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import (
|
|||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
|
|
@ -167,15 +168,15 @@ func getTestGetenv(resource *dockertest.Resource) func(string) string {
|
|||
|
||||
func truncateTestData(t *testing.T) {
|
||||
err := store.Exec(context.Background(),
|
||||
`TRUNCATE
|
||||
artists,
|
||||
`TRUNCATE
|
||||
artists,
|
||||
artist_aliases,
|
||||
tracks,
|
||||
artist_tracks,
|
||||
releases,
|
||||
artist_releases,
|
||||
tracks,
|
||||
artist_tracks,
|
||||
releases,
|
||||
artist_releases,
|
||||
release_aliases,
|
||||
listens
|
||||
listens
|
||||
RESTART IDENTITY CASCADE`)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
|
@ -184,23 +185,23 @@ func setupTestDataWithMbzIDs(t *testing.T) {
|
|||
truncateTestData(t)
|
||||
|
||||
err := store.Exec(context.Background(),
|
||||
`INSERT INTO artists (musicbrainz_id)
|
||||
`INSERT INTO artists (musicbrainz_id)
|
||||
VALUES ('00000000-0000-0000-0000-000000000001')`)
|
||||
require.NoError(t, err)
|
||||
err = store.Exec(context.Background(),
|
||||
`INSERT INTO artist_aliases (artist_id, alias, source, is_primary)
|
||||
`INSERT INTO artist_aliases (artist_id, alias, source, is_primary)
|
||||
VALUES (1, 'ATARASHII GAKKO!', 'Testing', true)`)
|
||||
require.NoError(t, err)
|
||||
err = store.Exec(context.Background(),
|
||||
`INSERT INTO releases (musicbrainz_id)
|
||||
`INSERT INTO releases (musicbrainz_id)
|
||||
VALUES ('00000000-0000-0000-0000-000000000101')`)
|
||||
require.NoError(t, err)
|
||||
err = store.Exec(context.Background(),
|
||||
`INSERT INTO release_aliases (release_id, alias, source, is_primary)
|
||||
`INSERT INTO release_aliases (release_id, alias, source, is_primary)
|
||||
VALUES (1, 'AG! Calling', 'Testing', true)`)
|
||||
require.NoError(t, err)
|
||||
err = store.Exec(context.Background(),
|
||||
`INSERT INTO artist_releases (artist_id, release_id)
|
||||
`INSERT INTO artist_releases (artist_id, release_id)
|
||||
VALUES (1, 1)`)
|
||||
require.NoError(t, err)
|
||||
err = store.Exec(context.Background(),
|
||||
|
|
@ -221,23 +222,23 @@ func setupTestDataSansMbzIDs(t *testing.T) {
|
|||
truncateTestData(t)
|
||||
|
||||
err := store.Exec(context.Background(),
|
||||
`INSERT INTO artists (musicbrainz_id)
|
||||
`INSERT INTO artists (musicbrainz_id)
|
||||
VALUES (NULL)`)
|
||||
require.NoError(t, err)
|
||||
err = store.Exec(context.Background(),
|
||||
`INSERT INTO artist_aliases (artist_id, alias, source, is_primary)
|
||||
`INSERT INTO artist_aliases (artist_id, alias, source, is_primary)
|
||||
VALUES (1, 'ATARASHII GAKKO!', 'Testing', true)`)
|
||||
require.NoError(t, err)
|
||||
err = store.Exec(context.Background(),
|
||||
`INSERT INTO releases (musicbrainz_id)
|
||||
`INSERT INTO releases (musicbrainz_id)
|
||||
VALUES (NULL)`)
|
||||
require.NoError(t, err)
|
||||
err = store.Exec(context.Background(),
|
||||
`INSERT INTO release_aliases (release_id, alias, source, is_primary)
|
||||
`INSERT INTO release_aliases (release_id, alias, source, is_primary)
|
||||
VALUES (1, 'AG! Calling', 'Testing', true)`)
|
||||
require.NoError(t, err)
|
||||
err = store.Exec(context.Background(),
|
||||
`INSERT INTO artist_releases (artist_id, release_id)
|
||||
`INSERT INTO artist_releases (artist_id, release_id)
|
||||
VALUES (1, 1)`)
|
||||
require.NoError(t, err)
|
||||
err = store.Exec(context.Background(),
|
||||
|
|
@ -358,10 +359,16 @@ func TestArtistStringParse(t *testing.T) {
|
|||
// artists in both
|
||||
{"Daft Punk feat. Julian Casablancas", "Instant Crush (feat. Julian Casablancas)"}: {"Daft Punk", "Julian Casablancas"},
|
||||
{"Paramore (feat. Joy Williams)", "Hate to See Your Heart Break feat. Joy Williams"}: {"Paramore", "Joy Williams"},
|
||||
{"MINSU", "오해 금지 (Feat. BIG Naughty)"}: {"MINSU", "BIG Naughty"},
|
||||
{"MINSU", "오해 금지 [Feat. BIG Naughty]"}: {"MINSU", "BIG Naughty"},
|
||||
{"MINSU", "오해 금지 Feat. BIG Naughty"}: {"MINSU", "BIG Naughty"},
|
||||
|
||||
// custom separator
|
||||
{"MIMiNARI//楠木ともり", "眠れない"}: {"MIMiNARI", "楠木ともり"},
|
||||
}
|
||||
|
||||
for in, out := range cases {
|
||||
artists := catalog.ParseArtists(in.Name, in.Title)
|
||||
artists := catalog.ParseArtists(in.Name, in.Title, []*regexp.Regexp{regexp.MustCompile(`\s*//\s*`), regexp.MustCompile(`\s+·\s+`)})
|
||||
assert.ElementsMatch(t, out, artists)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package cfg
|
|||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
|
@ -45,6 +46,7 @@ const (
|
|||
IMPORT_BEFORE_UNIX_ENV = "KOITO_IMPORT_BEFORE_UNIX"
|
||||
IMPORT_AFTER_UNIX_ENV = "KOITO_IMPORT_AFTER_UNIX"
|
||||
FETCH_IMAGES_DURING_IMPORT_ENV = "KOITO_FETCH_IMAGES_DURING_IMPORT"
|
||||
ARTIST_SEPARATORS_ENV = "KOITO_ARTIST_SEPARATORS_REGEX"
|
||||
)
|
||||
|
||||
type config struct {
|
||||
|
|
@ -80,6 +82,7 @@ type config struct {
|
|||
userAgent string
|
||||
importBefore time.Time
|
||||
importAfter time.Time
|
||||
artistSeparators []*regexp.Regexp
|
||||
}
|
||||
|
||||
var (
|
||||
|
|
@ -189,6 +192,18 @@ func loadConfig(getenv func(string) string, version string) (*config, error) {
|
|||
rawCors := getenv(CORS_ORIGINS_ENV)
|
||||
cfg.allowedOrigins = strings.Split(rawCors, ",")
|
||||
|
||||
if getenv(ARTIST_SEPARATORS_ENV) != "" {
|
||||
for pattern := range strings.SplitSeq(getenv(ARTIST_SEPARATORS_ENV), ";;") {
|
||||
regex, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to compile regex pattern %s", pattern)
|
||||
}
|
||||
cfg.artistSeparators = append(cfg.artistSeparators, regex)
|
||||
}
|
||||
} else {
|
||||
cfg.artistSeparators = []*regexp.Regexp{regexp.MustCompile(`\s+·\s+`)}
|
||||
}
|
||||
|
||||
switch strings.ToLower(getenv(LOG_LEVEL_ENV)) {
|
||||
case "debug":
|
||||
cfg.logLevel = 0
|
||||
|
|
@ -388,3 +403,9 @@ func FetchImagesDuringImport() bool {
|
|||
defer lock.RUnlock()
|
||||
return globalConfig.fetchImageDuringImport
|
||||
}
|
||||
|
||||
// ArtistSeparators returns the compiled artist-separator patterns held in the
// global config. The read lock is held while the field is read; the slice is
// returned as stored (not copied).
func ArtistSeparators() []*regexp.Regexp {
|
||||
lock.RLock()
|
||||
defer lock.RUnlock()
|
||||
return globalConfig.artistSeparators
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue