mirror of
https://github.com/gabehf/Koito.git
synced 2026-03-15 10:25:55 -07:00
feat: custom artist separator regex (#95)
This commit is contained in:
parent
164a9dc56f
commit
56ffe0a041
5 changed files with 65 additions and 36 deletions
|
|
@ -40,6 +40,9 @@ If the environment variable is defined without **and** with the suffix at the sa
|
||||||
##### KOITO_LOG_LEVEL
|
##### KOITO_LOG_LEVEL
|
||||||
- Default: `info`
|
- Default: `info`
|
||||||
- Description: One of `debug | info | warn | error | fatal`
|
- Description: One of `debug | info | warn | error | fatal`
|
||||||
|
##### KOITO_ARTIST_SEPARATORS_REGEX
|
||||||
|
- Default: `\s+·\s+`
|
||||||
|
- Description: A list of regex patterns Koito uses to split an artist string into individual artists. Separate multiple patterns with two semicolons (`;;`).
|
||||||
##### KOITO_MUSICBRAINZ_URL
|
##### KOITO_MUSICBRAINZ_URL
|
||||||
- Default: `https://musicbrainz.org`
|
- Default: `https://musicbrainz.org`
|
||||||
- Description: The URL Koito will use to contact MusicBrainz. Replace this value if you have your own MusicBrainz mirror.
|
- Description: The URL Koito will use to contact MusicBrainz. Replace this value if you have your own MusicBrainz mirror.
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,7 @@ func AssociateArtists(ctx context.Context, d db.DB, opts AssociateArtistsOpts) (
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(result) < 1 {
|
if len(result) < 1 {
|
||||||
allArtists := slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle))
|
allArtists := slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle, cfg.ArtistSeparators()))
|
||||||
l.Debug().Msgf("Associating artists by artist name(s) %v and track title '%s'", allArtists, opts.TrackTitle)
|
l.Debug().Msgf("Associating artists by artist name(s) %v and track title '%s'", allArtists, opts.TrackTitle)
|
||||||
fallbackMatches, err := matchArtistsByNames(ctx, allArtists, nil, d, opts)
|
fallbackMatches, err := matchArtistsByNames(ctx, allArtists, nil, d, opts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -180,7 +180,7 @@ func matchArtistsByMBID(ctx context.Context, d db.DB, opts AssociateArtistsOpts,
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(opts.ArtistNames) < 1 {
|
if len(opts.ArtistNames) < 1 {
|
||||||
opts.ArtistNames = slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle))
|
opts.ArtistNames = slices.Concat(opts.ArtistNames, ParseArtists(opts.ArtistName, opts.TrackTitle, cfg.ArtistSeparators()))
|
||||||
}
|
}
|
||||||
|
|
||||||
a, err = resolveAliasOrCreateArtist(ctx, id, opts.ArtistNames, d, opts)
|
a, err = resolveAliasOrCreateArtist(ctx, id, opts.ArtistNames, d, opts)
|
||||||
|
|
|
||||||
|
|
@ -201,21 +201,18 @@ func buildArtistStr(artists []*models.Artist) string {
|
||||||
var (
|
var (
|
||||||
// Bracketed feat patterns
|
// Bracketed feat patterns
|
||||||
bracketFeatPatterns = []*regexp.Regexp{
|
bracketFeatPatterns = []*regexp.Regexp{
|
||||||
regexp.MustCompile(`(?i)\(feat\. ([^)]*)\)`),
|
regexp.MustCompile(`(?i)\([fF]eat\. ([^)]*)\)`),
|
||||||
regexp.MustCompile(`(?i)\[feat\. ([^\]]*)\]`),
|
regexp.MustCompile(`(?i)\[[fF]eat\. ([^\]]*)\]`),
|
||||||
}
|
}
|
||||||
// Inline feat (not in brackets)
|
// Inline feat (not in brackets)
|
||||||
inlineFeatPattern = regexp.MustCompile(`(?i)feat\. ([^()\[\]]+)$`)
|
inlineFeatPattern = regexp.MustCompile(`(?i)[fF]eat\. ([^()\[\]]+)$`)
|
||||||
|
|
||||||
// Delimiters only used inside feat. sections
|
// Delimiters only used inside feat. sections
|
||||||
featSplitDelimiters = regexp.MustCompile(`(?i)\s*(?:,|&|and|·)\s*`)
|
featSplitDelimiters = regexp.MustCompile(`(?i)\s*(?:,|&|and|·)\s*`)
|
||||||
|
|
||||||
// Delimiter for separating artists in main string (rare but real usage)
|
|
||||||
mainArtistDotSplitter = regexp.MustCompile(`\s+·\s+`)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// ParseArtists extracts all contributing artist names from the artist and title strings
|
// ParseArtists extracts all contributing artist names from the artist and title strings
|
||||||
func ParseArtists(artist string, title string) []string {
|
func ParseArtists(artist string, title string, addlSeparators []*regexp.Regexp) []string {
|
||||||
seen := make(map[string]struct{})
|
seen := make(map[string]struct{})
|
||||||
var out []string
|
var out []string
|
||||||
|
|
||||||
|
|
@ -230,12 +227,9 @@ func ParseArtists(artist string, title string) []string {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
foundFeat := false
|
|
||||||
|
|
||||||
// Extract bracketed features from artist
|
// Extract bracketed features from artist
|
||||||
for _, re := range bracketFeatPatterns {
|
for _, re := range bracketFeatPatterns {
|
||||||
if matches := re.FindStringSubmatch(artist); matches != nil {
|
if matches := re.FindStringSubmatch(artist); matches != nil {
|
||||||
foundFeat = true
|
|
||||||
artist = strings.Replace(artist, matches[0], "", 1)
|
artist = strings.Replace(artist, matches[0], "", 1)
|
||||||
for _, name := range featSplitDelimiters.Split(matches[1], -1) {
|
for _, name := range featSplitDelimiters.Split(matches[1], -1) {
|
||||||
add(name)
|
add(name)
|
||||||
|
|
@ -244,7 +238,6 @@ func ParseArtists(artist string, title string) []string {
|
||||||
}
|
}
|
||||||
// Extract inline feat. from artist
|
// Extract inline feat. from artist
|
||||||
if matches := inlineFeatPattern.FindStringSubmatch(artist); matches != nil {
|
if matches := inlineFeatPattern.FindStringSubmatch(artist); matches != nil {
|
||||||
foundFeat = true
|
|
||||||
artist = strings.Replace(artist, matches[0], "", 1)
|
artist = strings.Replace(artist, matches[0], "", 1)
|
||||||
for _, name := range featSplitDelimiters.Split(matches[1], -1) {
|
for _, name := range featSplitDelimiters.Split(matches[1], -1) {
|
||||||
add(name)
|
add(name)
|
||||||
|
|
@ -252,14 +245,19 @@ func ParseArtists(artist string, title string) []string {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add base artist(s)
|
// Add base artist(s)
|
||||||
if foundFeat {
|
l1 := len(out)
|
||||||
add(strings.TrimSpace(artist))
|
for _, re := range addlSeparators {
|
||||||
} else {
|
for _, name := range re.Split(artist, -1) {
|
||||||
// Only split on " · " in base artist string
|
if name == artist {
|
||||||
for _, name := range mainArtistDotSplitter.Split(artist, -1) {
|
continue
|
||||||
|
}
|
||||||
add(name)
|
add(name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Only add the full artist string if no splitters were matched
|
||||||
|
if l1 == len(out) {
|
||||||
|
add(artist)
|
||||||
|
}
|
||||||
|
|
||||||
// Extract features from title
|
// Extract features from title
|
||||||
for _, re := range bracketFeatPatterns {
|
for _, re := range bracketFeatPatterns {
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
"regexp"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
|
@ -358,10 +359,16 @@ func TestArtistStringParse(t *testing.T) {
|
||||||
// artists in both
|
// artists in both
|
||||||
{"Daft Punk feat. Julian Casablancas", "Instant Crush (feat. Julian Casablancas)"}: {"Daft Punk", "Julian Casablancas"},
|
{"Daft Punk feat. Julian Casablancas", "Instant Crush (feat. Julian Casablancas)"}: {"Daft Punk", "Julian Casablancas"},
|
||||||
{"Paramore (feat. Joy Williams)", "Hate to See Your Heart Break feat. Joy Williams"}: {"Paramore", "Joy Williams"},
|
{"Paramore (feat. Joy Williams)", "Hate to See Your Heart Break feat. Joy Williams"}: {"Paramore", "Joy Williams"},
|
||||||
|
{"MINSU", "오해 금지 (Feat. BIG Naughty)"}: {"MINSU", "BIG Naughty"},
|
||||||
|
{"MINSU", "오해 금지 [Feat. BIG Naughty]"}: {"MINSU", "BIG Naughty"},
|
||||||
|
{"MINSU", "오해 금지 Feat. BIG Naughty"}: {"MINSU", "BIG Naughty"},
|
||||||
|
|
||||||
|
// custom separator
|
||||||
|
{"MIMiNARI//楠木ともり", "眠れない"}: {"MIMiNARI", "楠木ともり"},
|
||||||
}
|
}
|
||||||
|
|
||||||
for in, out := range cases {
|
for in, out := range cases {
|
||||||
artists := catalog.ParseArtists(in.Name, in.Title)
|
artists := catalog.ParseArtists(in.Name, in.Title, []*regexp.Regexp{regexp.MustCompile(`\s*//\s*`), regexp.MustCompile(`\s+·\s+`)})
|
||||||
assert.ElementsMatch(t, out, artists)
|
assert.ElementsMatch(t, out, artists)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package cfg
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
|
@ -45,6 +46,7 @@ const (
|
||||||
IMPORT_BEFORE_UNIX_ENV = "KOITO_IMPORT_BEFORE_UNIX"
|
IMPORT_BEFORE_UNIX_ENV = "KOITO_IMPORT_BEFORE_UNIX"
|
||||||
IMPORT_AFTER_UNIX_ENV = "KOITO_IMPORT_AFTER_UNIX"
|
IMPORT_AFTER_UNIX_ENV = "KOITO_IMPORT_AFTER_UNIX"
|
||||||
FETCH_IMAGES_DURING_IMPORT_ENV = "KOITO_FETCH_IMAGES_DURING_IMPORT"
|
FETCH_IMAGES_DURING_IMPORT_ENV = "KOITO_FETCH_IMAGES_DURING_IMPORT"
|
||||||
|
ARTIST_SEPARATORS_ENV = "KOITO_ARTIST_SEPARATORS_REGEX"
|
||||||
)
|
)
|
||||||
|
|
||||||
type config struct {
|
type config struct {
|
||||||
|
|
@ -80,6 +82,7 @@ type config struct {
|
||||||
userAgent string
|
userAgent string
|
||||||
importBefore time.Time
|
importBefore time.Time
|
||||||
importAfter time.Time
|
importAfter time.Time
|
||||||
|
artistSeparators []*regexp.Regexp
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
@ -189,6 +192,18 @@ func loadConfig(getenv func(string) string, version string) (*config, error) {
|
||||||
rawCors := getenv(CORS_ORIGINS_ENV)
|
rawCors := getenv(CORS_ORIGINS_ENV)
|
||||||
cfg.allowedOrigins = strings.Split(rawCors, ",")
|
cfg.allowedOrigins = strings.Split(rawCors, ",")
|
||||||
|
|
||||||
|
if getenv(ARTIST_SEPARATORS_ENV) != "" {
|
||||||
|
for pattern := range strings.SplitSeq(getenv(ARTIST_SEPARATORS_ENV), ";;") {
|
||||||
|
regex, err := regexp.Compile(pattern)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to compile regex pattern %s", pattern)
|
||||||
|
}
|
||||||
|
cfg.artistSeparators = append(cfg.artistSeparators, regex)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cfg.artistSeparators = []*regexp.Regexp{regexp.MustCompile(`\s+·\s+`)}
|
||||||
|
}
|
||||||
|
|
||||||
switch strings.ToLower(getenv(LOG_LEVEL_ENV)) {
|
switch strings.ToLower(getenv(LOG_LEVEL_ENV)) {
|
||||||
case "debug":
|
case "debug":
|
||||||
cfg.logLevel = 0
|
cfg.logLevel = 0
|
||||||
|
|
@ -388,3 +403,9 @@ func FetchImagesDuringImport() bool {
|
||||||
defer lock.RUnlock()
|
defer lock.RUnlock()
|
||||||
return globalConfig.fetchImageDuringImport
|
return globalConfig.fetchImageDuringImport
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ArtistSeparators returns the compiled artist separator patterns stored in
// the global config. The read lock is held while the field is read, and the
// config's own slice is returned as-is (no copy is made).
func ArtistSeparators() []*regexp.Regexp {
|
||||||
|
lock.RLock()
|
||||||
|
defer lock.RUnlock()
|
||||||
|
return globalConfig.artistSeparators
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue