artist image refetching

This commit is contained in:
Gabe Farrell 2026-01-20 11:33:34 -05:00
parent 5e294b839c
commit ce4bcdd7da
13 changed files with 205 additions and 41 deletions

5
.env.example Normal file
View file

@ -0,0 +1,5 @@
KOITO_ALLOWED_HOSTS=*
KOITO_LOG_LEVEL=debug
KOITO_CONFIG_DIR=test_config_dir
KOITO_DATABASE_URL=postgres://postgres:secret@localhost:5432?sslmode=disable
TZ=Etc/UTC

1
.gitignore vendored
View file

@ -1 +1,2 @@
test_config_dir test_config_dir
.env

View file

@ -1,3 +1,8 @@
ifneq (,$(wildcard ./.env))
include .env
export
endif
.PHONY: all test clean client .PHONY: all test clean client
postgres.schemadump: postgres.schemadump:
@ -28,10 +33,10 @@ postgres.remove-scratch:
docker stop koito-scratch && docker rm koito-scratch docker stop koito-scratch && docker rm koito-scratch
api.debug: postgres.start api.debug: postgres.start
KOITO_ALLOWED_HOSTS=* KOITO_LOG_LEVEL=debug KOITO_CONFIG_DIR=test_config_dir KOITO_DATABASE_URL=postgres://postgres:secret@localhost:5432?sslmode=disable go run cmd/api/main.go go run cmd/api/main.go
api.scratch: postgres.run-scratch api.scratch: postgres.run-scratch
KOITO_ALLOWED_HOSTS=* KOITO_LOG_LEVEL=debug KOITO_CONFIG_DIR=test_config_dir/scratch KOITO_DATABASE_URL=postgres://postgres:secret@localhost:5433?sslmode=disable go run cmd/api/main.go KOITO_DATABASE_URL=postgres://postgres:secret@localhost:5433?sslmode=disable go run cmd/api/main.go
api.test: api.test:
go test ./... -timeout 60s go test ./... -timeout 60s

View file

@ -56,6 +56,15 @@ LEFT JOIN artist_aliases aa ON a.id = aa.artist_id
WHERE a.musicbrainz_id = $1 WHERE a.musicbrainz_id = $1
GROUP BY a.id, a.musicbrainz_id, a.image, a.image_source, a.name; GROUP BY a.id, a.musicbrainz_id, a.image, a.image_source, a.name;
-- name: GetArtistsWithoutImages :many
-- Returns up to $1 rows from artists_with_name whose image is NULL and whose id is
-- greater than $2, ordered by ascending id. Passing the last id received as $2 on the
-- next call continues from where the previous page ended.
SELECT
*
FROM artists_with_name
WHERE image IS NULL
AND id > $2
ORDER BY id ASC
LIMIT $1;
-- name: GetTopArtistsPaginated :many -- name: GetTopArtistsPaginated :many
SELECT SELECT
x.id, x.id,

View file

@ -211,6 +211,8 @@ func Run(
} }
}() }()
l.Info().Msg("Engine: Beginning startup tasks...")
l.Debug().Msg("Engine: Checking import configuration") l.Debug().Msg("Engine: Checking import configuration")
if !cfg.SkipImport() { if !cfg.SkipImport() {
go func() { go func() {
@ -218,18 +220,12 @@ func Run(
}() }()
} }
// l.Info().Msg("Creating test export file")
// go func() {
// err := export.ExportData(ctx, "koito", store)
// if err != nil {
// l.Err(err).Msg("Failed to generate export file")
// }
// }()
l.Info().Msg("Engine: Pruning orphaned images") l.Info().Msg("Engine: Pruning orphaned images")
go catalog.PruneOrphanedImages(logger.NewContext(l), store) go catalog.PruneOrphanedImages(logger.NewContext(l), store)
l.Info().Msg("Engine: Running duration backfill task") l.Info().Msg("Engine: Running duration backfill task")
go catalog.BackfillTrackDurationsFromMusicBrainz(ctx, store, mbzC) go catalog.BackfillTrackDurationsFromMusicBrainz(ctx, store, mbzC)
l.Info().Msg("Engine: Attempting to fetch missing artist images")
go catalog.FetchMissingArtistImages(ctx, store)
l.Info().Msg("Engine: Initialization finished") l.Info().Msg("Engine: Initialization finished")
quit := make(chan os.Signal, 1) quit := make(chan os.Signal, 1)

View file

@ -9,6 +9,7 @@ import (
"github.com/gabehf/koito/internal/catalog" "github.com/gabehf/koito/internal/catalog"
"github.com/gabehf/koito/internal/cfg" "github.com/gabehf/koito/internal/cfg"
"github.com/gabehf/koito/internal/db" "github.com/gabehf/koito/internal/db"
"github.com/gabehf/koito/internal/images"
"github.com/gabehf/koito/internal/logger" "github.com/gabehf/koito/internal/logger"
"github.com/gabehf/koito/internal/utils" "github.com/gabehf/koito/internal/utils"
"github.com/google/uuid" "github.com/google/uuid"
@ -75,7 +76,7 @@ func ReplaceImageHandler(store db.DB) http.HandlerFunc {
fileUrl := r.FormValue("image_url") fileUrl := r.FormValue("image_url")
if fileUrl != "" { if fileUrl != "" {
l.Debug().Msg("ReplaceImageHandler: Image identified as remote file") l.Debug().Msg("ReplaceImageHandler: Image identified as remote file")
err = catalog.ValidateImageURL(fileUrl) err = images.ValidateImageURL(fileUrl)
if err != nil { if err != nil {
l.Debug().AnErr("error", err).Msg("ReplaceImageHandler: Invalid image URL") l.Debug().AnErr("error", err).Msg("ReplaceImageHandler: Invalid image URL")
utils.WriteError(w, "url is invalid or not an image file", http.StatusBadRequest) utils.WriteError(w, "url is invalid or not an image file", http.StatusBadRequest)

View file

@ -13,7 +13,9 @@ import (
"github.com/gabehf/koito/internal/cfg" "github.com/gabehf/koito/internal/cfg"
"github.com/gabehf/koito/internal/db" "github.com/gabehf/koito/internal/db"
"github.com/gabehf/koito/internal/images"
"github.com/gabehf/koito/internal/logger" "github.com/gabehf/koito/internal/logger"
"github.com/gabehf/koito/internal/utils"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/h2non/bimg" "github.com/h2non/bimg"
) )
@ -78,30 +80,10 @@ func SourceImageDir() string {
} }
} }
// ValidateImageURL checks that url points to an image by issuing a HEAD request.
//
// It returns nil only when the request succeeds, the response status is 200 OK, and the
// Content-Type header starts with "image/". Media types are case-insensitive, so the header
// value is lowercased before the prefix comparison; the original value is kept for the
// error message.
//
// NOTE(review): http.Head goes through http.DefaultClient, which has no timeout, so a
// server that never answers blocks the caller indefinitely.
func ValidateImageURL(url string) error {
	resp, err := http.Head(url)
	if err != nil {
		return fmt.Errorf("ValidateImageURL: http.Head: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("ValidateImageURL: HEAD request failed, status code: %d", resp.StatusCode)
	}

	contentType := resp.Header.Get("Content-Type")
	if !strings.HasPrefix(strings.ToLower(contentType), "image/") {
		return fmt.Errorf("ValidateImageURL: URL does not point to an image, content type: %s", contentType)
	}

	return nil
}
// DownloadAndCacheImage downloads an image from the given URL, then calls CompressAndSaveImage. // DownloadAndCacheImage downloads an image from the given URL, then calls CompressAndSaveImage.
func DownloadAndCacheImage(ctx context.Context, id uuid.UUID, url string, size ImageSize) error { func DownloadAndCacheImage(ctx context.Context, id uuid.UUID, url string, size ImageSize) error {
l := logger.FromContext(ctx) l := logger.FromContext(ctx)
err := ValidateImageURL(url) err := images.ValidateImageURL(url)
if err != nil { if err != nil {
return fmt.Errorf("DownloadAndCacheImage: %w", err) return fmt.Errorf("DownloadAndCacheImage: %w", err)
} }
@ -285,3 +267,68 @@ func pruneDirImgs(ctx context.Context, store db.DB, path string, memo map[string
} }
return count, nil return count, nil
} }
// FetchMissingArtistImages walks every artist that has no image, asks the image providers
// for one, and records the result on the artist.
//
// Artists are read from the store one page at a time, using the ID of the last artist
// handled as the starting point for the next page. Because that ID is advanced before any
// work is done for an artist, an artist whose lookup or update fails is skipped for the
// rest of this run instead of being fetched again on the next page.
//
// Per-artist failures are logged and do not stop the run. An error is returned only when a
// page of artists cannot be read from the store or when ctx is done; nil is returned once
// the store reports no further artists.
func FetchMissingArtistImages(ctx context.Context, store db.DB) error {
	l := logger.FromContext(ctx)
	l.Info().Msg("FetchMissingArtistImages: Starting backfill of missing artist images")

	var from int32
	for {
		l.Debug().Int32("ID", from).Msg("Fetching artist images to backfill from ID")
		artists, err := store.ArtistsWithoutImages(ctx, from)
		if err != nil {
			return fmt.Errorf("FetchMissingArtistImages: failed to fetch artists for image backfill: %w", err)
		}
		if len(artists) == 0 {
			if from == 0 {
				l.Info().Msg("FetchMissingArtistImages: No artists with missing images found")
			} else {
				l.Info().Msg("FetchMissingArtistImages: Finished fetching missing artist images")
			}
			return nil
		}

		for _, artist := range artists {
			// Each artist costs at least one provider round trip, so check for
			// cancellation per artist rather than per page.
			if err := ctx.Err(); err != nil {
				return fmt.Errorf("FetchMissingArtistImages: %w", err)
			}

			// Advance the paging position first so a failure below cannot make the
			// next page start at or before this artist.
			from = artist.ID
			l.Debug().
				Str("title", artist.Name).
				Msg("FetchMissingArtistImages: Attempting to fetch missing artist image")

			// Search with every known alias when they can be loaded; otherwise fall
			// back to the artist's name alone.
			aliases := []string{artist.Name}
			aliasRows, aliasErr := store.GetAllArtistAliases(ctx, artist.ID)
			if aliasErr != nil {
				l.Debug().Err(aliasErr).
					Str("name", artist.Name).
					Msg("FetchMissingArtistImages: Failed to load artist aliases, falling back to artist name")
			} else if flat := utils.FlattenAliases(aliasRows); len(flat) > 0 {
				aliases = flat
			}

			imgUrl, imgErr := images.GetArtistImage(ctx, images.ArtistImageOpts{
				Aliases: aliases,
			})
			if imgErr != nil || imgUrl == "" {
				// Log the provider error itself, not an unrelated earlier error value.
				l.Err(imgErr).
					Str("name", artist.Name).
					Msg("FetchMissingArtistImages: Failed to fetch artist image")
				continue
			}

			err := store.UpdateArtist(ctx, db.UpdateArtistOpts{
				ID:       artist.ID,
				Image:    uuid.New(),
				ImageSrc: imgUrl,
			})
			if err != nil {
				l.Err(err).
					Str("title", artist.Name).
					Msg("FetchMissingArtistImages: Failed to update artist with image in database")
				continue
			}
			l.Info().
				Str("name", artist.Name).
				Msg("FetchMissingArtistImages: Successfully fetched missing artist image")
		}
	}
}

View file

@ -88,6 +88,7 @@ type DB interface {
// in seconds // in seconds
CountTimeListenedToItem(ctx context.Context, opts TimeListenedOpts) (int64, error) CountTimeListenedToItem(ctx context.Context, opts TimeListenedOpts) (int64, error)
CountUsers(ctx context.Context) (int64, error) CountUsers(ctx context.Context) (int64, error)
// Search // Search
SearchArtists(ctx context.Context, q string) ([]*models.Artist, error) SearchArtists(ctx context.Context, q string) ([]*models.Artist, error)
@ -105,6 +106,7 @@ type DB interface {
ImageHasAssociation(ctx context.Context, image uuid.UUID) (bool, error) ImageHasAssociation(ctx context.Context, image uuid.UUID) (bool, error)
GetImageSource(ctx context.Context, image uuid.UUID) (string, error) GetImageSource(ctx context.Context, image uuid.UUID) (string, error)
AlbumsWithoutImages(ctx context.Context, from int32) ([]*models.Album, error) AlbumsWithoutImages(ctx context.Context, from int32) ([]*models.Album, error)
ArtistsWithoutImages(ctx context.Context, from int32) ([]*models.Artist, error)
GetExportPage(ctx context.Context, opts GetExportPageOpts) ([]*ExportItem, error) GetExportPage(ctx context.Context, opts GetExportPageOpts) ([]*ExportItem, error)
Ping(ctx context.Context) error Ping(ctx context.Context) error
Close(ctx context.Context) Close(ctx context.Context)

View file

@ -72,3 +72,26 @@ func (d *Psql) AlbumsWithoutImages(ctx context.Context, from int32) ([]*models.A
} }
return albums, nil return albums, nil
} }
// ArtistsWithoutImages returns the next page of at most 20 artists from the
// GetArtistsWithoutImages query, restricted to artists whose ID is greater than from.
//
// A pgx.ErrNoRows from the query is reported as nil, nil; any other query error is wrapped
// and returned. Otherwise the result is a non-nil slice, which is empty when the query
// produced no rows. Only ID, Name and MbzID are populated on the returned artists.
func (d *Psql) ArtistsWithoutImages(ctx context.Context, from int32) ([]*models.Artist, error) {
	params := repository.GetArtistsWithoutImagesParams{
		Limit: 20,
		ID:    from,
	}
	rows, err := d.q.GetArtistsWithoutImages(ctx, params)
	switch {
	case errors.Is(err, pgx.ErrNoRows):
		return nil, nil
	case err != nil:
		return nil, fmt.Errorf("ArtistsWithoutImages: %w", err)
	}

	artists := make([]*models.Artist, 0, len(rows))
	for _, row := range rows {
		artists = append(artists, &models.Artist{
			ID:    row.ID,
			Name:  row.Name,
			MbzID: row.MusicBrainzID,
		})
	}
	return artists, nil
}

View file

@ -110,6 +110,9 @@ func (c *DeezerClient) getEntity(ctx context.Context, endpoint string, result an
return nil return nil
} }
// NOTE: Deezer serves a generic default image when it has no picture for an artist, so
// this function cannot tell that placeholder apart from a real artist image and will treat
// the default image as if it were an actual artist image. No fix for this is known yet.
func (c *DeezerClient) GetArtistImages(ctx context.Context, aliases []string) (string, error) { func (c *DeezerClient) GetArtistImages(ctx context.Context, aliases []string) (string, error) {
l := logger.FromContext(ctx) l := logger.FromContext(ctx)
resp := new(DeezerArtistResponse) resp := new(DeezerArtistResponse)

View file

@ -5,6 +5,7 @@ import (
"context" "context"
"fmt" "fmt"
"net/http" "net/http"
"strings"
"sync" "sync"
"github.com/gabehf/koito/internal/logger" "github.com/gabehf/koito/internal/logger"
@ -64,23 +65,29 @@ func Shutdown() {
func GetArtistImage(ctx context.Context, opts ArtistImageOpts) (string, error) { func GetArtistImage(ctx context.Context, opts ArtistImageOpts) (string, error) {
l := logger.FromContext(ctx) l := logger.FromContext(ctx)
if imgsrc.subsonicEnabled { var imgurl string
// i know the imgurl check here is stupid but i'm stupider and i want to remind myself to do it
// in each check
if imgsrc.subsonicEnabled && imgurl == "" {
img, err := imgsrc.subsonicC.GetArtistImage(ctx, opts.Aliases[0]) img, err := imgsrc.subsonicC.GetArtistImage(ctx, opts.Aliases[0])
if err != nil { if err != nil {
return "", err l.Debug().Err(err).Msg("GetArtistImage: Could not find artist image from Subsonic")
} } else if img != "" {
if img != "" {
return img, nil return img, nil
} }
l.Debug().Msg("Could not find artist image from Subsonic") } else {
l.Debug().Msg("GetArtistImage: Subsonic image fetching is disabled")
} }
if imgsrc.deezerC != nil { if imgsrc.deezerEnabled && imgurl == "" {
img, err := imgsrc.deezerC.GetArtistImages(ctx, opts.Aliases) img, err := imgsrc.deezerC.GetArtistImages(ctx, opts.Aliases)
if err != nil { if err != nil {
return "", err l.Debug().Err(err).Msg("GetArtistImage: Could not find artist image from Subsonic")
} } else if img != "" {
return img, nil return img, nil
} }
} else {
l.Debug().Msg("GetArtistImage: Deezer image fetching is disabled")
}
l.Warn().Msg("GetArtistImage: No image providers are enabled") l.Warn().Msg("GetArtistImage: No image providers are enabled")
return "", nil return "", nil
} }
@ -132,3 +139,23 @@ func GetAlbumImage(ctx context.Context, opts AlbumImageOpts) (string, error) {
l.Warn().Msg("GetAlbumImage: No image providers are enabled") l.Warn().Msg("GetAlbumImage: No image providers are enabled")
return "", nil return "", nil
} }
// ValidateImageURL checks if the URL points to a valid image by performing a HEAD request.
func ValidateImageURL(url string) error {
resp, err := http.Head(url)
if err != nil {
return fmt.Errorf("ValidateImageURL: http.Head: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("ValidateImageURL: HEAD request failed, status code: %d", resp.StatusCode)
}
contentType := resp.Header.Get("Content-Type")
if !strings.HasPrefix(contentType, "image/") {
return fmt.Errorf("ValidateImageURL: URL does not point to an image, content type: %s", contentType)
}
return nil
}

View file

@ -129,9 +129,13 @@ func (c *SubsonicClient) GetArtistImage(ctx context.Context, artist string) (str
if err != nil { if err != nil {
return "", fmt.Errorf("GetArtistImage: %v", err) return "", fmt.Errorf("GetArtistImage: %v", err)
} }
l.Debug().Any("subsonic_response", resp).Send() l.Debug().Any("subsonic_response", resp).Msg("")
if len(resp.SubsonicResponse.SearchResult3.Artist) < 1 || resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl == "" { if len(resp.SubsonicResponse.SearchResult3.Artist) < 1 || resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl == "" {
return "", fmt.Errorf("GetArtistImage: failed to get artist art") return "", fmt.Errorf("GetArtistImage: failed to get artist art")
} }
// Subsonic seems to have a tendency to return an artist image even though the url is a 404
if err = ValidateImageURL(resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl); err != nil {
return "", fmt.Errorf("GetArtistImage: failed to get validate image url")
}
return resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl, nil return resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl, nil
} }

View file

@ -254,6 +254,47 @@ func (q *Queries) GetArtistByName(ctx context.Context, alias string) (GetArtistB
return i, err return i, err
} }
// getArtistsWithoutImages selects artists_with_name rows whose image is NULL and whose id
// is greater than $2, ordered by ascending id and capped at $1 rows.
// NOTE(review): this looks like sqlc-generated output; if so, change the .sql source and
// regenerate rather than editing the query text here.
const getArtistsWithoutImages = `-- name: GetArtistsWithoutImages :many
SELECT
id, musicbrainz_id, image, image_source, name
FROM artists_with_name
WHERE image IS NULL
AND id > $2
ORDER BY id ASC
LIMIT $1
`
// GetArtistsWithoutImagesParams holds the positional arguments for the
// getArtistsWithoutImages query.
type GetArtistsWithoutImagesParams struct {
// Limit is passed as $1, the query's LIMIT.
Limit int32
// ID is passed as $2; the query only returns rows with an id greater than this value.
ID int32
}
// GetArtistsWithoutImages runs the getArtistsWithoutImages query with arg.Limit as $1 and
// arg.ID as $2, and scans every returned row into an ArtistsWithName.
//
// It returns a nil slice and nil error when the query yields no rows. If the query, a row
// scan, or row iteration fails, it returns nil and that error unwrapped.
func (q *Queries) GetArtistsWithoutImages(ctx context.Context, arg GetArtistsWithoutImagesParams) ([]ArtistsWithName, error) {
rows, err := q.db.Query(ctx, getArtistsWithoutImages, arg.Limit, arg.ID)
if err != nil {
return nil, err
}
defer rows.Close()
// items stays nil unless at least one row is appended.
var items []ArtistsWithName
for rows.Next() {
var i ArtistsWithName
// Scan targets follow the column order of the SELECT list in getArtistsWithoutImages.
if err := rows.Scan(
&i.ID,
&i.MusicBrainzID,
&i.Image,
&i.ImageSource,
&i.Name,
); err != nil {
return nil, err
}
items = append(items, i)
}
// Surface any error that ended the rows.Next loop early.
if err := rows.Err(); err != nil {
return nil, err
}
return items, nil
}
const getReleaseArtists = `-- name: GetReleaseArtists :many const getReleaseArtists = `-- name: GetReleaseArtists :many
SELECT SELECT
a.id, a.musicbrainz_id, a.image, a.image_source, a.name, a.id, a.musicbrainz_id, a.image, a.image_source, a.name,