feat: refetch missing images on startup (#160)

* artist image refetching

* album image refetching

* remove unused var
This commit is contained in:
Gabe Farrell 2026-01-20 12:10:54 -05:00 committed by GitHub
parent 5e294b839c
commit 1a8099e902
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 271 additions and 44 deletions

View file

@ -13,7 +13,9 @@ import (
"github.com/gabehf/koito/internal/cfg"
"github.com/gabehf/koito/internal/db"
"github.com/gabehf/koito/internal/images"
"github.com/gabehf/koito/internal/logger"
"github.com/gabehf/koito/internal/utils"
"github.com/google/uuid"
"github.com/h2non/bimg"
)
@ -78,30 +80,10 @@ func SourceImageDir() string {
}
}
// ValidateImageURL checks if the URL points to a valid image by performing a HEAD request.
func ValidateImageURL(url string) error {
resp, err := http.Head(url)
if err != nil {
return fmt.Errorf("ValidateImageURL: http.Head: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("ValidateImageURL: HEAD request failed, status code: %d", resp.StatusCode)
}
contentType := resp.Header.Get("Content-Type")
if !strings.HasPrefix(contentType, "image/") {
return fmt.Errorf("ValidateImageURL: URL does not point to an image, content type: %s", contentType)
}
return nil
}
// DownloadAndCacheImage downloads an image from the given URL, then calls CompressAndSaveImage.
func DownloadAndCacheImage(ctx context.Context, id uuid.UUID, url string, size ImageSize) error {
l := logger.FromContext(ctx)
err := ValidateImageURL(url)
err := images.ValidateImageURL(url)
if err != nil {
return fmt.Errorf("DownloadAndCacheImage: %w", err)
}
@ -285,3 +267,126 @@ func pruneDirImgs(ctx context.Context, store db.DB, path string, memo map[string
}
return count, nil
}
func FetchMissingArtistImages(ctx context.Context, store db.DB) error {
l := logger.FromContext(ctx)
l.Info().Msg("FetchMissingArtistImages: Starting backfill of missing artist images")
var from int32 = 0
for {
l.Debug().Int32("ID", from).Msg("Fetching artist images to backfill from ID")
artists, err := store.ArtistsWithoutImages(ctx, from)
if err != nil {
return fmt.Errorf("FetchMissingArtistImages: failed to fetch artists for image backfill: %w", err)
}
if len(artists) == 0 {
if from == 0 {
l.Info().Msg("FetchMissingArtistImages: No artists with missing images found")
} else {
l.Info().Msg("FetchMissingArtistImages: Finished fetching missing artist images")
}
return nil
}
for _, artist := range artists {
from = artist.ID
l.Debug().
Str("title", artist.Name).
Msg("FetchMissingArtistImages: Attempting to fetch missing artist image")
var aliases []string
if aliasrow, err := store.GetAllArtistAliases(ctx, artist.ID); err != nil {
aliases = utils.FlattenAliases(aliasrow)
} else {
aliases = []string{artist.Name}
}
var imgid uuid.UUID
imgUrl, imgErr := images.GetArtistImage(ctx, images.ArtistImageOpts{
Aliases: aliases,
})
if imgErr == nil && imgUrl != "" {
imgid = uuid.New()
err = store.UpdateArtist(ctx, db.UpdateArtistOpts{
ID: artist.ID,
Image: imgid,
ImageSrc: imgUrl,
})
if err != nil {
l.Err(err).
Str("title", artist.Name).
Msg("FetchMissingArtistImages: Failed to update artist with image in database")
continue
}
l.Info().
Str("name", artist.Name).
Msg("FetchMissingArtistImages: Successfully fetched missing artist image")
} else {
l.Err(err).
Str("name", artist.Name).
Msg("FetchMissingArtistImages: Failed to fetch artist image")
}
}
}
}
func FetchMissingAlbumImages(ctx context.Context, store db.DB) error {
l := logger.FromContext(ctx)
l.Info().Msg("FetchMissingAlbumImages: Starting backfill of missing album images")
var from int32 = 0
for {
l.Debug().Int32("ID", from).Msg("Fetching album images to backfill from ID")
albums, err := store.AlbumsWithoutImages(ctx, from)
if err != nil {
return fmt.Errorf("FetchMissingAlbumImages: failed to fetch albums for image backfill: %w", err)
}
if len(albums) == 0 {
if from == 0 {
l.Info().Msg("FetchMissingAlbumImages: No albums with missing images found")
} else {
l.Info().Msg("FetchMissingAlbumImages: Finished fetching missing album images")
}
return nil
}
for _, album := range albums {
from = album.ID
l.Debug().
Str("title", album.Title).
Msg("FetchMissingAlbumImages: Attempting to fetch missing album image")
var imgid uuid.UUID
imgUrl, imgErr := images.GetAlbumImage(ctx, images.AlbumImageOpts{
Artists: utils.FlattenSimpleArtistNames(album.Artists),
Album: album.Title,
})
if imgErr == nil && imgUrl != "" {
imgid = uuid.New()
err = store.UpdateAlbum(ctx, db.UpdateAlbumOpts{
ID: album.ID,
Image: imgid,
ImageSrc: imgUrl,
})
if err != nil {
l.Err(err).
Str("title", album.Title).
Msg("FetchMissingAlbumImages: Failed to update album with image in database")
continue
}
l.Info().
Str("name", album.Title).
Msg("FetchMissingAlbumImages: Successfully fetched missing album image")
} else {
l.Err(err).
Str("name", album.Title).
Msg("FetchMissingAlbumImages: Failed to fetch album image")
}
}
}
}

View file

@ -88,6 +88,7 @@ type DB interface {
// in seconds
CountTimeListenedToItem(ctx context.Context, opts TimeListenedOpts) (int64, error)
CountUsers(ctx context.Context) (int64, error)
// Search
SearchArtists(ctx context.Context, q string) ([]*models.Artist, error)
@ -105,6 +106,7 @@ type DB interface {
ImageHasAssociation(ctx context.Context, image uuid.UUID) (bool, error)
GetImageSource(ctx context.Context, image uuid.UUID) (string, error)
AlbumsWithoutImages(ctx context.Context, from int32) ([]*models.Album, error)
ArtistsWithoutImages(ctx context.Context, from int32) ([]*models.Artist, error)
GetExportPage(ctx context.Context, opts GetExportPageOpts) ([]*ExportItem, error)
Ping(ctx context.Context) error
Close(ctx context.Context)

View file

@ -274,6 +274,9 @@ func (d *Psql) UpdateAlbum(ctx context.Context, opts db.UpdateAlbumOpts) error {
}
}
if opts.Image != uuid.Nil {
if opts.ImageSrc == "" {
return fmt.Errorf("UpdateAlbum: image source must be provided when updating an image")
}
l.Debug().Msgf("Updating release with ID %d with image %s", opts.ID, opts.Image)
err := qtx.UpdateReleaseImage(ctx, repository.UpdateReleaseImageParams{
ID: opts.ID,

View file

@ -210,6 +210,9 @@ func (d *Psql) UpdateArtist(ctx context.Context, opts db.UpdateArtistOpts) error
}
}
if opts.Image != uuid.Nil {
if opts.ImageSrc == "" {
return fmt.Errorf("UpdateAlbum: image source must be provided when updating an image")
}
l.Debug().Msgf("Updating artist with id %d with image %s", opts.ID, opts.Image)
err = qtx.UpdateArtistImage(ctx, repository.UpdateArtistImageParams{
ID: opts.ID,

View file

@ -72,3 +72,26 @@ func (d *Psql) AlbumsWithoutImages(ctx context.Context, from int32) ([]*models.A
}
return albums, nil
}
// returns nil, nil on no results
func (d *Psql) ArtistsWithoutImages(ctx context.Context, from int32) ([]*models.Artist, error) {
rows, err := d.q.GetArtistsWithoutImages(ctx, repository.GetArtistsWithoutImagesParams{
Limit: 20,
ID: from,
})
if errors.Is(err, pgx.ErrNoRows) {
return nil, nil
} else if err != nil {
return nil, fmt.Errorf("ArtistsWithoutImages: %w", err)
}
ret := make([]*models.Artist, len(rows))
for i, row := range rows {
ret[i] = &models.Artist{
ID: row.ID,
Name: row.Name,
MbzID: row.MusicBrainzID,
}
}
return ret, nil
}

View file

@ -110,6 +110,9 @@ func (c *DeezerClient) getEntity(ctx context.Context, endpoint string, result an
return nil
}
// Deezer behavior is that it serves a default image when it can't find one for an artist, so
// this function will just download the default image thinking that it is an actual artist image.
// I don't know how to fix this yet.
func (c *DeezerClient) GetArtistImages(ctx context.Context, aliases []string) (string, error) {
l := logger.FromContext(ctx)
resp := new(DeezerArtistResponse)

View file

@ -5,6 +5,7 @@ import (
"context"
"fmt"
"net/http"
"strings"
"sync"
"github.com/gabehf/koito/internal/logger"
@ -67,19 +68,23 @@ func GetArtistImage(ctx context.Context, opts ArtistImageOpts) (string, error) {
if imgsrc.subsonicEnabled {
img, err := imgsrc.subsonicC.GetArtistImage(ctx, opts.Aliases[0])
if err != nil {
return "", err
}
if img != "" {
l.Debug().Err(err).Msg("GetArtistImage: Could not find artist image from Subsonic")
} else if img != "" {
return img, nil
}
l.Debug().Msg("Could not find artist image from Subsonic")
} else {
l.Debug().Msg("GetArtistImage: Subsonic image fetching is disabled")
}
if imgsrc.deezerC != nil {
if imgsrc.deezerEnabled {
img, err := imgsrc.deezerC.GetArtistImages(ctx, opts.Aliases)
if err != nil {
l.Debug().Err(err).Msg("GetArtistImage: Could not find artist image from Deezer")
return "", err
} else if img != "" {
return img, nil
}
return img, nil
} else {
l.Debug().Msg("GetArtistImage: Deezer image fetching is disabled")
}
l.Warn().Msg("GetArtistImage: No image providers are enabled")
return "", nil
@ -89,7 +94,7 @@ func GetAlbumImage(ctx context.Context, opts AlbumImageOpts) (string, error) {
if imgsrc.subsonicEnabled {
img, err := imgsrc.subsonicC.GetAlbumImage(ctx, opts.Artists[0], opts.Album)
if err != nil {
return "", err
l.Debug().Err(err).Msg("GetAlbumImage: Could not find artist image from Subsonic")
}
if img != "" {
return img, nil
@ -102,29 +107,28 @@ func GetAlbumImage(ctx context.Context, opts AlbumImageOpts) (string, error) {
url := fmt.Sprintf(caaBaseUrl+"/release/%s/front", opts.ReleaseMbzID.String())
resp, err := http.DefaultClient.Head(url)
if err != nil {
return "", err
l.Debug().Err(err).Msg("GetAlbumImage: Could not find artist image from CoverArtArchive with Release MBID")
}
if resp.StatusCode == 200 {
return url, nil
}
l.Debug().Str("url", url).Str("status", resp.Status).Msg("Could not find album cover from CoverArtArchive with MusicBrainz release ID")
}
if opts.ReleaseGroupMbzID != nil && *opts.ReleaseGroupMbzID != uuid.Nil {
url := fmt.Sprintf(caaBaseUrl+"/release-group/%s/front", opts.ReleaseGroupMbzID.String())
resp, err := http.DefaultClient.Head(url)
if err != nil {
return "", err
l.Debug().Err(err).Msg("GetAlbumImage: Could not find artist image from CoverArtArchive with Release Group MBID")
}
if resp.StatusCode == 200 {
return url, nil
}
l.Debug().Str("url", url).Str("status", resp.Status).Msg("Could not find album cover from CoverArtArchive with MusicBrainz release group ID")
}
}
if imgsrc.deezerEnabled {
l.Debug().Msg("Attempting to find album image from Deezer")
img, err := imgsrc.deezerC.GetAlbumImages(ctx, opts.Artists, opts.Album)
if err != nil {
l.Debug().Err(err).Msg("GetAlbumImage: Could not find artist image from Deezer")
return "", err
}
return img, nil
@ -132,3 +136,23 @@ func GetAlbumImage(ctx context.Context, opts AlbumImageOpts) (string, error) {
l.Warn().Msg("GetAlbumImage: No image providers are enabled")
return "", nil
}
// ValidateImageURL checks if the URL points to a valid image by performing a HEAD request.
func ValidateImageURL(url string) error {
resp, err := http.Head(url)
if err != nil {
return fmt.Errorf("ValidateImageURL: http.Head: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("ValidateImageURL: HEAD request failed, status code: %d", resp.StatusCode)
}
contentType := resp.Header.Get("Content-Type")
if !strings.HasPrefix(contentType, "image/") {
return fmt.Errorf("ValidateImageURL: URL does not point to an image, content type: %s", contentType)
}
return nil
}

View file

@ -129,9 +129,13 @@ func (c *SubsonicClient) GetArtistImage(ctx context.Context, artist string) (str
if err != nil {
return "", fmt.Errorf("GetArtistImage: %v", err)
}
l.Debug().Any("subsonic_response", resp).Send()
l.Debug().Any("subsonic_response", resp).Msg("")
if len(resp.SubsonicResponse.SearchResult3.Artist) < 1 || resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl == "" {
return "", fmt.Errorf("GetArtistImage: failed to get artist art")
}
// Subsonic seems to have a tendency to return an artist image even though the url is a 404
if err = ValidateImageURL(resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl); err != nil {
return "", fmt.Errorf("GetArtistImage: failed to get validate image url")
}
return resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl, nil
}

View file

@ -254,6 +254,47 @@ func (q *Queries) GetArtistByName(ctx context.Context, alias string) (GetArtistB
return i, err
}
const getArtistsWithoutImages = `-- name: GetArtistsWithoutImages :many
SELECT
id, musicbrainz_id, image, image_source, name
FROM artists_with_name
WHERE image IS NULL
AND id > $2
ORDER BY id ASC
LIMIT $1
`
type GetArtistsWithoutImagesParams struct {
Limit int32
ID int32
}
func (q *Queries) GetArtistsWithoutImages(ctx context.Context, arg GetArtistsWithoutImagesParams) ([]ArtistsWithName, error) {
rows, err := q.db.Query(ctx, getArtistsWithoutImages, arg.Limit, arg.ID)
if err != nil {
return nil, err
}
defer rows.Close()
var items []ArtistsWithName
for rows.Next() {
var i ArtistsWithName
if err := rows.Scan(
&i.ID,
&i.MusicBrainzID,
&i.Image,
&i.ImageSource,
&i.Name,
); err != nil {
return nil, err
}
items = append(items, i)
}
if err := rows.Err(); err != nil {
return nil, err
}
return items, nil
}
const getReleaseArtists = `-- name: GetReleaseArtists :many
SELECT
a.id, a.musicbrainz_id, a.image, a.image_source, a.name,