From 1a8099e902298f2b9aeef38f07164a338c4e06a9 Mon Sep 17 00:00:00 2001 From: Gabe Farrell <90876006+gabehf@users.noreply.github.com> Date: Tue, 20 Jan 2026 12:10:54 -0500 Subject: [PATCH] feat: refetch missing images on startup (#160) * artist image refetching * album image refetching * remove unused var --- .env.example | 5 + .gitignore | 1 + Makefile | 9 +- db/queries/artist.sql | 9 ++ engine/engine.go | 14 ++- engine/handlers/replace_image.go | 3 +- internal/catalog/images.go | 147 +++++++++++++++++++++++++----- internal/db/db.go | 2 + internal/db/psql/album.go | 3 + internal/db/psql/artist.go | 3 + internal/db/psql/images.go | 23 +++++ internal/images/deezer.go | 3 + internal/images/imagesrc.go | 46 +++++++--- internal/images/subsonic.go | 6 +- internal/repository/artist.sql.go | 41 +++++++++ 15 files changed, 271 insertions(+), 44 deletions(-) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..d5ed451 --- /dev/null +++ b/.env.example @@ -0,0 +1,5 @@ +KOITO_ALLOWED_HOSTS=* +KOITO_LOG_LEVEL=debug +KOITO_CONFIG_DIR=test_config_dir +KOITO_DATABASE_URL=postgres://postgres:secret@localhost:5432?sslmode=disable +TZ=Etc/UTC diff --git a/.gitignore b/.gitignore index bade026..083bb78 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ test_config_dir +.env diff --git a/Makefile b/Makefile index b437622..99455ac 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,8 @@ +ifneq (,$(wildcard ./.env)) + include .env + export +endif + .PHONY: all test clean client postgres.schemadump: @@ -28,10 +33,10 @@ postgres.remove-scratch: docker stop koito-scratch && docker rm koito-scratch api.debug: postgres.start - KOITO_ALLOWED_HOSTS=* KOITO_LOG_LEVEL=debug KOITO_CONFIG_DIR=test_config_dir KOITO_DATABASE_URL=postgres://postgres:secret@localhost:5432?sslmode=disable go run cmd/api/main.go + go run cmd/api/main.go api.scratch: postgres.run-scratch - KOITO_ALLOWED_HOSTS=* KOITO_LOG_LEVEL=debug KOITO_CONFIG_DIR=test_config_dir/scratch KOITO_DATABASE_URL=postgres://postgres:secret@localhost:5433?sslmode=disable go run cmd/api/main.go + KOITO_DATABASE_URL=postgres://postgres:secret@localhost:5433?sslmode=disable go run cmd/api/main.go api.test: go test ./... -timeout 60s diff --git a/db/queries/artist.sql b/db/queries/artist.sql index deaad60..70a2fdd 100644 --- a/db/queries/artist.sql +++ b/db/queries/artist.sql @@ -56,6 +56,15 @@ LEFT JOIN artist_aliases aa ON a.id = aa.artist_id WHERE a.musicbrainz_id = $1 GROUP BY a.id, a.musicbrainz_id, a.image, a.image_source, a.name; +-- name: GetArtistsWithoutImages :many +SELECT + * +FROM artists_with_name +WHERE image IS NULL + AND id > $2 +ORDER BY id ASC +LIMIT $1; + -- name: GetTopArtistsPaginated :many SELECT x.id, diff --git a/engine/engine.go b/engine/engine.go index 31fe552..9374819 100644 --- a/engine/engine.go +++ b/engine/engine.go @@ -211,6 +211,8 @@ func Run( } }() + l.Info().Msg("Engine: Beginning startup tasks...") + l.Debug().Msg("Engine: Checking import configuration") if !cfg.SkipImport() { go func() { @@ -218,18 +220,14 @@ func Run( }() } - // l.Info().Msg("Creating test export file") - // go func() { - // err := export.ExportData(ctx, "koito", store) - // if err != nil { - // l.Err(err).Msg("Failed to generate export file") - // } - // }() - l.Info().Msg("Engine: Pruning orphaned images") go catalog.PruneOrphanedImages(logger.NewContext(l), store) l.Info().Msg("Engine: Running duration backfill task") go catalog.BackfillTrackDurationsFromMusicBrainz(ctx, store, mbzC) + l.Info().Msg("Engine: Attempting to fetch missing artist images") + go catalog.FetchMissingArtistImages(ctx, store) + l.Info().Msg("Engine: Attempting to fetch missing album images") + go catalog.FetchMissingAlbumImages(ctx, store) l.Info().Msg("Engine: Initialization finished") quit := make(chan os.Signal, 1) diff --git a/engine/handlers/replace_image.go b/engine/handlers/replace_image.go index 66c0bbe..9a2835d 100644 --- a/engine/handlers/replace_image.go +++ b/engine/handlers/replace_image.go @@ -9,6 +9,7 @@ import ( "github.com/gabehf/koito/internal/catalog" "github.com/gabehf/koito/internal/cfg" "github.com/gabehf/koito/internal/db" + "github.com/gabehf/koito/internal/images" "github.com/gabehf/koito/internal/logger" "github.com/gabehf/koito/internal/utils" "github.com/google/uuid" @@ -75,7 +76,7 @@ func ReplaceImageHandler(store db.DB) http.HandlerFunc { fileUrl := r.FormValue("image_url") if fileUrl != "" { l.Debug().Msg("ReplaceImageHandler: Image identified as remote file") - err = catalog.ValidateImageURL(fileUrl) + err = images.ValidateImageURL(fileUrl) if err != nil { l.Debug().AnErr("error", err).Msg("ReplaceImageHandler: Invalid image URL") utils.WriteError(w, "url is invalid or not an image file", http.StatusBadRequest) diff --git a/internal/catalog/images.go b/internal/catalog/images.go index bf5aa26..4193a39 100644 --- a/internal/catalog/images.go +++ b/internal/catalog/images.go @@ -13,7 +13,9 @@ import ( "github.com/gabehf/koito/internal/cfg" "github.com/gabehf/koito/internal/db" + "github.com/gabehf/koito/internal/images" "github.com/gabehf/koito/internal/logger" + "github.com/gabehf/koito/internal/utils" "github.com/google/uuid" "github.com/h2non/bimg" ) @@ -78,30 +80,10 @@ func SourceImageDir() string { } } -// ValidateImageURL checks if the URL points to a valid image by performing a HEAD request. -func ValidateImageURL(url string) error { - resp, err := http.Head(url) - if err != nil { - return fmt.Errorf("ValidateImageURL: http.Head: %w", err) - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("ValidateImageURL: HEAD request failed, status code: %d", resp.StatusCode) - } - - contentType := resp.Header.Get("Content-Type") - if !strings.HasPrefix(contentType, "image/") { - return fmt.Errorf("ValidateImageURL: URL does not point to an image, content type: %s", contentType) - } - - return nil -} - // DownloadAndCacheImage downloads an image from the given URL, then calls CompressAndSaveImage. func DownloadAndCacheImage(ctx context.Context, id uuid.UUID, url string, size ImageSize) error { l := logger.FromContext(ctx) - err := ValidateImageURL(url) + err := images.ValidateImageURL(url) if err != nil { return fmt.Errorf("DownloadAndCacheImage: %w", err) } @@ -285,3 +267,126 @@ func pruneDirImgs(ctx context.Context, store db.DB, path string, memo map[string } return count, nil } + +func FetchMissingArtistImages(ctx context.Context, store db.DB) error { + l := logger.FromContext(ctx) + l.Info().Msg("FetchMissingArtistImages: Starting backfill of missing artist images") + + var from int32 = 0 + + for { + l.Debug().Int32("ID", from).Msg("Fetching artist images to backfill from ID") + artists, err := store.ArtistsWithoutImages(ctx, from) + if err != nil { + return fmt.Errorf("FetchMissingArtistImages: failed to fetch artists for image backfill: %w", err) + } + + if len(artists) == 0 { + if from == 0 { + l.Info().Msg("FetchMissingArtistImages: No artists with missing images found") + } else { + l.Info().Msg("FetchMissingArtistImages: Finished fetching missing artist images") + } + return nil + } + + for _, artist := range artists { + from = artist.ID + + l.Debug(). + Str("title", artist.Name). + Msg("FetchMissingArtistImages: Attempting to fetch missing artist image") + + var aliases []string + if aliasrow, err := store.GetAllArtistAliases(ctx, artist.ID); err != nil { + aliases = utils.FlattenAliases(aliasrow) + } else { + aliases = []string{artist.Name} + } + + var imgid uuid.UUID + imgUrl, imgErr := images.GetArtistImage(ctx, images.ArtistImageOpts{ + Aliases: aliases, + }) + if imgErr == nil && imgUrl != "" { + imgid = uuid.New() + err = store.UpdateArtist(ctx, db.UpdateArtistOpts{ + ID: artist.ID, + Image: imgid, + ImageSrc: imgUrl, + }) + if err != nil { + l.Err(err). + Str("title", artist.Name). + Msg("FetchMissingArtistImages: Failed to update artist with image in database") + continue + } + l.Info(). + Str("name", artist.Name). + Msg("FetchMissingArtistImages: Successfully fetched missing artist image") + } else { + l.Err(err). + Str("name", artist.Name). + Msg("FetchMissingArtistImages: Failed to fetch artist image") + } + } + } +} +func FetchMissingAlbumImages(ctx context.Context, store db.DB) error { + l := logger.FromContext(ctx) + l.Info().Msg("FetchMissingAlbumImages: Starting backfill of missing album images") + + var from int32 = 0 + + for { + l.Debug().Int32("ID", from).Msg("Fetching album images to backfill from ID") + albums, err := store.AlbumsWithoutImages(ctx, from) + if err != nil { + return fmt.Errorf("FetchMissingAlbumImages: failed to fetch albums for image backfill: %w", err) + } + + if len(albums) == 0 { + if from == 0 { + l.Info().Msg("FetchMissingAlbumImages: No albums with missing images found") + } else { + l.Info().Msg("FetchMissingAlbumImages: Finished fetching missing album images") + } + return nil + } + + for _, album := range albums { + from = album.ID + + l.Debug(). + Str("title", album.Title). + Msg("FetchMissingAlbumImages: Attempting to fetch missing album image") + + var imgid uuid.UUID + imgUrl, imgErr := images.GetAlbumImage(ctx, images.AlbumImageOpts{ + Artists: utils.FlattenSimpleArtistNames(album.Artists), + Album: album.Title, + }) + if imgErr == nil && imgUrl != "" { + imgid = uuid.New() + err = store.UpdateAlbum(ctx, db.UpdateAlbumOpts{ + ID: album.ID, + Image: imgid, + ImageSrc: imgUrl, + }) + if err != nil { + l.Err(err). + Str("title", album.Title). + Msg("FetchMissingAlbumImages: Failed to update album with image in database") + continue + } + l.Info(). + Str("name", album.Title). + Msg("FetchMissingAlbumImages: Successfully fetched missing album image") + } else { + l.Err(err). + Str("name", album.Title). + Msg("FetchMissingAlbumImages: Failed to fetch album image") + } + } + } +} diff --git a/internal/db/db.go b/internal/db/db.go index a0f0f80..97badac 100644 --- a/internal/db/db.go +++ b/internal/db/db.go @@ -88,6 +88,7 @@ type DB interface { // in seconds CountTimeListenedToItem(ctx context.Context, opts TimeListenedOpts) (int64, error) CountUsers(ctx context.Context) (int64, error) + // Search SearchArtists(ctx context.Context, q string) ([]*models.Artist, error) @@ -105,6 +106,7 @@ type DB interface { ImageHasAssociation(ctx context.Context, image uuid.UUID) (bool, error) GetImageSource(ctx context.Context, image uuid.UUID) (string, error) AlbumsWithoutImages(ctx context.Context, from int32) ([]*models.Album, error) + ArtistsWithoutImages(ctx context.Context, from int32) ([]*models.Artist, error) GetExportPage(ctx context.Context, opts GetExportPageOpts) ([]*ExportItem, error) Ping(ctx context.Context) error Close(ctx context.Context) diff --git a/internal/db/psql/album.go b/internal/db/psql/album.go index f4c614c..758c287 100644 --- a/internal/db/psql/album.go +++ b/internal/db/psql/album.go @@ -274,6 +274,9 @@ func (d *Psql) UpdateAlbum(ctx context.Context, opts db.UpdateAlbumOpts) error { } } if opts.Image != uuid.Nil { + if opts.ImageSrc == "" { + return fmt.Errorf("UpdateAlbum: image source must be provided when updating an image") + } l.Debug().Msgf("Updating release with ID %d with image %s", opts.ID, opts.Image) err := qtx.UpdateReleaseImage(ctx, repository.UpdateReleaseImageParams{ ID: opts.ID, diff --git a/internal/db/psql/artist.go b/internal/db/psql/artist.go index 7bb50ec..859a490 100644 --- a/internal/db/psql/artist.go +++ b/internal/db/psql/artist.go @@ -210,6 +210,9 @@ func (d *Psql) UpdateArtist(ctx context.Context, opts db.UpdateArtistOpts) error } } if opts.Image != uuid.Nil { + if opts.ImageSrc == "" { + return fmt.Errorf("UpdateAlbum: image source must be provided when updating an image") + } l.Debug().Msgf("Updating artist with id %d with image %s", opts.ID, opts.Image) err = qtx.UpdateArtistImage(ctx, repository.UpdateArtistImageParams{ ID: opts.ID, diff --git a/internal/db/psql/images.go b/internal/db/psql/images.go index 49e2850..eef0d8f 100644 --- a/internal/db/psql/images.go +++ b/internal/db/psql/images.go @@ -72,3 +72,26 @@ func (d *Psql) AlbumsWithoutImages(ctx context.Context, from int32) ([]*models.A } return albums, nil } + +// returns nil, nil on no results +func (d *Psql) ArtistsWithoutImages(ctx context.Context, from int32) ([]*models.Artist, error) { + rows, err := d.q.GetArtistsWithoutImages(ctx, repository.GetArtistsWithoutImagesParams{ + Limit: 20, + ID: from, + }) + if errors.Is(err, pgx.ErrNoRows) { + return nil, nil + } else if err != nil { + return nil, fmt.Errorf("ArtistsWithoutImages: %w", err) + } + + ret := make([]*models.Artist, len(rows)) + for i, row := range rows { + ret[i] = &models.Artist{ + ID: row.ID, + Name: row.Name, + MbzID: row.MusicBrainzID, + } + } + return ret, nil +} diff --git a/internal/images/deezer.go b/internal/images/deezer.go index 8fb7b27..2ced676 100644 --- a/internal/images/deezer.go +++ b/internal/images/deezer.go @@ -110,6 +110,9 @@ func (c *DeezerClient) getEntity(ctx context.Context, endpoint string, result an return nil } +// Deezer behavior is that it serves a default image when it can't find one for an artist, so +// this function will just download the default image thinking that it is an actual artist image. +// I don't know how to fix this yet. func (c *DeezerClient) GetArtistImages(ctx context.Context, aliases []string) (string, error) { l := logger.FromContext(ctx) resp := new(DeezerArtistResponse) diff --git a/internal/images/imagesrc.go b/internal/images/imagesrc.go index 21eec65..b49e9dd 100644 --- a/internal/images/imagesrc.go +++ b/internal/images/imagesrc.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "net/http" + "strings" "sync" "github.com/gabehf/koito/internal/logger" @@ -67,19 +68,23 @@ func GetArtistImage(ctx context.Context, opts ArtistImageOpts) (string, error) { if imgsrc.subsonicEnabled { img, err := imgsrc.subsonicC.GetArtistImage(ctx, opts.Aliases[0]) if err != nil { - return "", err - } - if img != "" { + l.Debug().Err(err).Msg("GetArtistImage: Could not find artist image from Subsonic") + } else if img != "" { return img, nil } - l.Debug().Msg("Could not find artist image from Subsonic") + } else { + l.Debug().Msg("GetArtistImage: Subsonic image fetching is disabled") } - if imgsrc.deezerC != nil { + if imgsrc.deezerEnabled { img, err := imgsrc.deezerC.GetArtistImages(ctx, opts.Aliases) if err != nil { + l.Debug().Err(err).Msg("GetArtistImage: Could not find artist image from Deezer") return "", err + } else if img != "" { + return img, nil } - return img, nil + } else { + l.Debug().Msg("GetArtistImage: Deezer image fetching is disabled") } l.Warn().Msg("GetArtistImage: No image providers are enabled") return "", nil @@ -89,7 +94,7 @@ func GetAlbumImage(ctx context.Context, opts AlbumImageOpts) (string, error) { if imgsrc.subsonicEnabled { img, err := imgsrc.subsonicC.GetAlbumImage(ctx, opts.Artists[0], opts.Album) if err != nil { - return "", err + l.Debug().Err(err).Msg("GetAlbumImage: Could not find artist image from Subsonic") } if img != "" { return img, nil @@ -102,29 +107,28 @@ func GetAlbumImage(ctx context.Context, opts AlbumImageOpts) (string, error) { url := fmt.Sprintf(caaBaseUrl+"/release/%s/front", opts.ReleaseMbzID.String()) resp, err := http.DefaultClient.Head(url) if err != nil { - return "", err + l.Debug().Err(err).Msg("GetAlbumImage: Could not find artist image from CoverArtArchive with Release MBID") } if resp.StatusCode == 200 { return url, nil } - l.Debug().Str("url", url).Str("status", resp.Status).Msg("Could not find album cover from CoverArtArchive with MusicBrainz release ID") } if opts.ReleaseGroupMbzID != nil && *opts.ReleaseGroupMbzID != uuid.Nil { url := fmt.Sprintf(caaBaseUrl+"/release-group/%s/front", opts.ReleaseGroupMbzID.String()) resp, err := http.DefaultClient.Head(url) if err != nil { - return "", err + l.Debug().Err(err).Msg("GetAlbumImage: Could not find artist image from CoverArtArchive with Release Group MBID") } if resp.StatusCode == 200 { return url, nil } - l.Debug().Str("url", url).Str("status", resp.Status).Msg("Could not find album cover from CoverArtArchive with MusicBrainz release group ID") } } if imgsrc.deezerEnabled { l.Debug().Msg("Attempting to find album image from Deezer") img, err := imgsrc.deezerC.GetAlbumImages(ctx, opts.Artists, opts.Album) if err != nil { + l.Debug().Err(err).Msg("GetAlbumImage: Could not find artist image from Deezer") return "", err } return img, nil @@ -132,3 +136,23 @@ func GetAlbumImage(ctx context.Context, opts AlbumImageOpts) (string, error) { l.Warn().Msg("GetAlbumImage: No image providers are enabled") return "", nil } + +// ValidateImageURL checks if the URL points to a valid image by performing a HEAD request. +func ValidateImageURL(url string) error { + resp, err := http.Head(url) + if err != nil { + return fmt.Errorf("ValidateImageURL: http.Head: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("ValidateImageURL: HEAD request failed, status code: %d", resp.StatusCode) + } + + contentType := resp.Header.Get("Content-Type") + if !strings.HasPrefix(contentType, "image/") { + return fmt.Errorf("ValidateImageURL: URL does not point to an image, content type: %s", contentType) + } + + return nil +} diff --git a/internal/images/subsonic.go b/internal/images/subsonic.go index 961b4c2..6241b09 100644 --- a/internal/images/subsonic.go +++ b/internal/images/subsonic.go @@ -129,9 +129,13 @@ func (c *SubsonicClient) GetArtistImage(ctx context.Context, artist string) (str if err != nil { return "", fmt.Errorf("GetArtistImage: %v", err) } - l.Debug().Any("subsonic_response", resp).Send() + l.Debug().Any("subsonic_response", resp).Msg("") if len(resp.SubsonicResponse.SearchResult3.Artist) < 1 || resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl == "" { return "", fmt.Errorf("GetArtistImage: failed to get artist art") } + // Subsonic seems to have a tendency to return an artist image even though the url is a 404 + if err = ValidateImageURL(resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl); err != nil { + return "", fmt.Errorf("GetArtistImage: failed to get validate image url") + } return resp.SubsonicResponse.SearchResult3.Artist[0].ArtistImageUrl, nil } diff --git a/internal/repository/artist.sql.go b/internal/repository/artist.sql.go index 96f00f2..8506975 100644 --- a/internal/repository/artist.sql.go +++ b/internal/repository/artist.sql.go @@ -254,6 +254,47 @@ func (q *Queries) GetArtistByName(ctx context.Context, alias string) (GetArtistB return i, err } +const getArtistsWithoutImages = `-- name: GetArtistsWithoutImages :many +SELECT + id, musicbrainz_id, image, image_source, name +FROM artists_with_name +WHERE image IS NULL + AND id > $2 +ORDER BY id ASC +LIMIT $1 +` + +type GetArtistsWithoutImagesParams struct { + Limit int32 + ID int32 +} + +func (q *Queries) GetArtistsWithoutImages(ctx context.Context, arg GetArtistsWithoutImagesParams) ([]ArtistsWithName, error) { + rows, err := q.db.Query(ctx, getArtistsWithoutImages, arg.Limit, arg.ID) + if err != nil { + return nil, err + } + defer rows.Close() + var items []ArtistsWithName + for rows.Next() { + var i ArtistsWithName + if err := rows.Scan( + &i.ID, + &i.MusicBrainzID, + &i.Image, + &i.ImageSource, + &i.Name, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const getReleaseArtists = `-- name: GetReleaseArtists :many SELECT a.id, a.musicbrainz_id, a.image, a.image_source, a.name,