From df59605418a4f0e250e4f2c991e911620d107022 Mon Sep 17 00:00:00 2001
From: Gabe Farrell <90876006+gabehf@users.noreply.github.com>
Date: Wed, 14 Jan 2026 00:08:05 -0500
Subject: [PATCH] feat: backfill duration from musicbrainz (#135)

* feat: backfill durations from musicbrainz

* chore: make request body dump info level
---
 db/queries/track.sql                 | 10 ++++
 engine/engine.go                     | 64 ++++++++++++++++-----
 engine/handlers/lbz_submit_listen.go |  2 +-
 internal/catalog/duration.go         | 90 ++++++++++++++++++++++++++++
 internal/db/db.go                    |  1 +
 internal/db/psql/track.go            | 28 +++++++++
 internal/repository/track.sql.go     | 42 ++++++++++++++
 7 files changed, 221 insertions(+), 16 deletions(-)
 create mode 100644 internal/catalog/duration.go

diff --git a/db/queries/track.sql b/db/queries/track.sql
index af7006a..933fcc1 100644
--- a/db/queries/track.sql
+++ b/db/queries/track.sql
@@ -137,3 +137,13 @@ WHERE artist_id = $1 AND track_id = $2;
 
 -- name: DeleteTrack :exec
 DELETE FROM tracks WHERE id = $1;
+
+-- name: GetTracksWithNoDurationButHaveMbzID :many
+SELECT
+  *
+FROM tracks_with_title
+WHERE duration = 0
+  AND musicbrainz_id IS NOT NULL
+  AND id > $2
+ORDER BY id ASC
+LIMIT $1;
diff --git a/engine/engine.go b/engine/engine.go
index b8e01b8..f259efb 100644
--- a/engine/engine.go
+++ b/engine/engine.go
@@ -2,6 +2,7 @@ package engine
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
@@ -105,6 +106,32 @@ func Run(
 		l.Warn().Msg("Engine: MusicBrainz client disabled")
 	}
 
+	if cfg.SubsonicEnabled() {
+		l.Debug().Msg("Engine: Checking Subsonic configuration")
+		pingURL := cfg.SubsonicUrl() + "/rest/ping.view?" + cfg.SubsonicParams() + "&f=json"
+
+		resp, err := http.Get(pingURL)
+		if err != nil {
+			l.Fatal().Err(err).Msg("Engine: Failed to contact Subsonic server! Ensure the provided URL is correct")
+		} else {
+			defer resp.Body.Close()
+
+			var result struct {
+				Response struct {
+					Status string `json:"status"`
+				} `json:"subsonic-response"`
+			}
+
+			if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+				l.Fatal().Err(err).Msg("Engine: Failed to parse Subsonic response")
+			} else if result.Response.Status != "ok" {
+				l.Fatal().Msg("Engine: Provided Subsonic credentials are invalid")
+			} else {
+				l.Info().Msg("Engine: Subsonic credentials validated successfully")
+			}
+		}
+	}
+
 	l.Debug().Msg("Engine: Initializing image sources")
 	images.Initialize(images.ImageSourceOpts{
 		UserAgent: cfg.UserAgent(),
@@ -201,6 +228,13 @@ func Run(
 
 	l.Info().Msg("Engine: Pruning orphaned images")
 	go catalog.PruneOrphanedImages(logger.NewContext(l), store)
+	l.Info().Msg("Engine: Running duration backfill task")
+	go func() {
+		// Surface a failed backfill instead of silently dropping the returned error.
+		if err := catalog.BackfillTrackDurationsFromMusicBrainz(ctx, store, mbzC); err != nil {
+			l.Err(err).Msg("Engine: Duration backfill task failed")
+		}
+	}()
 
 	l.Info().Msg("Engine: Initialization finished")
 	quit := make(chan os.Signal, 1)
@@ -221,19 +255,19 @@ func Run(
 }
 
 func RunImporter(l *zerolog.Logger, store db.DB, mbzc mbz.MusicBrainzCaller) {
-	l.Debug().Msg("Checking for import files...")
+	l.Debug().Msg("Importer: Checking for import files...")
 	files, err := os.ReadDir(path.Join(cfg.ConfigDir(), "import"))
 	if err != nil {
-		l.Err(err).Msg("Failed to read files from import dir")
+		l.Err(err).Msg("Importer: Failed to read files from import dir")
 	}
 	if len(files) > 0 {
-		l.Info().Msg("Files found in import directory. Attempting to import...")
+		l.Info().Msg("Importer: Files found in import directory. Attempting to import...")
 	} else {
 		return
 	}
 	defer func() {
 		if r := recover(); r != nil {
-			l.Error().Interface("recover", r).Msg("Panic when importing files")
+			l.Error().Interface("recover", r).Msg("Importer: Panic when importing files")
 		}
 	}()
 	for _, file := range files {
@@ -241,37 +275,37 @@ func RunImporter(l *zerolog.Logger, store db.DB, mbzc mbz.MusicBrainzCaller) {
 			continue
 		}
 		if strings.Contains(file.Name(), "Streaming_History_Audio") {
-			l.Info().Msgf("Import file %s detecting as being Spotify export", file.Name())
+			l.Info().Msgf("Importer: Import file %s detecting as being Spotify export", file.Name())
 			err := importer.ImportSpotifyFile(logger.NewContext(l), store, file.Name())
 			if err != nil {
-				l.Err(err).Msgf("Failed to import file: %s", file.Name())
+				l.Err(err).Msgf("Importer: Failed to import file: %s", file.Name())
 			}
 		} else if strings.Contains(file.Name(), "maloja") {
-			l.Info().Msgf("Import file %s detecting as being Maloja export", file.Name())
+			l.Info().Msgf("Importer: Import file %s detecting as being Maloja export", file.Name())
 			err := importer.ImportMalojaFile(logger.NewContext(l), store, file.Name())
 			if err != nil {
-				l.Err(err).Msgf("Failed to import file: %s", file.Name())
+				l.Err(err).Msgf("Importer: Failed to import file: %s", file.Name())
 			}
 		} else if strings.Contains(file.Name(), "recenttracks") {
-			l.Info().Msgf("Import file %s detecting as being ghan.nl LastFM export", file.Name())
+			l.Info().Msgf("Importer: Import file %s detecting as being ghan.nl LastFM export", file.Name())
 			err := importer.ImportLastFMFile(logger.NewContext(l), store, mbzc, file.Name())
 			if err != nil {
-				l.Err(err).Msgf("Failed to import file: %s", file.Name())
+				l.Err(err).Msgf("Importer: Failed to import file: %s", file.Name())
 			}
 		} else if strings.Contains(file.Name(), "listenbrainz") {
-			l.Info().Msgf("Import file %s detecting as being ListenBrainz export", file.Name())
+			l.Info().Msgf("Importer: Import file %s detecting as being ListenBrainz export", file.Name())
 			err := importer.ImportListenBrainzExport(logger.NewContext(l), store, mbzc, file.Name())
 			if err != nil {
-				l.Err(err).Msgf("Failed to import file: %s", file.Name())
+				l.Err(err).Msgf("Importer: Failed to import file: %s", file.Name())
 			}
 		} else if strings.Contains(file.Name(), "koito") {
-			l.Info().Msgf("Import file %s detecting as being Koito export", file.Name())
+			l.Info().Msgf("Importer: Import file %s detecting as being Koito export", file.Name())
 			err := importer.ImportKoitoFile(logger.NewContext(l), store, file.Name())
 			if err != nil {
-				l.Err(err).Msgf("Failed to import file: %s", file.Name())
+				l.Err(err).Msgf("Importer: Failed to import file: %s", file.Name())
 			}
 		} else {
-			l.Warn().Msgf("File %s not recognized as a valid import file; make sure it is valid and named correctly", file.Name())
+			l.Warn().Msgf("Importer: File %s not recognized as a valid import file; make sure it is valid and named correctly", file.Name())
 		}
 	}
 }
diff --git a/engine/handlers/lbz_submit_listen.go b/engine/handlers/lbz_submit_listen.go
index e92eb48..91eeaac 100644
--- a/engine/handlers/lbz_submit_listen.go
+++ b/engine/handlers/lbz_submit_listen.go
@@ -103,7 +103,7 @@ func LbzSubmitListenHandler(store db.DB, mbzc mbz.MusicBrainzCaller) func(w http
 			return
 		}
 
-		l.Debug().Any("request_body", req).Msg("LbzSubmitListenHandler: Parsed request body")
+		l.Info().Any("request_body", req).Msg("LbzSubmitListenHandler: Parsed request body")
 
 		if len(req.Payload) < 1 {
 			l.Debug().Msg("LbzSubmitListenHandler: Payload is empty")
diff --git a/internal/catalog/duration.go b/internal/catalog/duration.go
new file mode 100644
index 0000000..808ebd0
--- /dev/null
+++ b/internal/catalog/duration.go
@@ -0,0 +1,90 @@
+package catalog
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/gabehf/koito/internal/db"
+	"github.com/gabehf/koito/internal/logger"
+	"github.com/gabehf/koito/internal/mbz"
+	"github.com/google/uuid"
+)
+
+// BackfillTrackDurationsFromMusicBrainz fills in missing track durations using
+// MusicBrainz. It pages through tracks that have a MusicBrainz ID but a zero
+// duration, in ascending ID order, and stores each length MusicBrainz reports
+// (converted from milliseconds to whole seconds). Tracks that cannot be fetched
+// or that have no length on MusicBrainz are logged and skipped. It returns an
+// error only when a page of tracks cannot be loaded from the store.
+func BackfillTrackDurationsFromMusicBrainz(
+	ctx context.Context,
+	store db.DB,
+	mbzCaller mbz.MusicBrainzCaller,
+) error {
+	l := logger.FromContext(ctx)
+	l.Info().Msg("BackfillTrackDurationsFromMusicBrainz: Starting backfill of track durations from MusicBrainz")
+
+	var from int32 = 0
+
+	for {
+		tracks, err := store.GetTracksWithNoDurationButHaveMbzID(ctx, from)
+		if err != nil {
+			return fmt.Errorf("BackfillTrackDurationsFromMusicBrainz: failed to fetch tracks for duration backfill: %w", err)
+		}
+
+		// An empty page means every remaining candidate has been visited.
+		if len(tracks) == 0 {
+			if from == 0 {
+				l.Info().Msg("BackfillTrackDurationsFromMusicBrainz: No tracks need updating. Skipping backfill...")
+			} else {
+				l.Info().Msg("BackfillTrackDurationsFromMusicBrainz: Backfill complete")
+			}
+			return nil
+		}
+
+		for _, track := range tracks {
+			from = track.ID
+
+			if track.MbzID == nil || *track.MbzID == uuid.Nil {
+				continue
+			}
+
+			l.Debug().
+				Str("title", track.Title).
+				Str("mbz_id", track.MbzID.String()).
+				Msg("BackfillTrackDurationsFromMusicBrainz: Backfilling duration from MusicBrainz")
+
+			mbzTrack, err := mbzCaller.GetTrack(ctx, *track.MbzID)
+			if err != nil {
+				l.Err(err).
+					Str("title", track.Title).
+					Msg("BackfillTrackDurationsFromMusicBrainz: Failed to fetch track from MusicBrainz")
+				continue
+			}
+
+			if mbzTrack.LengthMs <= 0 {
+				l.Debug().
+					Str("title", track.Title).
+					Msg("BackfillTrackDurationsFromMusicBrainz: MusicBrainz track has no duration")
+				continue
+			}
+
+			durationSeconds := int32(mbzTrack.LengthMs / 1000)
+
+			err = store.UpdateTrack(ctx, db.UpdateTrackOpts{
+				ID:       track.ID,
+				Duration: durationSeconds,
+			})
+			if err != nil {
+				l.Err(err).
+					Str("title", track.Title).
+					Msg("BackfillTrackDurationsFromMusicBrainz: Failed to update track duration")
+			} else {
+				l.Info().
+					Str("title", track.Title).
+					Int32("duration_seconds", durationSeconds).
+					Msg("BackfillTrackDurationsFromMusicBrainz: Track duration backfilled successfully")
+			}
+		}
+	}
+}
diff --git a/internal/db/db.go b/internal/db/db.go
index e725bc8..4695967 100644
--- a/internal/db/db.go
+++ b/internal/db/db.go
@@ -16,6 +16,7 @@ type DB interface {
 	GetAlbum(ctx context.Context, opts GetAlbumOpts) (*models.Album, error)
 	GetAlbumWithNoMbzIDByTitles(ctx context.Context, artistId int32, titles []string) (*models.Album, error)
 	GetTrack(ctx context.Context, opts GetTrackOpts) (*models.Track, error)
+	GetTracksWithNoDurationButHaveMbzID(ctx context.Context, from int32) ([]*models.Track, error)
 	GetArtistsForAlbum(ctx context.Context, id int32) ([]*models.Artist, error)
 	GetArtistsForTrack(ctx context.Context, id int32) ([]*models.Artist, error)
 	GetTopTracksPaginated(ctx context.Context, opts GetItemsOpts) (*PaginatedResponse[*models.Track], error)
diff --git a/internal/db/psql/track.go b/internal/db/psql/track.go
index 6634397..f20263a 100644
--- a/internal/db/psql/track.go
+++ b/internal/db/psql/track.go
@@ -375,3 +375,31 @@ func (d *Psql) SetPrimaryTrackArtist(ctx context.Context, id int32, artistId int
 	}
 	return tx.Commit(ctx)
 }
+
+// GetTracksWithNoDurationButHaveMbzID returns up to 20 tracks with an ID greater
+// than from that have a MusicBrainz ID but a duration of zero, ordered by ID.
+// Callers page by passing the last ID they received; an empty result means done.
+func (d *Psql) GetTracksWithNoDurationButHaveMbzID(ctx context.Context, from int32) ([]*models.Track, error) {
+	results, err := d.q.GetTracksWithNoDurationButHaveMbzID(ctx, repository.GetTracksWithNoDurationButHaveMbzIDParams{
+		Limit: 20,
+		ID:    from,
+	})
+	if errors.Is(err, pgx.ErrNoRows) {
+		return nil, nil
+	} else if err != nil {
+		return nil, fmt.Errorf("GetTracksWithNoDurationButHaveMbzID: %w", err)
+	}
+
+	ret := make([]*models.Track, 0)
+
+	for _, v := range results {
+		ret = append(ret, &models.Track{
+			ID:       v.ID,
+			Duration: v.Duration,
+			MbzID:    v.MusicBrainzID,
+			Title:    v.Title,
+		})
+	}
+
+	return ret, nil
+}
diff --git a/internal/repository/track.sql.go b/internal/repository/track.sql.go
index 883e13c..6b11b01 100644
--- a/internal/repository/track.sql.go
+++ b/internal/repository/track.sql.go
@@ -447,6 +447,48 @@ func (q *Queries) GetTrackByTrackInfo(ctx context.Context, arg GetTrackByTrackIn
 	return i, err
 }
 
+const getTracksWithNoDurationButHaveMbzID = `-- name: GetTracksWithNoDurationButHaveMbzID :many
+SELECT
+  id, musicbrainz_id, duration, release_id, title
+FROM tracks_with_title
+WHERE duration = 0
+  AND musicbrainz_id IS NOT NULL
+  AND id > $2
+ORDER BY id ASC
+LIMIT $1
+`
+
+type GetTracksWithNoDurationButHaveMbzIDParams struct {
+	Limit int32
+	ID    int32
+}
+
+func (q *Queries) GetTracksWithNoDurationButHaveMbzID(ctx context.Context, arg GetTracksWithNoDurationButHaveMbzIDParams) ([]TracksWithTitle, error) {
+	rows, err := q.db.Query(ctx, getTracksWithNoDurationButHaveMbzID, arg.Limit, arg.ID)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+	var items []TracksWithTitle
+	for rows.Next() {
+		var i TracksWithTitle
+		if err := rows.Scan(
+			&i.ID,
+			&i.MusicBrainzID,
+			&i.Duration,
+			&i.ReleaseID,
+			&i.Title,
+		); err != nil {
+			return nil, err
+		}
+		items = append(items, i)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return items, nil
+}
+
 const insertTrack = `-- name: InsertTrack :one
 INSERT INTO tracks (musicbrainz_id, release_id, duration)
 VALUES ($1, $2, $3)