feat: backfill duration from musicbrainz (#135)

* feat: backfill durations from musicbrainz

* chore: make request body dump info level
This commit is contained in:
Gabe Farrell 2026-01-14 00:08:05 -05:00 committed by GitHub
parent 288d04d714
commit df59605418
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 208 additions and 16 deletions

View file

@ -0,0 +1,84 @@
package catalog
import (
"context"
"fmt"
"github.com/gabehf/koito/internal/db"
"github.com/gabehf/koito/internal/logger"
"github.com/gabehf/koito/internal/mbz"
"github.com/google/uuid"
)
// BackfillTrackDurationsFromMusicBrainz walks every track that the store
// reports as having no duration but a MusicBrainz ID, looks each one up via
// mbzCaller, and writes the duration (in whole seconds) back to the store.
//
// Tracks are fetched page by page using the ID of the last track seen as the
// cursor. Failures for an individual track (lookup error, missing duration,
// update error) are logged and skipped so that one bad track does not stop
// the backfill. An error is returned only when fetching a page fails or when
// ctx is cancelled.
func BackfillTrackDurationsFromMusicBrainz(
	ctx context.Context,
	store db.DB,
	mbzCaller mbz.MusicBrainzCaller,
) error {
	l := logger.FromContext(ctx)
	l.Info().Msg("BackfillTrackDurationsFromMusicBrainz: Starting backfill of track durations from MusicBrainz")

	// from is the pagination cursor: the ID of the last track processed.
	var from int32 = 0

	for {
		tracks, err := store.GetTracksWithNoDurationButHaveMbzID(ctx, from)
		if err != nil {
			return fmt.Errorf("BackfillTrackDurationsFromMusicBrainz: failed to fetch tracks for duration backfill: %w", err)
		}

		// an empty page means there are no more results
		if len(tracks) == 0 {
			if from == 0 {
				l.Info().Msg("BackfillTrackDurationsFromMusicBrainz: No tracks need updating. Skipping backfill...")
			} else {
				l.Info().Msg("BackfillTrackDurationsFromMusicBrainz: Backfill complete")
			}
			return nil
		}

		for _, track := range tracks {
			// Stop promptly on cancellation instead of issuing a doomed
			// MusicBrainz request for every remaining track in the page.
			if err := ctx.Err(); err != nil {
				return fmt.Errorf("BackfillTrackDurationsFromMusicBrainz: backfill interrupted: %w", err)
			}

			// Advance the cursor before any skip so the next page always
			// starts after this track, whether or not it was updated.
			from = track.ID

			if track.MbzID == nil || *track.MbzID == uuid.Nil {
				continue
			}

			l.Debug().
				Str("title", track.Title).
				Str("mbz_id", track.MbzID.String()).
				Msg("BackfillTrackDurationsFromMusicBrainz: Backfilling duration from MusicBrainz")

			mbzTrack, err := mbzCaller.GetTrack(ctx, *track.MbzID)
			if err != nil {
				l.Err(err).
					Str("title", track.Title).
					Msg("BackfillTrackDurationsFromMusicBrainz: Failed to fetch track from MusicBrainz")
				continue
			}

			if mbzTrack.LengthMs <= 0 {
				l.Debug().
					Str("title", track.Title).
					Msg("BackfillTrackDurationsFromMusicBrainz: MusicBrainz track has no duration")
				continue
			}

			durationSeconds := int32(mbzTrack.LengthMs / 1000)
			// A length under one second truncates to 0, which is the same
			// value that marks a track as having no duration; writing it
			// would be reported as a success without changing anything.
			if durationSeconds <= 0 {
				l.Debug().
					Str("title", track.Title).
					Msg("BackfillTrackDurationsFromMusicBrainz: MusicBrainz track duration does not yield a positive number of seconds")
				continue
			}

			err = store.UpdateTrack(ctx, db.UpdateTrackOpts{
				ID:       track.ID,
				Duration: durationSeconds,
			})
			if err != nil {
				l.Err(err).
					Str("title", track.Title).
					Msg("BackfillTrackDurationsFromMusicBrainz: Failed to update track duration")
			} else {
				l.Info().
					Str("title", track.Title).
					Int32("duration_seconds", durationSeconds).
					Msg("BackfillTrackDurationsFromMusicBrainz: Track duration backfilled successfully")
			}
		}
	}
}

View file

@ -16,6 +16,7 @@ type DB interface {
GetAlbum(ctx context.Context, opts GetAlbumOpts) (*models.Album, error)
GetAlbumWithNoMbzIDByTitles(ctx context.Context, artistId int32, titles []string) (*models.Album, error)
GetTrack(ctx context.Context, opts GetTrackOpts) (*models.Track, error)
GetTracksWithNoDurationButHaveMbzID(ctx context.Context, from int32) ([]*models.Track, error)
GetArtistsForAlbum(ctx context.Context, id int32) ([]*models.Artist, error)
GetArtistsForTrack(ctx context.Context, id int32) ([]*models.Artist, error)
GetTopTracksPaginated(ctx context.Context, opts GetItemsOpts) (*PaginatedResponse[*models.Track], error)

View file

@ -375,3 +375,29 @@ func (d *Psql) SetPrimaryTrackArtist(ctx context.Context, id int32, artistId int
}
return tx.Commit(ctx)
}
// GetTracksWithNoDurationButHaveMbzID returns one page (at most 20 entries)
// of tracks selected by the repository query of the same name, restricted to
// tracks whose ID is greater than from. Pass 0 for the first page and the ID
// of the last track received for each following page.
//
// When there are no more matching tracks the returned slice is empty (it may
// be nil), with a nil error; callers should test len(result) == 0.
func (d *Psql) GetTracksWithNoDurationButHaveMbzID(ctx context.Context, from int32) ([]*models.Track, error) {
	results, err := d.q.GetTracksWithNoDurationButHaveMbzID(ctx, repository.GetTracksWithNoDurationButHaveMbzIDParams{
		Limit: 20,
		// Use the caller's cursor; a fixed 0 here would return the same
		// first page on every call and the caller would never advance.
		ID: from,
	})
	if errors.Is(err, pgx.ErrNoRows) {
		return nil, nil
	} else if err != nil {
		return nil, fmt.Errorf("GetTracksWithNoDurationButHaveMbzID: %w", err)
	}

	ret := make([]*models.Track, 0, len(results))
	for _, v := range results {
		ret = append(ret, &models.Track{
			ID:       v.ID,
			Duration: v.Duration,
			MbzID:    v.MusicBrainzID,
			Title:    v.Title,
		})
	}
	return ret, nil
}

View file

@ -447,6 +447,48 @@ func (q *Queries) GetTrackByTrackInfo(ctx context.Context, arg GetTrackByTrackIn
return i, err
}
// getTracksWithNoDurationButHaveMbzID selects rows from tracks_with_title
// whose duration is 0 and whose musicbrainz_id is set, restricted to ids
// greater than $2, ordered by ascending id and capped at $1 rows.
const getTracksWithNoDurationButHaveMbzID = `-- name: GetTracksWithNoDurationButHaveMbzID :many
SELECT
id, musicbrainz_id, duration, release_id, title
FROM tracks_with_title
WHERE duration = 0
AND musicbrainz_id IS NOT NULL
AND id > $2
ORDER BY id ASC
LIMIT $1
`
// GetTracksWithNoDurationButHaveMbzIDParams holds the arguments for the
// GetTracksWithNoDurationButHaveMbzID query.
type GetTracksWithNoDurationButHaveMbzIDParams struct {
// Limit is bound to $1, the maximum number of rows returned.
Limit int32
// ID is bound to $2; only rows with an id greater than this are returned.
ID int32
}
// GetTracksWithNoDurationButHaveMbzID executes the
// getTracksWithNoDurationButHaveMbzID statement with arg.Limit and arg.ID as
// its two positional parameters and collects every returned row.
//
// The result is a nil slice when the query yields no rows. Any error from
// starting the query, scanning a row, or iterating the result set is returned
// unchanged together with a nil slice.
func (q *Queries) GetTracksWithNoDurationButHaveMbzID(ctx context.Context, arg GetTracksWithNoDurationButHaveMbzIDParams) ([]TracksWithTitle, error) {
	rows, err := q.db.Query(ctx, getTracksWithNoDurationButHaveMbzID, arg.Limit, arg.ID)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var tracks []TracksWithTitle
	for rows.Next() {
		var row TracksWithTitle
		// Destination order must match the column order of the SELECT list.
		scanErr := rows.Scan(&row.ID, &row.MusicBrainzID, &row.Duration, &row.ReleaseID, &row.Title)
		if scanErr != nil {
			return nil, scanErr
		}
		tracks = append(tracks, row)
	}

	// Surface any error that ended the iteration early.
	if iterErr := rows.Err(); iterErr != nil {
		return nil, iterErr
	}
	return tracks, nil
}
const insertTrack = `-- name: InsertTrack :one
INSERT INTO tracks (musicbrainz_id, release_id, duration)
VALUES ($1, $2, $3)