fix: correct interest bucket queries (#169)

This commit is contained in:
Gabe Farrell 2026-01-22 17:01:46 -05:00 committed by GitHub
parent cb4d177875
commit 08fc9eed86
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 204 additions and 250 deletions

View file

@ -1,162 +1,139 @@
-- name: GetGroupedListensFromArtist :many -- name: GetGroupedListensFromArtist :many
WITH artist_listens AS ( WITH bounds AS (
SELECT SELECT
l.listened_at MIN(l.listened_at) AS start_time,
NOW() AS end_time
FROM listens l FROM listens l
JOIN tracks t ON t.id = l.track_id JOIN tracks t ON t.id = l.track_id
JOIN artist_tracks at ON at.track_id = t.id JOIN artist_tracks at ON at.track_id = t.id
WHERE at.artist_id = $1 WHERE at.artist_id = $1
), ),
bounds AS ( stats AS (
SELECT SELECT
MIN(listened_at) AS start_time, start_time,
MAX(listened_at) AS end_time end_time,
FROM artist_listens EXTRACT(EPOCH FROM (end_time - start_time)) AS total_seconds,
((end_time - start_time) / sqlc.arg(bucket_count)::int) AS bucket_interval
FROM bounds
), ),
bucketed AS ( bucket_series AS (
SELECT generate_series(0, sqlc.arg(bucket_count)::int - 1) AS idx
),
listen_indices AS (
SELECT SELECT
LEAST( LEAST(
sqlc.arg(bucket_count) - 1, sqlc.arg(bucket_count)::int - 1,
FLOOR( FLOOR(
( (EXTRACT(EPOCH FROM (l.listened_at - s.start_time)) / NULLIF(s.total_seconds, 0))
EXTRACT(EPOCH FROM (al.listened_at - b.start_time)) * sqlc.arg(bucket_count)::int
/
NULLIF(EXTRACT(EPOCH FROM (b.end_time - b.start_time)), 0)
) * sqlc.arg(bucket_count)
)::int )::int
) AS bucket_idx, ) AS bucket_idx
b.start_time, FROM listens l
b.end_time JOIN tracks t ON t.id = l.track_id
FROM artist_listens al JOIN artist_tracks at ON at.track_id = t.id
CROSS JOIN bounds b CROSS JOIN stats s
), WHERE at.artist_id = $1
aggregated AS ( AND s.start_time IS NOT NULL
SELECT
start_time
+ (
bucket_idx * (end_time - start_time)
/ sqlc.arg(bucket_count)
) AS bucket_start,
start_time
+ (
(bucket_idx + 1) * (end_time - start_time)
/ sqlc.arg(bucket_count)
) AS bucket_end,
COUNT(*) AS listen_count
FROM bucketed
GROUP BY bucket_idx, start_time, end_time
) )
SELECT SELECT
bucket_start::timestamptz, (s.start_time + (s.bucket_interval * bs.idx))::timestamptz AS bucket_start,
bucket_end::timestamptz, (s.start_time + (s.bucket_interval * (bs.idx + 1)))::timestamptz AS bucket_end,
listen_count COUNT(li.bucket_idx) AS listen_count
FROM aggregated FROM bucket_series bs
ORDER BY bucket_start; CROSS JOIN stats s
LEFT JOIN listen_indices li ON bs.idx = li.bucket_idx
WHERE s.start_time IS NOT NULL
GROUP BY bs.idx, s.start_time, s.bucket_interval
ORDER BY bs.idx;
-- name: GetGroupedListensFromRelease :many -- name: GetGroupedListensFromRelease :many
WITH artist_listens AS ( WITH bounds AS (
SELECT SELECT
l.listened_at MIN(l.listened_at) AS start_time,
NOW() AS end_time
FROM listens l FROM listens l
JOIN tracks t ON t.id = l.track_id JOIN tracks t ON t.id = l.track_id
WHERE t.release_id = $1 WHERE t.release_id = $1
), ),
bounds AS ( stats AS (
SELECT SELECT
MIN(listened_at) AS start_time, start_time,
MAX(listened_at) AS end_time end_time,
FROM artist_listens EXTRACT(EPOCH FROM (end_time - start_time)) AS total_seconds,
((end_time - start_time) / sqlc.arg(bucket_count)::int) AS bucket_interval
FROM bounds
), ),
bucketed AS ( bucket_series AS (
SELECT generate_series(0, sqlc.arg(bucket_count)::int - 1) AS idx
),
listen_indices AS (
SELECT SELECT
LEAST( LEAST(
sqlc.arg(bucket_count) - 1, sqlc.arg(bucket_count)::int - 1,
FLOOR( FLOOR(
( (EXTRACT(EPOCH FROM (l.listened_at - s.start_time)) / NULLIF(s.total_seconds, 0))
EXTRACT(EPOCH FROM (al.listened_at - b.start_time)) * sqlc.arg(bucket_count)::int
/
NULLIF(EXTRACT(EPOCH FROM (b.end_time - b.start_time)), 0)
) * sqlc.arg(bucket_count)
)::int )::int
) AS bucket_idx, ) AS bucket_idx
b.start_time, FROM listens l
b.end_time JOIN tracks t ON t.id = l.track_id
FROM artist_listens al CROSS JOIN stats s
CROSS JOIN bounds b WHERE t.release_id = $1
), AND s.start_time IS NOT NULL
aggregated AS (
SELECT
start_time
+ (
bucket_idx * (end_time - start_time)
/ sqlc.arg(bucket_count)
) AS bucket_start,
start_time
+ (
(bucket_idx + 1) * (end_time - start_time)
/ sqlc.arg(bucket_count)
) AS bucket_end,
COUNT(*) AS listen_count
FROM bucketed
GROUP BY bucket_idx, start_time, end_time
) )
SELECT SELECT
bucket_start::timestamptz, (s.start_time + (s.bucket_interval * bs.idx))::timestamptz AS bucket_start,
bucket_end::timestamptz, (s.start_time + (s.bucket_interval * (bs.idx + 1)))::timestamptz AS bucket_end,
listen_count COUNT(li.bucket_idx) AS listen_count
FROM aggregated FROM bucket_series bs
ORDER BY bucket_start; CROSS JOIN stats s
LEFT JOIN listen_indices li ON bs.idx = li.bucket_idx
WHERE s.start_time IS NOT NULL
GROUP BY bs.idx, s.start_time, s.bucket_interval
ORDER BY bs.idx;
-- name: GetGroupedListensFromTrack :many -- name: GetGroupedListensFromTrack :many
WITH artist_listens AS ( WITH bounds AS (
SELECT SELECT
l.listened_at MIN(l.listened_at) AS start_time,
NOW() AS end_time
FROM listens l FROM listens l
JOIN tracks t ON t.id = l.track_id JOIN tracks t ON t.id = l.track_id
WHERE t.id = $1 WHERE t.id = $1
), ),
bounds AS ( stats AS (
SELECT SELECT
MIN(listened_at) AS start_time, start_time,
MAX(listened_at) AS end_time end_time,
FROM artist_listens EXTRACT(EPOCH FROM (end_time - start_time)) AS total_seconds,
((end_time - start_time) / sqlc.arg(bucket_count)::int) AS bucket_interval
FROM bounds
), ),
bucketed AS ( bucket_series AS (
SELECT generate_series(0, sqlc.arg(bucket_count)::int - 1) AS idx
),
listen_indices AS (
SELECT SELECT
LEAST( LEAST(
sqlc.arg(bucket_count) - 1, sqlc.arg(bucket_count)::int - 1,
FLOOR( FLOOR(
( (EXTRACT(EPOCH FROM (l.listened_at - s.start_time)) / NULLIF(s.total_seconds, 0))
EXTRACT(EPOCH FROM (al.listened_at - b.start_time)) * sqlc.arg(bucket_count)::int
/
NULLIF(EXTRACT(EPOCH FROM (b.end_time - b.start_time)), 0)
) * sqlc.arg(bucket_count)
)::int )::int
) AS bucket_idx, ) AS bucket_idx
b.start_time, FROM listens l
b.end_time JOIN tracks t ON t.id = l.track_id
FROM artist_listens al CROSS JOIN stats s
CROSS JOIN bounds b WHERE t.id = $1
), AND s.start_time IS NOT NULL
aggregated AS (
SELECT
start_time
+ (
bucket_idx * (end_time - start_time)
/ sqlc.arg(bucket_count)
) AS bucket_start,
start_time
+ (
(bucket_idx + 1) * (end_time - start_time)
/ sqlc.arg(bucket_count)
) AS bucket_end,
COUNT(*) AS listen_count
FROM bucketed
GROUP BY bucket_idx, start_time, end_time
) )
SELECT SELECT
bucket_start::timestamptz, (s.start_time + (s.bucket_interval * bs.idx))::timestamptz AS bucket_start,
bucket_end::timestamptz, (s.start_time + (s.bucket_interval * (bs.idx + 1)))::timestamptz AS bucket_end,
listen_count COUNT(li.bucket_idx) AS listen_count
FROM aggregated FROM bucket_series bs
ORDER BY bucket_start; CROSS JOIN stats s
LEFT JOIN listen_indices li ON bs.idx = li.bucket_idx
WHERE s.start_time IS NOT NULL
GROUP BY bs.idx, s.start_time, s.bucket_interval
ORDER BY bs.idx;

View file

@ -14,54 +14,54 @@ func (d *Psql) GetInterest(ctx context.Context, opts db.GetInterestOpts) ([]db.I
return nil, errors.New("GetInterest: bucket count must be provided") return nil, errors.New("GetInterest: bucket count must be provided")
} }
ret := make([]db.InterestBucket, opts.Buckets) ret := make([]db.InterestBucket, 0)
if opts.ArtistID != 0 { if opts.ArtistID != 0 {
resp, err := d.q.GetGroupedListensFromArtist(ctx, repository.GetGroupedListensFromArtistParams{ resp, err := d.q.GetGroupedListensFromArtist(ctx, repository.GetGroupedListensFromArtistParams{
ArtistID: opts.ArtistID, ArtistID: opts.ArtistID,
BucketCount: opts.Buckets, BucketCount: int32(opts.Buckets),
}) })
if err != nil { if err != nil {
return nil, fmt.Errorf("GetInterest: GetGroupedListensFromArtist: %w", err) return nil, fmt.Errorf("GetInterest: GetGroupedListensFromArtist: %w", err)
} }
for i, v := range resp { for _, v := range resp {
ret[i] = db.InterestBucket{ ret = append(ret, db.InterestBucket{
BucketStart: v.BucketStart, BucketStart: v.BucketStart,
BucketEnd: v.BucketEnd, BucketEnd: v.BucketEnd,
ListenCount: v.ListenCount, ListenCount: v.ListenCount,
} })
} }
return ret, nil return ret, nil
} else if opts.AlbumID != 0 { } else if opts.AlbumID != 0 {
resp, err := d.q.GetGroupedListensFromRelease(ctx, repository.GetGroupedListensFromReleaseParams{ resp, err := d.q.GetGroupedListensFromRelease(ctx, repository.GetGroupedListensFromReleaseParams{
ReleaseID: opts.AlbumID, ReleaseID: opts.AlbumID,
BucketCount: opts.Buckets, BucketCount: int32(opts.Buckets),
}) })
if err != nil { if err != nil {
return nil, fmt.Errorf("GetInterest: GetGroupedListensFromRelease: %w", err) return nil, fmt.Errorf("GetInterest: GetGroupedListensFromRelease: %w", err)
} }
for i, v := range resp { for _, v := range resp {
ret[i] = db.InterestBucket{ ret = append(ret, db.InterestBucket{
BucketStart: v.BucketStart, BucketStart: v.BucketStart,
BucketEnd: v.BucketEnd, BucketEnd: v.BucketEnd,
ListenCount: v.ListenCount, ListenCount: v.ListenCount,
} })
} }
return ret, nil return ret, nil
} else if opts.TrackID != 0 { } else if opts.TrackID != 0 {
resp, err := d.q.GetGroupedListensFromTrack(ctx, repository.GetGroupedListensFromTrackParams{ resp, err := d.q.GetGroupedListensFromTrack(ctx, repository.GetGroupedListensFromTrackParams{
ID: opts.TrackID, ID: opts.TrackID,
BucketCount: opts.Buckets, BucketCount: int32(opts.Buckets),
}) })
if err != nil { if err != nil {
return nil, fmt.Errorf("GetInterest: GetGroupedListensFromTrack: %w", err) return nil, fmt.Errorf("GetInterest: GetGroupedListensFromTrack: %w", err)
} }
for i, v := range resp { for _, v := range resp {
ret[i] = db.InterestBucket{ ret = append(ret, db.InterestBucket{
BucketStart: v.BucketStart, BucketStart: v.BucketStart,
BucketEnd: v.BucketEnd, BucketEnd: v.BucketEnd,
ListenCount: v.ListenCount, ListenCount: v.ListenCount,
} })
} }
return ret, nil return ret, nil
} else { } else {

View file

@ -11,64 +11,57 @@ import (
) )
const getGroupedListensFromArtist = `-- name: GetGroupedListensFromArtist :many const getGroupedListensFromArtist = `-- name: GetGroupedListensFromArtist :many
WITH artist_listens AS ( WITH bounds AS (
SELECT SELECT
l.listened_at MIN(l.listened_at) AS start_time,
NOW() AS end_time
FROM listens l FROM listens l
JOIN tracks t ON t.id = l.track_id JOIN tracks t ON t.id = l.track_id
JOIN artist_tracks at ON at.track_id = t.id JOIN artist_tracks at ON at.track_id = t.id
WHERE at.artist_id = $1 WHERE at.artist_id = $1
), ),
bounds AS ( stats AS (
SELECT SELECT
MIN(listened_at) AS start_time, start_time,
MAX(listened_at) AS end_time end_time,
FROM artist_listens EXTRACT(EPOCH FROM (end_time - start_time)) AS total_seconds,
((end_time - start_time) / $2::int) AS bucket_interval
FROM bounds
), ),
bucketed AS ( bucket_series AS (
SELECT generate_series(0, $2::int - 1) AS idx
),
listen_indices AS (
SELECT SELECT
LEAST( LEAST(
$2 - 1, $2::int - 1,
FLOOR( FLOOR(
( (EXTRACT(EPOCH FROM (l.listened_at - s.start_time)) / NULLIF(s.total_seconds, 0))
EXTRACT(EPOCH FROM (al.listened_at - b.start_time)) * $2::int
/
NULLIF(EXTRACT(EPOCH FROM (b.end_time - b.start_time)), 0)
) * $2
)::int )::int
) AS bucket_idx, ) AS bucket_idx
b.start_time, FROM listens l
b.end_time JOIN tracks t ON t.id = l.track_id
FROM artist_listens al JOIN artist_tracks at ON at.track_id = t.id
CROSS JOIN bounds b CROSS JOIN stats s
), WHERE at.artist_id = $1
aggregated AS ( AND s.start_time IS NOT NULL
SELECT
start_time
+ (
bucket_idx * (end_time - start_time)
/ $2
) AS bucket_start,
start_time
+ (
(bucket_idx + 1) * (end_time - start_time)
/ $2
) AS bucket_end,
COUNT(*) AS listen_count
FROM bucketed
GROUP BY bucket_idx, start_time, end_time
) )
SELECT SELECT
bucket_start::timestamptz, (s.start_time + (s.bucket_interval * bs.idx))::timestamptz AS bucket_start,
bucket_end::timestamptz, (s.start_time + (s.bucket_interval * (bs.idx + 1)))::timestamptz AS bucket_end,
listen_count COUNT(li.bucket_idx) AS listen_count
FROM aggregated FROM bucket_series bs
ORDER BY bucket_start CROSS JOIN stats s
LEFT JOIN listen_indices li ON bs.idx = li.bucket_idx
WHERE s.start_time IS NOT NULL
GROUP BY bs.idx, s.start_time, s.bucket_interval
ORDER BY bs.idx
` `
type GetGroupedListensFromArtistParams struct { type GetGroupedListensFromArtistParams struct {
ArtistID int32 ArtistID int32
BucketCount interface{} BucketCount int32
} }
type GetGroupedListensFromArtistRow struct { type GetGroupedListensFromArtistRow struct {
@ -98,63 +91,55 @@ func (q *Queries) GetGroupedListensFromArtist(ctx context.Context, arg GetGroupe
} }
const getGroupedListensFromRelease = `-- name: GetGroupedListensFromRelease :many const getGroupedListensFromRelease = `-- name: GetGroupedListensFromRelease :many
WITH artist_listens AS ( WITH bounds AS (
SELECT SELECT
l.listened_at MIN(l.listened_at) AS start_time,
NOW() AS end_time
FROM listens l FROM listens l
JOIN tracks t ON t.id = l.track_id JOIN tracks t ON t.id = l.track_id
WHERE t.release_id = $1 WHERE t.release_id = $1
), ),
bounds AS ( stats AS (
SELECT SELECT
MIN(listened_at) AS start_time, start_time,
MAX(listened_at) AS end_time end_time,
FROM artist_listens EXTRACT(EPOCH FROM (end_time - start_time)) AS total_seconds,
((end_time - start_time) / $2::int) AS bucket_interval
FROM bounds
), ),
bucketed AS ( bucket_series AS (
SELECT generate_series(0, $2::int - 1) AS idx
),
listen_indices AS (
SELECT SELECT
LEAST( LEAST(
$2 - 1, $2::int - 1,
FLOOR( FLOOR(
( (EXTRACT(EPOCH FROM (l.listened_at - s.start_time)) / NULLIF(s.total_seconds, 0))
EXTRACT(EPOCH FROM (al.listened_at - b.start_time)) * $2::int
/
NULLIF(EXTRACT(EPOCH FROM (b.end_time - b.start_time)), 0)
) * $2
)::int )::int
) AS bucket_idx, ) AS bucket_idx
b.start_time, FROM listens l
b.end_time JOIN tracks t ON t.id = l.track_id
FROM artist_listens al CROSS JOIN stats s
CROSS JOIN bounds b WHERE t.release_id = $1
), AND s.start_time IS NOT NULL
aggregated AS (
SELECT
start_time
+ (
bucket_idx * (end_time - start_time)
/ $2
) AS bucket_start,
start_time
+ (
(bucket_idx + 1) * (end_time - start_time)
/ $2
) AS bucket_end,
COUNT(*) AS listen_count
FROM bucketed
GROUP BY bucket_idx, start_time, end_time
) )
SELECT SELECT
bucket_start::timestamptz, (s.start_time + (s.bucket_interval * bs.idx))::timestamptz AS bucket_start,
bucket_end::timestamptz, (s.start_time + (s.bucket_interval * (bs.idx + 1)))::timestamptz AS bucket_end,
listen_count COUNT(li.bucket_idx) AS listen_count
FROM aggregated FROM bucket_series bs
ORDER BY bucket_start CROSS JOIN stats s
LEFT JOIN listen_indices li ON bs.idx = li.bucket_idx
WHERE s.start_time IS NOT NULL
GROUP BY bs.idx, s.start_time, s.bucket_interval
ORDER BY bs.idx
` `
type GetGroupedListensFromReleaseParams struct { type GetGroupedListensFromReleaseParams struct {
ReleaseID int32 ReleaseID int32
BucketCount interface{} BucketCount int32
} }
type GetGroupedListensFromReleaseRow struct { type GetGroupedListensFromReleaseRow struct {
@ -184,63 +169,55 @@ func (q *Queries) GetGroupedListensFromRelease(ctx context.Context, arg GetGroup
} }
const getGroupedListensFromTrack = `-- name: GetGroupedListensFromTrack :many const getGroupedListensFromTrack = `-- name: GetGroupedListensFromTrack :many
WITH artist_listens AS ( WITH bounds AS (
SELECT SELECT
l.listened_at MIN(l.listened_at) AS start_time,
NOW() AS end_time
FROM listens l FROM listens l
JOIN tracks t ON t.id = l.track_id JOIN tracks t ON t.id = l.track_id
WHERE t.id = $1 WHERE t.id = $1
), ),
bounds AS ( stats AS (
SELECT SELECT
MIN(listened_at) AS start_time, start_time,
MAX(listened_at) AS end_time end_time,
FROM artist_listens EXTRACT(EPOCH FROM (end_time - start_time)) AS total_seconds,
((end_time - start_time) / $2::int) AS bucket_interval
FROM bounds
), ),
bucketed AS ( bucket_series AS (
SELECT generate_series(0, $2::int - 1) AS idx
),
listen_indices AS (
SELECT SELECT
LEAST( LEAST(
$2 - 1, $2::int - 1,
FLOOR( FLOOR(
( (EXTRACT(EPOCH FROM (l.listened_at - s.start_time)) / NULLIF(s.total_seconds, 0))
EXTRACT(EPOCH FROM (al.listened_at - b.start_time)) * $2::int
/
NULLIF(EXTRACT(EPOCH FROM (b.end_time - b.start_time)), 0)
) * $2
)::int )::int
) AS bucket_idx, ) AS bucket_idx
b.start_time, FROM listens l
b.end_time JOIN tracks t ON t.id = l.track_id
FROM artist_listens al CROSS JOIN stats s
CROSS JOIN bounds b WHERE t.id = $1
), AND s.start_time IS NOT NULL
aggregated AS (
SELECT
start_time
+ (
bucket_idx * (end_time - start_time)
/ $2
) AS bucket_start,
start_time
+ (
(bucket_idx + 1) * (end_time - start_time)
/ $2
) AS bucket_end,
COUNT(*) AS listen_count
FROM bucketed
GROUP BY bucket_idx, start_time, end_time
) )
SELECT SELECT
bucket_start::timestamptz, (s.start_time + (s.bucket_interval * bs.idx))::timestamptz AS bucket_start,
bucket_end::timestamptz, (s.start_time + (s.bucket_interval * (bs.idx + 1)))::timestamptz AS bucket_end,
listen_count COUNT(li.bucket_idx) AS listen_count
FROM aggregated FROM bucket_series bs
ORDER BY bucket_start CROSS JOIN stats s
LEFT JOIN listen_indices li ON bs.idx = li.bucket_idx
WHERE s.start_time IS NOT NULL
GROUP BY bs.idx, s.start_time, s.bucket_interval
ORDER BY bs.idx
` `
type GetGroupedListensFromTrackParams struct { type GetGroupedListensFromTrackParams struct {
ID int32 ID int32
BucketCount interface{} BucketCount int32
} }
type GetGroupedListensFromTrackRow struct { type GetGroupedListensFromTrackRow struct {