Files
Main/shanty-playlist/src/scoring.rs
T
Connor Johnstone ada336d945
CI / check (push) Successful in 1m17s
CI / docker (push) Successful in 2m17s
real fix for fmt,clippy
2026-04-01 23:25:14 -04:00

239 lines
8.4 KiB
Rust

use std::collections::HashMap;
use shanty_data::PopularTrack;
use shanty_db::entities::track::Model as Track;
use crate::types::ScoredTrack;
/// Popularity exponent curve (0-10 scale).
/// 0 = no preference, 10 = heavy popular bias.
///
/// The table is indexed by a bias level that callers clamp to `0..=10`. The
/// selected exponent is applied with `powf` to a track's playcount divided by
/// the largest known playcount (a ratio that is normally in `(0, 1]`). An
/// exponent of `0.0` therefore maps every matched track to a popularity of
/// `1.0`, while larger exponents push less-played tracks towards zero.
const POPULARITY_EXPONENTS: [f64; 11] = [
0.0, 0.06, 0.17, 0.33, 0.67, 1.30, 1.50, 1.70, 1.94, 2.22, 2.50,
];
/// Score all tracks for the given artists, returning scored tracks for ranking.
///
/// `artists` is a list of (mbid_or_name, display_name, similarity_score) tuples.
/// `tracks_by_artist` maps artist identifier -> their local tracks.
/// `top_tracks_by_artist` maps artist identifier -> their Last.fm top tracks.
///
/// `popularity_bias` and `_global_popularity` are 0-10 settings; values above
/// 10 are treated as 10 everywhere they are used. `max_tracks_per_artist`, when
/// set, overrides the per-artist cap otherwise derived from `popularity_bias`
/// (a value of 0 is treated as 1).
///
/// The scoring pipeline is:
/// 1. per-track score = similarity weight x popularity,
/// 2. keep only the best-scoring tracks of each artist (cap),
/// 3. rescale each artist's tracks so their scores sum to the artist's raw
///    similarity,
/// 4. optionally scale every score by a global popularity factor.
///
/// Artists without local tracks are skipped. The order of the returned vector
/// is unspecified (tracks are grouped through a `HashMap`).
pub fn score_tracks(
    artists: &[(String, String, f64)],
    tracks_by_artist: &HashMap<String, Vec<Track>>,
    top_tracks_by_artist: &HashMap<String, Vec<PopularTrack>>,
    popularity_bias: u8,
    _global_popularity: u8,
    max_tracks_per_artist: Option<u8>,
) -> Vec<ScoredTrack> {
    // Clamp both settings once so the exponent lookup, the per-artist cap and
    // the global strength all agree on the same effective level.
    let bias = usize::from(popularity_bias.min(10));
    let global_level = usize::from(_global_popularity.min(10));

    let mut by_artist: HashMap<String, Vec<ScoredTrack>> = HashMap::new();

    for (artist_key, name, match_score) in artists {
        let Some(local_tracks) = tracks_by_artist
            .get(artist_key)
            .filter(|tracks| !tracks.is_empty())
        else {
            continue;
        };

        // Borrow the artist's top tracks; an artist without data gets an empty slice.
        let top_tracks: &[PopularTrack] = top_tracks_by_artist
            .get(artist_key)
            .map(Vec::as_slice)
            .unwrap_or_default();

        // Build playcount lookups by lowercase name and by MBID. Duplicate keys
        // keep the highest playcount so that no lookup can ever exceed
        // `max_playcount` (which would yield a popularity above 1.0).
        let mut playcount_by_name: HashMap<String, u64> =
            HashMap::with_capacity(top_tracks.len());
        let mut playcount_by_mbid: HashMap<String, u64> = HashMap::new();
        let mut max_playcount: u64 = 1;
        for top in top_tracks {
            max_playcount = max_playcount.max(top.playcount);
            let by_name = playcount_by_name
                .entry(top.name.to_lowercase())
                .or_insert(0);
            *by_name = (*by_name).max(top.playcount);
            if let Some(mbid) = top.mbid.as_ref() {
                let by_mbid = playcount_by_mbid.entry(mbid.clone()).or_insert(0);
                *by_mbid = (*by_mbid).max(top.playcount);
            }
        }

        // The similarity weight only depends on the artist, not on the track.
        let similarity = match_score.exp() / std::f64::consts::E;

        let group = by_artist.entry(artist_key.clone()).or_default();
        for track in local_tracks {
            let title_lower = track.title.as_ref().map(|t| t.to_lowercase());

            let popularity = if top_tracks.is_empty() {
                // No top tracks data — use uniform scoring based on similarity only
                1.0
            } else {
                let matched = match_playcount(
                    title_lower.as_deref(),
                    track.musicbrainz_id.as_deref(),
                    &playcount_by_name,
                    &playcount_by_mbid,
                );
                match matched {
                    Some(pc) if pc > 0 => {
                        (pc as f64 / max_playcount as f64).powf(POPULARITY_EXPONENTS[bias])
                    }
                    // Unmatched track: small base score so it can still appear
                    _ => 0.01,
                }
            };

            group.push(ScoredTrack {
                track_id: track.id,
                file_path: track.file_path.clone(),
                title: track.title.clone(),
                artist: name.clone(),
                // Always the artist key: it is also the grouping key used below.
                artist_mbid: Some(artist_key.clone()),
                album: track.album.clone(),
                duration: track.duration,
                score: similarity * popularity,
                popularity,
                similarity,
            });
        }
    }

    // Step 1: Cap tracks per artist based on popularity bias
    let cap = artist_track_cap(bias, max_tracks_per_artist);

    // Step 2: Normalize so each artist's total weight = their similarity
    let similarity_map: HashMap<&str, f64> = artists
        .iter()
        .map(|(key, _, sim)| (key.as_str(), *sim))
        .collect();

    for (key, group) in &mut by_artist {
        // `total_cmp` is a total order even when a score is NaN, which
        // `sort_by` requires of its comparator.
        group.sort_by(|a, b| b.score.total_cmp(&a.score));
        if let Some(cap) = cap {
            group.truncate(cap);
        }

        let total: f64 = group.iter().map(|t| t.score).sum();
        if total > 0.0 {
            let sim = similarity_map.get(key.as_str()).copied().unwrap_or(1.0);
            let scale = sim / total;
            for t in group.iter_mut() {
                t.score *= scale;
            }
        }
    }

    let mut result: Vec<ScoredTrack> = by_artist.into_values().flatten().collect();

    // Step 3: Apply global popularity weighting
    if global_level > 0 {
        apply_global_popularity(&mut result, top_tracks_by_artist, global_level);
    }

    result
}

/// Returns the highest playcount found for a local track across the exact-title,
/// MBID and prefix matching strategies, or `None` when nothing matches.
///
/// Taking the maximum means a popular base track isn't hidden by a less popular
/// variant that happens to match exactly.
fn match_playcount(
    title_lower: Option<&str>,
    mbid: Option<&str>,
    playcount_by_name: &HashMap<String, u64>,
    playcount_by_mbid: &HashMap<String, u64>,
) -> Option<u64> {
    // Exact title match
    let exact = title_lower.and_then(|title| playcount_by_name.get(title).copied());

    // MBID match
    let by_id = mbid.and_then(|id| playcount_by_mbid.get(id).copied());

    // Prefix match: local title starts with a top track name, or vice versa.
    // Empty strings are excluded because an empty string is a prefix of every
    // title and would hand the artist's top playcount to an untitled track.
    let prefix = title_lower
        .filter(|local| !local.is_empty())
        .and_then(|local| {
            playcount_by_name
                .iter()
                .filter(|(top_name, _)| {
                    !top_name.is_empty()
                        && (local.starts_with(top_name.as_str()) || top_name.starts_with(local))
                })
                .map(|(_, &pc)| pc)
                .max()
        });

    exact.into_iter().chain(by_id).chain(prefix).max()
}

/// Returns the maximum number of tracks to keep per artist, or `None` for no cap.
///
/// An explicit limit always wins (minimum 1). Otherwise the cap shrinks as the
/// (already clamped, `0..=10`) bias grows: 77 tracks at bias 1, 26 at bias 5 and
/// 10 at bias 10; bias 0 disables the cap.
fn artist_track_cap(bias: usize, explicit: Option<u8>) -> Option<usize> {
    if let Some(explicit) = explicit {
        return Some(usize::from(explicit).max(1));
    }
    if bias == 0 {
        return None;
    }
    let b = bias as f64;
    let raw = if b <= 5.0 {
        90.0 - 12.8 * b
    } else {
        26.0 - 3.2 * (b - 5.0)
    };
    Some((raw.round() as usize).max(1))
}

/// Scales every track's score by a factor derived from its playcount relative to
/// the most played track across all artists.
///
/// `level` must already be clamped to `1..=10`. With `strength = level / 10`,
/// unknown tracks are scaled by `1 - strength` (never below 0.01) and the
/// globally most played track by `1 + strength` (2.0 at the maximum level).
fn apply_global_popularity(
    tracks: &mut [ScoredTrack],
    top_tracks_by_artist: &HashMap<String, Vec<PopularTrack>>,
    level: usize,
) {
    let exponent = POPULARITY_EXPONENTS[level];
    let strength = level as f64 / 10.0;

    // Single pass over all top tracks: find the max playcount across ALL
    // artists and build the lookup (lowercase name -> max playcount).
    // NOTE(review): the lookup is keyed by title only, so tracks sharing a
    // title across different artists share a playcount — confirm this is intended.
    let mut global_max: u64 = 1;
    let mut global_playcounts: HashMap<String, u64> = HashMap::new();
    for top in top_tracks_by_artist.values().flatten() {
        global_max = global_max.max(top.playcount);
        let slot = global_playcounts
            .entry(top.name.to_lowercase())
            .or_insert(0);
        *slot = (*slot).max(top.playcount);
    }

    // Apply to ALL tracks: popular ones get boosted, unknown ones get reduced.
    for t in tracks.iter_mut() {
        let playcount = t
            .title
            .as_ref()
            .and_then(|title| global_playcounts.get(&title.to_lowercase()).copied())
            .unwrap_or(0);
        let global_pop = if playcount > 0 {
            (playcount as f64 / global_max as f64).powf(exponent)
        } else {
            0.0
        };
        // Map global_pop [0, 1] to a factor centered around 1.0:
        // global_pop=0 → 1.0 - strength, global_pop=1 → 1.0 + strength
        let factor = (1.0 + strength * (2.0 * global_pop - 1.0)).max(0.01);
        t.score *= factor;
    }
}