//! Track scoring: ranks artists' local tracks by combining artist similarity
//! with playcount-based popularity data.
use std::collections::HashMap;
|
|
|
|
use shanty_data::PopularTrack;
|
|
use shanty_db::entities::track::Model as Track;
|
|
|
|
use crate::types::ScoredTrack;
|
|
|
|
/// Exponent lookup table for the popularity curve, indexed by bias level (0-10).
///
/// A track's normalized playcount (a value in `[0, 1]`) is raised to the
/// exponent selected by the bias level. Level 0 means no preference: the
/// exponent `0.0` maps every matched track to `1.0`. Level 10 means a heavy
/// bias toward popular tracks.
const POPULARITY_EXPONENTS: [f64; 11] = [
    0.0,  // level 0
    0.06, // level 1
    0.17, // level 2
    0.33, // level 3
    0.67, // level 4
    1.30, // level 5
    1.50, // level 6
    1.70, // level 7
    1.94, // level 8
    2.22, // level 9
    2.50, // level 10
];
|
|
|
|
/// Score all tracks for the given artists, returning scored tracks for ranking.
|
|
///
|
|
/// `artists` is a list of (mbid_or_name, display_name, similarity_score) tuples.
|
|
/// `tracks_by_artist` maps artist identifier -> their local tracks.
|
|
/// `top_tracks_by_artist` maps artist identifier -> their Last.fm top tracks.
|
|
pub fn score_tracks(
|
|
artists: &[(String, String, f64)],
|
|
tracks_by_artist: &HashMap<String, Vec<Track>>,
|
|
top_tracks_by_artist: &HashMap<String, Vec<PopularTrack>>,
|
|
popularity_bias: u8,
|
|
_global_popularity: u8,
|
|
max_tracks_per_artist: Option<u8>,
|
|
) -> Vec<ScoredTrack> {
|
|
let bias = popularity_bias.min(10) as usize;
|
|
let mut scored = Vec::new();
|
|
|
|
for (artist_key, name, match_score) in artists {
|
|
let local_tracks = match tracks_by_artist.get(artist_key) {
|
|
Some(t) if !t.is_empty() => t,
|
|
_ => continue,
|
|
};
|
|
|
|
let top_tracks = top_tracks_by_artist
|
|
.get(artist_key)
|
|
.cloned()
|
|
.unwrap_or_default();
|
|
|
|
// Build playcount lookups by lowercase name and by MBID
|
|
let playcount_by_name: HashMap<String, u64> = top_tracks
|
|
.iter()
|
|
.map(|t| (t.name.to_lowercase(), t.playcount))
|
|
.collect();
|
|
|
|
let playcount_by_mbid: HashMap<String, u64> = top_tracks
|
|
.iter()
|
|
.filter_map(|t| t.mbid.as_ref().map(|m| (m.clone(), t.playcount)))
|
|
.collect();
|
|
|
|
let max_playcount = playcount_by_name
|
|
.values()
|
|
.copied()
|
|
.max()
|
|
.unwrap_or(1)
|
|
.max(1);
|
|
|
|
for track in local_tracks {
|
|
let title_lower = track.title.as_ref().map(|t| t.to_lowercase());
|
|
|
|
// Match by: exact title, MBID, and prefix — take the MAXIMUM playcount
|
|
// across all methods so a popular base track isn't hidden by a less
|
|
// popular variant that happens to match exactly.
|
|
let mut best_playcount: Option<u64> = None;
|
|
let mut consider = |pc: u64| {
|
|
best_playcount = Some(best_playcount.map_or(pc, |cur: u64| cur.max(pc)));
|
|
};
|
|
|
|
// Exact title match
|
|
if let Some(pc) = title_lower
|
|
.as_ref()
|
|
.and_then(|t| playcount_by_name.get(t).copied())
|
|
{
|
|
consider(pc);
|
|
}
|
|
|
|
// MBID match
|
|
if let Some(pc) = track
|
|
.musicbrainz_id
|
|
.as_ref()
|
|
.and_then(|id| playcount_by_mbid.get(id).copied())
|
|
{
|
|
consider(pc);
|
|
}
|
|
|
|
// Prefix match: local title starts with a top track name, or vice versa
|
|
if let Some(local) = title_lower.as_ref()
|
|
&& let Some((_, &pc)) = playcount_by_name
|
|
.iter()
|
|
.filter(|(top_name, _)| {
|
|
local.starts_with(top_name.as_str()) || top_name.starts_with(local.as_str())
|
|
})
|
|
.max_by_key(|&(_, &pc)| pc)
|
|
{
|
|
consider(pc);
|
|
}
|
|
|
|
let playcount = best_playcount;
|
|
|
|
// If we have popularity data, use it; unmatched tracks get a low base score
|
|
let (popularity, similarity, score) = if !playcount_by_name.is_empty() {
|
|
let playcount = playcount.unwrap_or(0);
|
|
|
|
let popularity = if playcount > 0 {
|
|
(playcount as f64 / max_playcount as f64).powf(POPULARITY_EXPONENTS[bias])
|
|
} else {
|
|
// Unmatched track: small base score so it can still appear
|
|
0.01
|
|
};
|
|
|
|
let similarity = (match_score.exp()) / std::f64::consts::E;
|
|
let score = similarity * popularity;
|
|
(popularity, similarity, score)
|
|
} else {
|
|
// No top tracks data — use uniform scoring based on similarity only
|
|
let similarity = (match_score.exp()) / std::f64::consts::E;
|
|
(1.0, similarity, similarity)
|
|
};
|
|
|
|
scored.push(ScoredTrack {
|
|
track_id: track.id,
|
|
file_path: track.file_path.clone(),
|
|
title: track.title.clone(),
|
|
artist: name.clone(),
|
|
artist_mbid: track
|
|
.artist_id
|
|
.map(|_| artist_key.clone())
|
|
.or_else(|| Some(artist_key.clone())),
|
|
album: track.album.clone(),
|
|
duration: track.duration,
|
|
score,
|
|
popularity,
|
|
similarity,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Step 1: Cap tracks per artist based on popularity bias
|
|
let mut by_artist: HashMap<String, Vec<ScoredTrack>> = HashMap::new();
|
|
for t in scored {
|
|
let key = t.artist_mbid.clone().unwrap_or_else(|| t.artist.clone());
|
|
by_artist.entry(key).or_default().push(t);
|
|
}
|
|
|
|
let cap = if let Some(explicit) = max_tracks_per_artist {
|
|
Some((explicit as usize).max(1))
|
|
} else if popularity_bias == 0 {
|
|
None
|
|
} else {
|
|
let b = popularity_bias as f64;
|
|
let c = if b <= 5.0 {
|
|
90.0 - 12.8 * b
|
|
} else {
|
|
26.0 - 3.2 * (b - 5.0)
|
|
};
|
|
Some((c.round() as usize).max(1))
|
|
};
|
|
|
|
for group in by_artist.values_mut() {
|
|
group.sort_by(|a, b| {
|
|
b.score
|
|
.partial_cmp(&a.score)
|
|
.unwrap_or(std::cmp::Ordering::Equal)
|
|
});
|
|
if let Some(cap) = cap {
|
|
group.truncate(cap);
|
|
}
|
|
}
|
|
|
|
// Step 2: Normalize so each artist's total weight = their similarity
|
|
let similarity_map: HashMap<&str, f64> = artists
|
|
.iter()
|
|
.map(|(key, _, sim)| (key.as_str(), *sim))
|
|
.collect();
|
|
|
|
for (key, group) in &mut by_artist {
|
|
let total: f64 = group.iter().map(|t| t.score).sum();
|
|
if total > 0.0 {
|
|
let sim = similarity_map.get(key.as_str()).copied().unwrap_or(1.0);
|
|
for t in group.iter_mut() {
|
|
t.score *= sim / total;
|
|
}
|
|
}
|
|
}
|
|
|
|
let mut result: Vec<ScoredTrack> = by_artist.into_values().flatten().collect();
|
|
|
|
// Step 3: Apply global popularity weighting
|
|
if _global_popularity > 0 {
|
|
let gp = _global_popularity.min(10) as usize;
|
|
let gp_exponent = POPULARITY_EXPONENTS[gp];
|
|
let gp_strength = _global_popularity as f64 / 10.0;
|
|
|
|
// Find max playcount across ALL artists
|
|
let global_max: u64 = top_tracks_by_artist
|
|
.values()
|
|
.flat_map(|tracks| tracks.iter().map(|t| t.playcount))
|
|
.max()
|
|
.unwrap_or(1)
|
|
.max(1);
|
|
|
|
// Build a global playcount lookup (lowercase name -> max playcount)
|
|
let mut global_playcounts: HashMap<String, u64> = HashMap::new();
|
|
for tracks in top_tracks_by_artist.values() {
|
|
for t in tracks {
|
|
let key = t.name.to_lowercase();
|
|
global_playcounts
|
|
.entry(key)
|
|
.and_modify(|c| *c = (*c).max(t.playcount))
|
|
.or_insert(t.playcount);
|
|
}
|
|
}
|
|
|
|
// Apply to ALL tracks: popular ones get boosted, unknown ones get reduced.
|
|
// Factor range: unknown tracks get `1 - gp_strength` (minimum 0.01),
|
|
// top global track gets 1.0 + gp_strength (up to 2.0 at max setting).
|
|
for t in &mut result {
|
|
let playcount = t
|
|
.title
|
|
.as_ref()
|
|
.and_then(|title| global_playcounts.get(&title.to_lowercase()).copied())
|
|
.unwrap_or(0);
|
|
|
|
let global_pop = if playcount > 0 {
|
|
(playcount as f64 / global_max as f64).powf(gp_exponent)
|
|
} else {
|
|
0.0
|
|
};
|
|
// Map global_pop [0, 1] to a factor centered around 1.0:
|
|
// global_pop=0 → 1.0 - gp_strength, global_pop=1 → 1.0 + gp_strength
|
|
let factor = (1.0 + gp_strength * (2.0 * global_pop - 1.0)).max(0.01);
|
|
t.score *= factor;
|
|
}
|
|
}
|
|
|
|
result
|
|
}
|