use std::collections::HashMap; use shanty_data::PopularTrack; use shanty_db::entities::track::Model as Track; use crate::types::ScoredTrack; /// Popularity exponent curve (0-10 scale). /// 0 = no preference, 10 = heavy popular bias. const POPULARITY_EXPONENTS: [f64; 11] = [ 0.0, 0.06, 0.17, 0.33, 0.67, 1.30, 1.50, 1.70, 1.94, 2.22, 2.50, ]; /// Score all tracks for the given artists, returning scored tracks for ranking. /// /// `artists` is a list of (mbid_or_name, display_name, similarity_score) tuples. /// `tracks_by_artist` maps artist identifier -> their local tracks. /// `top_tracks_by_artist` maps artist identifier -> their Last.fm top tracks. pub fn score_tracks( artists: &[(String, String, f64)], tracks_by_artist: &HashMap>, top_tracks_by_artist: &HashMap>, popularity_bias: u8, _global_popularity: u8, max_tracks_per_artist: Option, ) -> Vec { let bias = popularity_bias.min(10) as usize; let mut scored = Vec::new(); for (artist_key, name, match_score) in artists { let local_tracks = match tracks_by_artist.get(artist_key) { Some(t) if !t.is_empty() => t, _ => continue, }; let top_tracks = top_tracks_by_artist .get(artist_key) .cloned() .unwrap_or_default(); // Build playcount lookups by lowercase name and by MBID let playcount_by_name: HashMap = top_tracks .iter() .map(|t| (t.name.to_lowercase(), t.playcount)) .collect(); let playcount_by_mbid: HashMap = top_tracks .iter() .filter_map(|t| t.mbid.as_ref().map(|m| (m.clone(), t.playcount))) .collect(); let max_playcount = playcount_by_name .values() .copied() .max() .unwrap_or(1) .max(1); for track in local_tracks { let title_lower = track.title.as_ref().map(|t| t.to_lowercase()); // Match by: exact title, MBID, and prefix — take the MAXIMUM playcount // across all methods so a popular base track isn't hidden by a less // popular variant that happens to match exactly. let mut best_playcount: Option = None; let mut consider = |pc: u64| { best_playcount = Some(best_playcount.map_or(pc, |cur: u64| cur.max(pc))); }; // Exact title match if let Some(pc) = title_lower .as_ref() .and_then(|t| playcount_by_name.get(t).copied()) { consider(pc); } // MBID match if let Some(pc) = track .musicbrainz_id .as_ref() .and_then(|id| playcount_by_mbid.get(id).copied()) { consider(pc); } // Prefix match: local title starts with a top track name, or vice versa if let Some(local) = title_lower.as_ref() && let Some((_, &pc)) = playcount_by_name .iter() .filter(|(top_name, _)| { local.starts_with(top_name.as_str()) || top_name.starts_with(local.as_str()) }) .max_by_key(|&(_, &pc)| pc) { consider(pc); } let playcount = best_playcount; // If we have popularity data, use it; unmatched tracks get a low base score let (popularity, similarity, score) = if !playcount_by_name.is_empty() { let playcount = playcount.unwrap_or(0); let popularity = if playcount > 0 { (playcount as f64 / max_playcount as f64).powf(POPULARITY_EXPONENTS[bias]) } else { // Unmatched track: small base score so it can still appear 0.01 }; let similarity = (match_score.exp()) / std::f64::consts::E; let score = similarity * popularity; (popularity, similarity, score) } else { // No top tracks data — use uniform scoring based on similarity only let similarity = (match_score.exp()) / std::f64::consts::E; (1.0, similarity, similarity) }; scored.push(ScoredTrack { track_id: track.id, file_path: track.file_path.clone(), title: track.title.clone(), artist: name.clone(), artist_mbid: track .artist_id .map(|_| artist_key.clone()) .or_else(|| Some(artist_key.clone())), album: track.album.clone(), duration: track.duration, score, popularity, similarity, }); } } // Step 1: Cap tracks per artist based on popularity bias let mut by_artist: HashMap> = HashMap::new(); for t in scored { let key = t.artist_mbid.clone().unwrap_or_else(|| t.artist.clone()); by_artist.entry(key).or_default().push(t); } let cap = if let Some(explicit) = max_tracks_per_artist { Some((explicit as usize).max(1)) } else if popularity_bias == 0 { None } else { let b = popularity_bias as f64; let c = if b <= 5.0 { 90.0 - 12.8 * b } else { 26.0 - 3.2 * (b - 5.0) }; Some((c.round() as usize).max(1)) }; for group in by_artist.values_mut() { group.sort_by(|a, b| { b.score .partial_cmp(&a.score) .unwrap_or(std::cmp::Ordering::Equal) }); if let Some(cap) = cap { group.truncate(cap); } } // Step 2: Normalize so each artist's total weight = their similarity let similarity_map: HashMap<&str, f64> = artists .iter() .map(|(key, _, sim)| (key.as_str(), *sim)) .collect(); for (key, group) in &mut by_artist { let total: f64 = group.iter().map(|t| t.score).sum(); if total > 0.0 { let sim = similarity_map.get(key.as_str()).copied().unwrap_or(1.0); for t in group.iter_mut() { t.score *= sim / total; } } } let mut result: Vec = by_artist.into_values().flatten().collect(); // Step 3: Apply global popularity weighting if _global_popularity > 0 { let gp = _global_popularity.min(10) as usize; let gp_exponent = POPULARITY_EXPONENTS[gp]; let gp_strength = _global_popularity as f64 / 10.0; // Find max playcount across ALL artists let global_max: u64 = top_tracks_by_artist .values() .flat_map(|tracks| tracks.iter().map(|t| t.playcount)) .max() .unwrap_or(1) .max(1); // Build a global playcount lookup (lowercase name -> max playcount) let mut global_playcounts: HashMap = HashMap::new(); for tracks in top_tracks_by_artist.values() { for t in tracks { let key = t.name.to_lowercase(); global_playcounts .entry(key) .and_modify(|c| *c = (*c).max(t.playcount)) .or_insert(t.playcount); } } // Apply to ALL tracks: popular ones get boosted, unknown ones get reduced. // Factor range: unknown tracks get `1 - gp_strength` (minimum 0.01), // top global track gets 1.0 + gp_strength (up to 2.0 at max setting). for t in &mut result { let playcount = t .title .as_ref() .and_then(|title| global_playcounts.get(&title.to_lowercase()).copied()) .unwrap_or(0); let global_pop = if playcount > 0 { (playcount as f64 / global_max as f64).powf(gp_exponent) } else { 0.0 }; // Map global_pop [0, 1] to a factor centered around 1.0: // global_pop=0 → 1.0 - gp_strength, global_pop=1 → 1.0 + gp_strength let factor = (1.0 + gp_strength * (2.0 * global_pop - 1.0)).max(0.01); t.score *= factor; } } result }