From 70aedb49f2e0f09a6b78a056aa80af90ee452d32 Mon Sep 17 00:00:00 2001 From: Connor Johnstone Date: Wed, 4 Mar 2026 23:28:29 -0500 Subject: [PATCH] Some code cleanup --- src/lastfm.rs | 93 +++++++++--------- src/main.rs | 251 ++++++++---------------------------------------- src/metadata.rs | 79 ++++++++------- src/playlist.rs | 217 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 345 insertions(+), 295 deletions(-) create mode 100644 src/playlist.rs diff --git a/src/lastfm.rs b/src/lastfm.rs index d7203f4..1651847 100644 --- a/src/lastfm.rs +++ b/src/lastfm.rs @@ -98,6 +98,37 @@ impl LastfmClient { Ok(None) } + /// Try MBID lookup then name lookup, returning whichever yields more results. + #[allow(clippy::type_complexity)] + fn dual_lookup( + &self, + method: &str, + artist_mbid: &str, + artist_name: Option<&str>, + extra_params: &str, + parse: fn(&str) -> Result, Box>, + ) -> Result, Box> { + let mbid_url = format!( + "{}?method={}&mbid={}&api_key={}{}&format=json", + BASE_URL, method, artist_mbid, self.api_key, extra_params + ); + let mbid_results = match self.fetch_or_none(&mbid_url)? { + Some(body) => parse(&body).unwrap_or_default(), + None => Vec::new(), + }; + + let name_results = match self.fetch_by_name(method, artist_name, extra_params)? { + Some(body) => parse(&body).unwrap_or_default(), + None => Vec::new(), + }; + + if name_results.len() > mbid_results.len() { + Ok(name_results) + } else { + Ok(mbid_results) + } + } + fn parse_similar_artists(body: &str) -> Result, Box> { let resp: SimilarArtistsResponse = serde_json::from_str(body)?; Ok(resp @@ -120,30 +151,13 @@ impl LastfmClient { artist_mbid: &str, artist_name: Option<&str>, ) -> Result, Box> { - let method = "artist.getSimilar"; - let extra = "&limit=500"; - - // Try MBID lookup - let mbid_url = format!( - "{}?method={}&mbid={}&api_key={}{}&format=json", - BASE_URL, method, artist_mbid, self.api_key, extra - ); - let mbid_results = match self.fetch_or_none(&mbid_url)? { - Some(body) => Self::parse_similar_artists(&body).unwrap_or_default(), - None => Vec::new(), - }; - - // Try name lookup and return whichever has more results - let name_results = match self.fetch_by_name(method, artist_name, extra)? { - Some(body) => Self::parse_similar_artists(&body).unwrap_or_default(), - None => Vec::new(), - }; - - if name_results.len() > mbid_results.len() { - Ok(name_results) - } else { - Ok(mbid_results) - } + self.dual_lookup( + "artist.getSimilar", + artist_mbid, + artist_name, + "&limit=500", + Self::parse_similar_artists, + ) } fn parse_top_tracks(body: &str) -> Result, Box> { @@ -165,29 +179,12 @@ impl LastfmClient { artist_mbid: &str, artist_name: Option<&str>, ) -> Result, Box> { - let method = "artist.getTopTracks"; - let extra = "&limit=1000"; - - // Try MBID lookup - let mbid_url = format!( - "{}?method={}&mbid={}&api_key={}{}&format=json", - BASE_URL, method, artist_mbid, self.api_key, extra - ); - let mbid_results = match self.fetch_or_none(&mbid_url)? { - Some(body) => Self::parse_top_tracks(&body).unwrap_or_default(), - None => Vec::new(), - }; - - // Try name lookup and return whichever has more results - let name_results = match self.fetch_by_name(method, artist_name, extra)? { - Some(body) => Self::parse_top_tracks(&body).unwrap_or_default(), - None => Vec::new(), - }; - - if name_results.len() > mbid_results.len() { - Ok(name_results) - } else { - Ok(mbid_results) - } + self.dual_lookup( + "artist.getTopTracks", + artist_mbid, + artist_name, + "&limit=1000", + Self::parse_top_tracks, + ) } } diff --git a/src/main.rs b/src/main.rs index fe27b05..9f50fbb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,16 +4,17 @@ mod filesystem; mod lastfm; mod metadata; mod mpd; +mod playlist; mod tui; -use std::collections::HashMap; use std::env; use std::path::{Path, PathBuf}; use clap::{Parser, Subcommand}; -use rand::distr::weighted::WeightedIndex; use rand::prelude::*; +use playlist::Candidate; + #[derive(Parser)] #[command(name = "playlists")] struct Cli { @@ -122,16 +123,23 @@ fn cmd_index(verbose: bool, force: bool, directory: &str) { let dir = Path::new(directory); for path in filesystem::walk_music_files(dir) { - let artist_mbid = match metadata::read_artist_mbid(&path) { - Ok(Some(mbid)) => mbid, - Ok(None) => continue, + let tags = match metadata::read_tags(&path, &[ + metadata::Tag::ArtistMbid, + metadata::Tag::TrackMbid, + metadata::Tag::ArtistName, + metadata::Tag::TrackTitle, + ]) { + Ok(t) => t, Err(e) => { - eprintln!("{}: could not read artist MBID: {e}", path.display()); + eprintln!("{}: could not read tags: {e}", path.display()); continue; } }; - let recording_mbid = metadata::read_track_mbid(&path).ok().flatten(); + let Some(artist_mbid) = tags[0].clone() else { continue }; + let recording_mbid = tags[1].clone(); + let artist_name = tags[2].clone(); + let track_title = tags[3].clone(); let already_indexed = match db::artist_exists(&conn, &artist_mbid) { Ok(exists) => exists, @@ -141,7 +149,6 @@ fn cmd_index(verbose: bool, force: bool, directory: &str) { } }; - let artist_name = metadata::read_artist_name(&path).ok().flatten(); let display_name = artist_name.as_deref().unwrap_or(&artist_mbid); if !already_indexed || force { @@ -181,8 +188,6 @@ fn cmd_index(verbose: bool, force: bool, directory: &str) { println!("Skipping {display_name} (already indexed)"); } - let track_title = metadata::read_track_title(&path).ok().flatten(); - let path_str = path.to_string_lossy(); if let Err(e) = db::insert_track(&conn, &path_str, &artist_mbid, recording_mbid.as_deref(), track_title.as_deref()) { eprintln!("DB error inserting track {}: {e}", path.display()); @@ -233,10 +238,6 @@ fn cmd_build(opts: BuildOptions, file: Option<&str>) { build_playlist(&conn, &artist_mbid, &seed_name, &opts); } -const POPULARITY_EXPONENTS: [f64; 11] = [ - 0.0, 0.03, 0.08, 0.15, 0.25, 0.35, 0.55, 0.85, 1.30, 1.80, 2.50, -]; - fn build_playlist( conn: &rusqlite::Connection, artist_mbid: &str, @@ -251,97 +252,49 @@ fn build_playlist( } }; - // Seed artist + similar artists: (mbid, name, match_score) let mut artists: Vec<(String, String, f64)> = vec![ (artist_mbid.to_string(), seed_name.to_string(), 1.0), ]; artists.extend(similar); - // Collect scored tracks: (total, popularity, match_score, artist_name, path) - let mut playlist: Vec<(f64, f64, f64, String, String)> = Vec::new(); - - for (mbid, name, match_score) in &artists { - let local_tracks = match db::get_local_tracks_for_artist(conn, mbid) { - Ok(t) => t, - Err(e) => { - eprintln!("DB error for {name}: {e}"); - continue; - } - }; - - if local_tracks.is_empty() { - continue; - } - - // Look up pre-indexed top tracks from DB - let top_tracks_by_name = match db::get_top_tracks_by_name(conn, mbid) { - Ok(t) => t, - Err(e) => { - eprintln!("DB error fetching top tracks for {name}: {e}"); - Vec::new() - } - }; - - let playcount_by_name: std::collections::HashMap = - top_tracks_by_name.into_iter().collect(); - - let max_playcount = playcount_by_name - .values() - .copied() - .max() - .unwrap_or(1) - .max(1); - - for (track_path, _recording_mbid, title) in &local_tracks { - // Match by title (lowercased), fall back to recording MBID - let playcount = title - .as_ref() - .and_then(|t| playcount_by_name.get(&t.to_lowercase()).copied()) - .or_else(|| { - _recording_mbid - .as_ref() - .and_then(|id| playcount_by_name.get(id).copied()) - }); - - // Skip tracks not in the artist's top 1000 - let Some(playcount) = playcount else { continue }; - - let popularity = if playcount > 0 { - (playcount as f64 / max_playcount as f64).powf(POPULARITY_EXPONENTS[opts.popularity_bias as usize]) - } else { - 0.0 - }; - - let similarity = (match_score.exp()) / std::f64::consts::E; - let total = similarity * popularity; - playlist.push((total, popularity, similarity, name.clone(), track_path.clone())); - } - } + let scored = playlist::score_tracks(conn, &artists, opts.popularity_bias); if opts.verbose { - let mut sorted = playlist.clone(); - sorted.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal)); - for (total, popularity, similarity, artist, track_path) in &sorted { - eprintln!("{total:.4}\t{similarity:.4}\t{popularity:.4}\t{artist}\t{track_path}"); + let mut sorted = scored.iter().collect::>(); + sorted.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal)); + for t in &sorted { + eprintln!("{:.4}\t{:.4}\t{:.4}\t{}\t{}", t.score, t.similarity, t.popularity, t.artist, t.path); } } - // Convert to (score, artist, path) for playlist generation - let candidates: Vec<(f64, String, String)> = playlist + let candidates: Vec = scored .into_iter() - .map(|(total, _, _, artist, path)| (total, artist, path)) + .map(|t| Candidate { + score: t.score, + artist: t.artist, + path: t.path, + }) .collect(); - let mut selected = generate_playlist(&candidates, opts.count, seed_name); + let mut selected = playlist::generate_playlist(&candidates, opts.count, seed_name); if opts.random { selected.shuffle(&mut rand::rng()); } else if opts.shuffle { - selected = interleave_artists(selected); + selected = playlist::interleave_artists(selected); } - let tracks: Vec = selected.iter().map(|(_, _, p)| p.clone()).collect(); + let tracks: Vec = selected.into_iter().map(|c| c.path).collect(); + output_tracks(&tracks, opts, seed_name, conn); +} + +fn output_tracks( + tracks: &[String], + opts: &BuildOptions, + seed_name: &str, + conn: &rusqlite::Connection, +) { if opts.mpd { let music_dir = env::var("MPD_MUSIC_DIR").unwrap_or_default(); if music_dir.is_empty() { @@ -355,7 +308,7 @@ fn build_playlist( std::process::exit(1); } }; - client.queue_playlist(&tracks, &music_dir); + client.queue_playlist(tracks, &music_dir); } else if opts.airsonic { let client = match airsonic::AirsonicClient::new() { Ok(c) => c, @@ -364,135 +317,13 @@ fn build_playlist( std::process::exit(1); } }; - if let Err(e) = client.create_playlist(seed_name, &tracks, conn, opts.verbose) { + if let Err(e) = client.create_playlist(seed_name, tracks, conn, opts.verbose) { eprintln!("Airsonic error: {e}"); std::process::exit(1); } } else { - for track in &tracks { + for track in tracks { println!("{track}"); } } } - -fn generate_playlist( - candidates: &[(f64, String, String)], - n: usize, - seed_name: &str, -) -> Vec<(f64, String, String)> { - if candidates.is_empty() { - return Vec::new(); - } - - let mut rng = rand::rng(); - let mut pool: Vec<(f64, String, String)> = candidates.to_vec(); - let mut result: Vec<(f64, String, String)> = Vec::new(); - let mut artist_counts: HashMap = HashMap::new(); - - let seed_min = (n / 10).max(1); - - let distinct_artists: usize = { - let mut seen = std::collections::HashSet::new(); - for (_, artist, _) in &pool { - seen.insert(artist.clone()); - } - seen.len() - }; - - let divisor = match distinct_artists { - 1 => 1, - 2 => 2, - 3 => 3, - 4 => 3, - 5 => 4, - _ => 5, - }; - let artist_cap = n.div_ceil(divisor).max(1); - - while result.len() < n && !pool.is_empty() { - let seed_count = *artist_counts.get(seed_name).unwrap_or(&0); - let remaining = n - result.len(); - let seed_deficit = seed_min.saturating_sub(seed_count); - let force_seed = seed_deficit > 0 && remaining <= seed_deficit; - - // Find eligible tracks (artist hasn't hit cap) - let eligible: Vec = pool - .iter() - .enumerate() - .filter(|(_, (_, artist, _))| { - if force_seed { - artist == seed_name - } else { - *artist_counts.get(artist).unwrap_or(&0) < artist_cap - } - }) - .map(|(i, _)| i) - .collect(); - - // If no eligible tracks, relax and use all remaining - let indices: &[usize] = if eligible.is_empty() { - &(0..pool.len()).collect::>() - } else { - &eligible - }; - - let weights: Vec = indices.iter().map(|&i| pool[i].0.max(0.001)).collect(); - let dist = match WeightedIndex::new(&weights) { - Ok(d) => d, - Err(_) => break, - }; - - let picked = indices[dist.sample(&mut rng)]; - let track = pool.remove(picked); - *artist_counts.entry(track.1.clone()).or_insert(0) += 1; - result.push(track); - } - - result -} - -/// Reorder tracks so that artists are evenly spread out. -/// Greedily picks from the artist with the most remaining tracks, -/// avoiding back-to-back repeats when possible. -fn interleave_artists(tracks: Vec<(f64, String, String)>) -> Vec<(f64, String, String)> { - use std::collections::BTreeMap; - - let mut rng = rand::rng(); - - // Group by artist, shuffling within each group - let mut by_artist: BTreeMap> = BTreeMap::new(); - for track in tracks { - by_artist.entry(track.1.clone()).or_default().push(track); - } - for group in by_artist.values_mut() { - group.shuffle(&mut rng); - } - - let mut result = Vec::new(); - let mut last_artist: Option = None; - - while !by_artist.is_empty() { - // Sort artists by remaining count (descending), break ties randomly - let mut artists: Vec = by_artist.keys().cloned().collect(); - artists.sort_by(|a, b| by_artist[b].len().cmp(&by_artist[a].len())); - - // Pick the first artist that isn't the same as the last one - let pick = artists - .iter() - .find(|a| last_artist.as_ref() != Some(a)) - .or(artists.first()) - .cloned() - .unwrap(); - - let group = by_artist.get_mut(&pick).unwrap(); - let track = group.pop().unwrap(); - if group.is_empty() { - by_artist.remove(&pick); - } - - last_artist = Some(pick); - result.push(track); - } - - result -} diff --git a/src/metadata.rs b/src/metadata.rs index 1a8ca3f..23ae3d2 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -3,46 +3,51 @@ use std::path::Path; use lofty::file::TaggedFileExt; use lofty::tag::ItemKey; -/// Extract the artist name from a music file. +/// Tags that can be read from a music file. +pub enum Tag { + ArtistName, + ArtistMbid, + TrackTitle, + TrackMbid, +} + +impl Tag { + fn item_key(&self) -> ItemKey { + match self { + Tag::ArtistName => ItemKey::TrackArtist, + Tag::ArtistMbid => ItemKey::MusicBrainzArtistId, + Tag::TrackTitle => ItemKey::TrackTitle, + Tag::TrackMbid => ItemKey::MusicBrainzRecordingId, + } + } +} + +fn read_tag(path: &Path, key: ItemKey) -> Result, lofty::error::LoftyError> { + let tagged_file = lofty::read_from_path(path)?; + let Some(tag) = tagged_file.primary_tag().or_else(|| tagged_file.first_tag()) else { + return Ok(None); + }; + Ok(tag.get_string(key).map(String::from)) +} + +/// Read multiple tags from a music file in a single file open. +/// Returns a Vec in the same order as the input keys. +pub fn read_tags(path: &Path, keys: &[Tag]) -> Result>, lofty::error::LoftyError> { + let tagged_file = lofty::read_from_path(path)?; + let Some(tag) = tagged_file.primary_tag().or_else(|| tagged_file.first_tag()) else { + return Ok(vec![None; keys.len()]); + }; + Ok(keys + .iter() + .map(|k| tag.get_string(k.item_key()).map(String::from)) + .collect()) +} + pub fn read_artist_name(path: &Path) -> Result, lofty::error::LoftyError> { - let tagged_file = lofty::read_from_path(path)?; - - let Some(tag) = tagged_file.primary_tag().or_else(|| tagged_file.first_tag()) else { - return Ok(None); - }; - - Ok(tag.get_string(ItemKey::TrackArtist).map(String::from)) + read_tag(path, ItemKey::TrackArtist) } -/// Extract the MusicBrainz artist ID from a music file. pub fn read_artist_mbid(path: &Path) -> Result, lofty::error::LoftyError> { - let tagged_file = lofty::read_from_path(path)?; - - let Some(tag) = tagged_file.primary_tag().or_else(|| tagged_file.first_tag()) else { - return Ok(None); - }; - - Ok(tag.get_string(ItemKey::MusicBrainzArtistId).map(String::from)) + read_tag(path, ItemKey::MusicBrainzArtistId) } -/// Extract the track title from a music file. -pub fn read_track_title(path: &Path) -> Result, lofty::error::LoftyError> { - let tagged_file = lofty::read_from_path(path)?; - - let Some(tag) = tagged_file.primary_tag().or_else(|| tagged_file.first_tag()) else { - return Ok(None); - }; - - Ok(tag.get_string(ItemKey::TrackTitle).map(String::from)) -} - -/// Extract the MusicBrainz recording ID from a music file. -pub fn read_track_mbid(path: &Path) -> Result, lofty::error::LoftyError> { - let tagged_file = lofty::read_from_path(path)?; - - let Some(tag) = tagged_file.primary_tag().or_else(|| tagged_file.first_tag()) else { - return Ok(None); - }; - - Ok(tag.get_string(ItemKey::MusicBrainzRecordingId).map(String::from)) -} diff --git a/src/playlist.rs b/src/playlist.rs new file mode 100644 index 0000000..da5bf67 --- /dev/null +++ b/src/playlist.rs @@ -0,0 +1,217 @@ +use std::collections::{BTreeMap, HashMap, HashSet}; + +use rand::distr::weighted::WeightedIndex; +use rand::prelude::*; + +use crate::db; + +const POPULARITY_EXPONENTS: [f64; 11] = [ + 0.0, 0.03, 0.08, 0.15, 0.25, 0.35, 0.55, 0.85, 1.30, 1.80, 2.50, +]; + +/// A track with its computed scores, used for verbose output and candidate conversion. +pub struct ScoredTrack { + pub path: String, + pub artist: String, + pub score: f64, + pub popularity: f64, + pub similarity: f64, +} + +/// A weighted candidate for playlist selection. +pub struct Candidate { + pub score: f64, + pub artist: String, + pub path: String, +} + +/// Score all tracks for the given artists, returning scored tracks for ranking. +pub fn score_tracks( + conn: &rusqlite::Connection, + artists: &[(String, String, f64)], + popularity_bias: u8, +) -> Vec { + let mut scored = Vec::new(); + + for (mbid, name, match_score) in artists { + let local_tracks = match db::get_local_tracks_for_artist(conn, mbid) { + Ok(t) => t, + Err(e) => { + eprintln!("DB error for {name}: {e}"); + continue; + } + }; + + if local_tracks.is_empty() { + continue; + } + + let top_tracks_by_name = match db::get_top_tracks_by_name(conn, mbid) { + Ok(t) => t, + Err(e) => { + eprintln!("DB error fetching top tracks for {name}: {e}"); + Vec::new() + } + }; + + let playcount_by_name: HashMap = top_tracks_by_name.into_iter().collect(); + + let max_playcount = playcount_by_name + .values() + .copied() + .max() + .unwrap_or(1) + .max(1); + + for (track_path, recording_mbid, title) in &local_tracks { + let playcount = title + .as_ref() + .and_then(|t| playcount_by_name.get(&t.to_lowercase()).copied()) + .or_else(|| { + recording_mbid + .as_ref() + .and_then(|id| playcount_by_name.get(id).copied()) + }); + + let Some(playcount) = playcount else { continue }; + + let popularity = if playcount > 0 { + (playcount as f64 / max_playcount as f64) + .powf(POPULARITY_EXPONENTS[popularity_bias as usize]) + } else { + 0.0 + }; + + let similarity = (match_score.exp()) / std::f64::consts::E; + let score = similarity * popularity; + scored.push(ScoredTrack { + path: track_path.clone(), + artist: name.clone(), + score, + popularity, + similarity, + }); + } + } + + scored +} + +pub fn generate_playlist( + candidates: &[Candidate], + n: usize, + seed_name: &str, +) -> Vec { + if candidates.is_empty() { + return Vec::new(); + } + + let mut rng = rand::rng(); + let mut pool: Vec<&Candidate> = candidates.iter().collect(); + let mut result: Vec = Vec::new(); + let mut artist_counts: HashMap = HashMap::new(); + + let seed_min = (n / 10).max(1); + + let distinct_artists: usize = { + let mut seen = HashSet::new(); + for c in &pool { + seen.insert(&c.artist); + } + seen.len() + }; + + let divisor = match distinct_artists { + 1 => 1, + 2 => 2, + 3 => 3, + 4 => 3, + 5 => 4, + _ => 5, + }; + let artist_cap = n.div_ceil(divisor).max(1); + + while result.len() < n && !pool.is_empty() { + let seed_count = *artist_counts.get(seed_name).unwrap_or(&0); + let remaining = n - result.len(); + let seed_deficit = seed_min.saturating_sub(seed_count); + let force_seed = seed_deficit > 0 && remaining <= seed_deficit; + + let eligible: Vec = pool + .iter() + .enumerate() + .filter(|(_, c)| { + if force_seed { + c.artist == seed_name + } else { + *artist_counts.get(&c.artist).unwrap_or(&0) < artist_cap + } + }) + .map(|(i, _)| i) + .collect(); + + let indices: &[usize] = if eligible.is_empty() { + &(0..pool.len()).collect::>() + } else { + &eligible + }; + + let weights: Vec = indices.iter().map(|&i| pool[i].score.max(0.001)).collect(); + let dist = match WeightedIndex::new(&weights) { + Ok(d) => d, + Err(_) => break, + }; + + let picked = indices[dist.sample(&mut rng)]; + let track = pool.remove(picked); + *artist_counts.entry(track.artist.clone()).or_insert(0) += 1; + result.push(Candidate { + score: track.score, + artist: track.artist.clone(), + path: track.path.clone(), + }); + } + + result +} + +/// Reorder tracks so that artists are evenly spread out. +/// Greedily picks from the artist with the most remaining tracks, +/// avoiding back-to-back repeats when possible. +pub fn interleave_artists(tracks: Vec) -> Vec { + let mut rng = rand::rng(); + + let mut by_artist: BTreeMap> = BTreeMap::new(); + for track in tracks { + by_artist.entry(track.artist.clone()).or_default().push(track); + } + for group in by_artist.values_mut() { + group.shuffle(&mut rng); + } + + let mut result = Vec::new(); + let mut last_artist: Option = None; + + while !by_artist.is_empty() { + let mut artists: Vec = by_artist.keys().cloned().collect(); + artists.sort_by(|a, b| by_artist[b].len().cmp(&by_artist[a].len())); + + let pick = artists + .iter() + .find(|a| last_artist.as_ref() != Some(a)) + .or(artists.first()) + .cloned() + .unwrap(); + + let group = by_artist.get_mut(&pick).unwrap(); + let track = group.pop().unwrap(); + if group.is_empty() { + by_artist.remove(&pick); + } + + last_artist = Some(pick); + result.push(track); + } + + result +}