Hardened the indexing for blink-182

This commit is contained in:
Connor Johnstone
2026-03-03 13:30:51 -05:00
parent f5f0273853
commit 60a1d704dd
4 changed files with 104 additions and 26 deletions

View File

@@ -21,7 +21,9 @@ def find_db():
def load_artists(db_path): def load_artists(db_path):
conn = sqlite3.connect(db_path) conn = sqlite3.connect(db_path)
rows = conn.execute( rows = conn.execute(
"SELECT mbid, COALESCE(name, mbid) FROM artists ORDER BY name" "SELECT a.mbid, COALESCE(a.name, a.mbid) FROM artists a "
"LEFT JOIN tracks t ON t.artist_mbid = a.mbid "
"GROUP BY a.mbid ORDER BY COUNT(t.path) DESC, a.name"
).fetchall() ).fetchall()
conn.close() conn.close()
return rows # [(mbid, display_name), ...] return rows # [(mbid, display_name), ...]

View File

@@ -72,7 +72,9 @@ pub fn get_local_tracks_for_artist(
pub fn get_all_artists(conn: &Connection) -> Result<Vec<(String, String)>, rusqlite::Error> { pub fn get_all_artists(conn: &Connection) -> Result<Vec<(String, String)>, rusqlite::Error> {
let mut stmt = conn.prepare( let mut stmt = conn.prepare(
"SELECT mbid, COALESCE(name, mbid) FROM artists ORDER BY name", "SELECT a.mbid, COALESCE(a.name, a.mbid) FROM artists a \
LEFT JOIN tracks t ON t.artist_mbid = a.mbid \
GROUP BY a.mbid ORDER BY COUNT(t.path) DESC, a.name",
)?; )?;
let rows = stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?; let rows = stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?;
rows.collect() rows.collect()
@@ -84,6 +86,10 @@ pub fn insert_top_tracks(
tracks: &[TopTrack], tracks: &[TopTrack],
) -> Result<(), rusqlite::Error> { ) -> Result<(), rusqlite::Error> {
let tx = conn.unchecked_transaction()?; let tx = conn.unchecked_transaction()?;
tx.execute(
"DELETE FROM top_tracks WHERE artist_mbid = ?1",
rusqlite::params![artist_mbid],
)?;
for t in tracks { for t in tracks {
let name_lower = t.name.to_lowercase(); let name_lower = t.name.to_lowercase();
tx.execute( tx.execute(
@@ -130,9 +136,13 @@ pub fn insert_artist_with_similar(
) -> Result<(), rusqlite::Error> { ) -> Result<(), rusqlite::Error> {
let tx = conn.unchecked_transaction()?; let tx = conn.unchecked_transaction()?;
tx.execute( tx.execute(
"INSERT OR IGNORE INTO artists (mbid, name) VALUES (?1, ?2)", "INSERT OR REPLACE INTO artists (mbid, name) VALUES (?1, ?2)",
rusqlite::params![mbid, name], rusqlite::params![mbid, name],
)?; )?;
tx.execute(
"DELETE FROM similar_artists WHERE artist_mbid = ?1",
rusqlite::params![mbid],
)?;
for s in similar { for s in similar {
tx.execute( tx.execute(
"INSERT OR IGNORE INTO similar_artists (artist_mbid, similar_mbid, similar_name, match_score) "INSERT OR IGNORE INTO similar_artists (artist_mbid, similar_mbid, similar_name, match_score)

View File

@@ -79,6 +79,30 @@ impl LastfmClient {
Ok(Some(body)) Ok(Some(body))
} }
/// Query a Last.fm `method` by artist name instead of by MBID.
///
/// Any character in the range U+2010..=U+2015 (the Unicode hyphen/dash variants)
/// is replaced by an ASCII `-` before the name is URL-encoded, so names typed
/// with typographic dashes are sent with a plain hyphen.
///
/// * `method` - value placed in the `method=` query parameter.
/// * `artist_name` - name to look up; when `None`, no URL is built and
///   `Ok(None)` is returned.
/// * `extra_params` - raw text appended to the query string right after the
///   API key (callers in this file pass strings such as `"&limit=500"`).
///
/// Returns whatever `fetch_or_none` produces for the constructed URL.
fn fetch_by_name(
    &self,
    method: &str,
    artist_name: Option<&str>,
    extra_params: &str,
) -> Result<Option<String>, Box<dyn std::error::Error>> {
    // Guard: without a name there is nothing to look up.
    let raw_name = match artist_name {
        Some(raw_name) => raw_name,
        None => return Ok(None),
    };

    // Single pass over the name, folding every dash variant to '-'.
    let ascii_name: String = raw_name
        .chars()
        .map(|ch| {
            if matches!(ch, '\u{2010}'..='\u{2015}') {
                '-'
            } else {
                ch
            }
        })
        .collect();

    let query_name = urlencoding::encode(&ascii_name);
    let url = format!(
        "{}?method={}&artist={}&api_key={}{}&format=json",
        BASE_URL, method, query_name, self.api_key, extra_params
    );
    self.fetch_or_none(&url)
}
/// Try fetching by MBID first, fall back to artist name. /// Try fetching by MBID first, fall back to artist name.
fn fetch_with_fallback( fn fetch_with_fallback(
&self, &self,
@@ -95,19 +119,7 @@ impl LastfmClient {
return Ok(Some(body)); return Ok(Some(body));
} }
// Fall back to artist name self.fetch_by_name(method, artist_name, extra_params)
if let Some(name) = artist_name {
let encoded = urlencoding::encode(name);
let url = format!(
"{}?method={}&artist={}&api_key={}{}&format=json",
BASE_URL, method, encoded, self.api_key, extra_params
);
if let Some(body) = self.fetch_or_none(&url)? {
return Ok(Some(body));
}
}
Ok(None)
} }
pub fn get_similar_artists( pub fn get_similar_artists(
@@ -125,7 +137,7 @@ impl LastfmClient {
}; };
let resp: SimilarArtistsResponse = serde_json::from_str(&body)?; let resp: SimilarArtistsResponse = serde_json::from_str(&body)?;
Ok(resp let results: Vec<SimilarArtist> = resp
.similarartists .similarartists
.artist .artist
.into_iter() .into_iter()
@@ -137,7 +149,29 @@ impl LastfmClient {
match_score: a.match_score.parse().unwrap_or(0.0), match_score: a.match_score.parse().unwrap_or(0.0),
} }
}) })
.collect()) .collect();
// MBID lookup can return valid but empty results; retry with name
if results.is_empty() {
if let Some(body) = self.fetch_by_name("artist.getSimilar", artist_name, "&limit=500")? {
let resp: SimilarArtistsResponse = serde_json::from_str(&body)?;
return Ok(resp
.similarartists
.artist
.into_iter()
.map(|a| {
let mbid = a.mbid.filter(|s| !s.is_empty());
SimilarArtist {
name: a.name,
mbid,
match_score: a.match_score.parse().unwrap_or(0.0),
}
})
.collect());
}
}
Ok(results)
} }
pub fn get_top_tracks( pub fn get_top_tracks(
@@ -155,7 +189,7 @@ impl LastfmClient {
}; };
let resp: TopTracksResponse = serde_json::from_str(&body)?; let resp: TopTracksResponse = serde_json::from_str(&body)?;
Ok(resp let results: Vec<TopTrack> = resp
.toptracks .toptracks
.track .track
.into_iter() .into_iter()
@@ -165,6 +199,25 @@ impl LastfmClient {
playcount: t.playcount.parse().unwrap_or(0), playcount: t.playcount.parse().unwrap_or(0),
listeners: t.listeners.parse().unwrap_or(0), listeners: t.listeners.parse().unwrap_or(0),
}) })
.collect()) .collect();
if results.is_empty() {
if let Some(body) = self.fetch_by_name("artist.getTopTracks", artist_name, "&limit=1000")? {
let resp: TopTracksResponse = serde_json::from_str(&body)?;
return Ok(resp
.toptracks
.track
.into_iter()
.map(|t| TopTrack {
name: t.name,
mbid: t.mbid.filter(|s| !s.is_empty()),
playcount: t.playcount.parse().unwrap_or(0),
listeners: t.listeners.parse().unwrap_or(0),
})
.collect());
}
}
Ok(results)
} }
} }

View File

@@ -26,7 +26,7 @@ fn db_path() -> PathBuf {
fn usage(program: &str) -> ! { fn usage(program: &str) -> ! {
eprintln!("Usage:"); eprintln!("Usage:");
eprintln!(" {program} index [-v] <directory>"); eprintln!(" {program} index [-v] [-f] <directory>");
eprintln!(" {program} build [-v] [-m] [-s|-r] [-n COUNT] [file]"); eprintln!(" {program} build [-v] [-m] [-s|-r] [-n COUNT] [file]");
std::process::exit(1); std::process::exit(1);
} }
@@ -47,10 +47,11 @@ fn main() {
fn cmd_index(args: &[String]) { fn cmd_index(args: &[String]) {
let verbose = args.iter().any(|a| a == "-v"); let verbose = args.iter().any(|a| a == "-v");
let rest: Vec<&String> = args.iter().skip(2).filter(|a| *a != "-v").collect(); let force = args.iter().any(|a| a == "-f");
let rest: Vec<&String> = args.iter().skip(2).filter(|a| *a != "-v" && *a != "-f").collect();
if rest.len() != 1 { if rest.len() != 1 {
eprintln!("Usage: {} index [-v] <directory>", args[0]); eprintln!("Usage: {} index [-v] [-f] <directory>", args[0]);
std::process::exit(1); std::process::exit(1);
} }
@@ -89,7 +90,7 @@ fn cmd_index(args: &[String]) {
let artist_name = metadata::read_artist_name(&path).ok().flatten(); let artist_name = metadata::read_artist_name(&path).ok().flatten();
let display_name = artist_name.as_deref().unwrap_or(&artist_mbid); let display_name = artist_name.as_deref().unwrap_or(&artist_mbid);
if !already_indexed { if !already_indexed || force {
if verbose { if verbose {
println!("Indexing {display_name}..."); println!("Indexing {display_name}...");
} }
@@ -318,7 +319,7 @@ fn build_playlist(
.map(|(total, _, _, artist, path)| (total, artist, path)) .map(|(total, _, _, artist, path)| (total, artist, path))
.collect(); .collect();
let mut selected = generate_playlist(&candidates, count); let mut selected = generate_playlist(&candidates, count, seed_name);
if random { if random {
selected.shuffle(&mut rand::rng()); selected.shuffle(&mut rand::rng());
@@ -352,6 +353,7 @@ fn build_playlist(
fn generate_playlist( fn generate_playlist(
candidates: &[(f64, String, String)], candidates: &[(f64, String, String)],
n: usize, n: usize,
seed_name: &str,
) -> Vec<(f64, String, String)> { ) -> Vec<(f64, String, String)> {
if candidates.is_empty() { if candidates.is_empty() {
return Vec::new(); return Vec::new();
@@ -362,6 +364,8 @@ fn generate_playlist(
let mut result: Vec<(f64, String, String)> = Vec::new(); let mut result: Vec<(f64, String, String)> = Vec::new();
let mut artist_counts: HashMap<String, usize> = HashMap::new(); let mut artist_counts: HashMap<String, usize> = HashMap::new();
let seed_min = (n / 10).max(1);
let distinct_artists: usize = { let distinct_artists: usize = {
let mut seen = std::collections::HashSet::new(); let mut seen = std::collections::HashSet::new();
for (_, artist, _) in &pool { for (_, artist, _) in &pool {
@@ -381,12 +385,21 @@ fn generate_playlist(
let artist_cap = ((n + divisor - 1) / divisor).max(1); let artist_cap = ((n + divisor - 1) / divisor).max(1);
while result.len() < n && !pool.is_empty() { while result.len() < n && !pool.is_empty() {
let seed_count = *artist_counts.get(seed_name).unwrap_or(&0);
let remaining = n - result.len();
let seed_deficit = seed_min.saturating_sub(seed_count);
let force_seed = seed_deficit > 0 && remaining <= seed_deficit;
// Find eligible tracks (artist hasn't hit cap) // Find eligible tracks (artist hasn't hit cap)
let eligible: Vec<usize> = pool let eligible: Vec<usize> = pool
.iter() .iter()
.enumerate() .enumerate()
.filter(|(_, (_, artist, _))| { .filter(|(_, (_, artist, _))| {
if force_seed {
artist == seed_name
} else {
*artist_counts.get(artist).unwrap_or(&0) < artist_cap *artist_counts.get(artist).unwrap_or(&0) < artist_cap
}
}) })
.map(|(i, _)| i) .map(|(i, _)| i)
.collect(); .collect();