Hardened the indexing for blink-182

This commit is contained in:
Connor Johnstone
2026-03-03 13:30:51 -05:00
parent f5f0273853
commit 60a1d704dd
4 changed files with 104 additions and 26 deletions

View File

@@ -21,7 +21,9 @@ def find_db():
def load_artists(db_path):
conn = sqlite3.connect(db_path)
rows = conn.execute(
"SELECT mbid, COALESCE(name, mbid) FROM artists ORDER BY name"
"SELECT a.mbid, COALESCE(a.name, a.mbid) FROM artists a "
"LEFT JOIN tracks t ON t.artist_mbid = a.mbid "
"GROUP BY a.mbid ORDER BY COUNT(t.path) DESC, a.name"
).fetchall()
conn.close()
return rows # [(mbid, display_name), ...]

View File

@@ -72,7 +72,9 @@ pub fn get_local_tracks_for_artist(
pub fn get_all_artists(conn: &Connection) -> Result<Vec<(String, String)>, rusqlite::Error> {
let mut stmt = conn.prepare(
"SELECT mbid, COALESCE(name, mbid) FROM artists ORDER BY name",
"SELECT a.mbid, COALESCE(a.name, a.mbid) FROM artists a \
LEFT JOIN tracks t ON t.artist_mbid = a.mbid \
GROUP BY a.mbid ORDER BY COUNT(t.path) DESC, a.name",
)?;
let rows = stmt.query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?;
rows.collect()
@@ -84,6 +86,10 @@ pub fn insert_top_tracks(
tracks: &[TopTrack],
) -> Result<(), rusqlite::Error> {
let tx = conn.unchecked_transaction()?;
tx.execute(
"DELETE FROM top_tracks WHERE artist_mbid = ?1",
rusqlite::params![artist_mbid],
)?;
for t in tracks {
let name_lower = t.name.to_lowercase();
tx.execute(
@@ -130,9 +136,13 @@ pub fn insert_artist_with_similar(
) -> Result<(), rusqlite::Error> {
let tx = conn.unchecked_transaction()?;
tx.execute(
"INSERT OR IGNORE INTO artists (mbid, name) VALUES (?1, ?2)",
"INSERT OR REPLACE INTO artists (mbid, name) VALUES (?1, ?2)",
rusqlite::params![mbid, name],
)?;
tx.execute(
"DELETE FROM similar_artists WHERE artist_mbid = ?1",
rusqlite::params![mbid],
)?;
for s in similar {
tx.execute(
"INSERT OR IGNORE INTO similar_artists (artist_mbid, similar_mbid, similar_name, match_score)

View File

@@ -79,6 +79,30 @@ impl LastfmClient {
Ok(Some(body))
}
/// Fetch a Last.fm API method by artist name, normalizing Unicode hyphens to ASCII.
///
/// Every character in the range U+2010..=U+2015 (hyphen, non-breaking hyphen,
/// figure dash, en dash, em dash, horizontal bar) in the artist name is replaced
/// with an ASCII `-` before the name is URL-encoded and placed in the `artist`
/// query parameter.
///
/// * `method` - value of the `method` query parameter.
/// * `artist_name` - artist name to look up; when `None`, no request is built.
/// * `extra_params` - inserted verbatim between the `api_key` parameter and
///   `&format=json`, so it must already carry its own leading `&`.
///
/// Returns `Ok(None)` when `artist_name` is `None`; otherwise returns whatever
/// `fetch_or_none` produces for the constructed URL, including its errors.
fn fetch_by_name(
    &self,
    method: &str,
    artist_name: Option<&str>,
    extra_params: &str,
) -> Result<Option<String>, Box<dyn std::error::Error>> {
    let name = match artist_name {
        Some(name) => name,
        None => return Ok(None),
    };

    // A single pass with a char predicate replaces the whole contiguous
    // U+2010..=U+2015 block at once, instead of allocating one intermediate
    // `String` per code point with chained `replace` calls.
    let normalized = name.replace(|c: char| matches!(c, '\u{2010}'..='\u{2015}'), "-");

    let encoded = urlencoding::encode(&normalized);
    let url = format!(
        "{}?method={}&artist={}&api_key={}{}&format=json",
        BASE_URL, method, encoded, self.api_key, extra_params
    );
    self.fetch_or_none(&url)
}
/// Try fetching by MBID first, fall back to artist name.
fn fetch_with_fallback(
&self,
@@ -95,19 +119,7 @@ impl LastfmClient {
return Ok(Some(body));
}
// Fall back to artist name
if let Some(name) = artist_name {
let encoded = urlencoding::encode(name);
let url = format!(
"{}?method={}&artist={}&api_key={}{}&format=json",
BASE_URL, method, encoded, self.api_key, extra_params
);
if let Some(body) = self.fetch_or_none(&url)? {
return Ok(Some(body));
}
}
Ok(None)
self.fetch_by_name(method, artist_name, extra_params)
}
pub fn get_similar_artists(
@@ -125,7 +137,7 @@ impl LastfmClient {
};
let resp: SimilarArtistsResponse = serde_json::from_str(&body)?;
Ok(resp
let results: Vec<SimilarArtist> = resp
.similarartists
.artist
.into_iter()
@@ -137,7 +149,29 @@ impl LastfmClient {
match_score: a.match_score.parse().unwrap_or(0.0),
}
})
.collect())
.collect();
// MBID lookup can return valid but empty results; retry with name
if results.is_empty() {
if let Some(body) = self.fetch_by_name("artist.getSimilar", artist_name, "&limit=500")? {
let resp: SimilarArtistsResponse = serde_json::from_str(&body)?;
return Ok(resp
.similarartists
.artist
.into_iter()
.map(|a| {
let mbid = a.mbid.filter(|s| !s.is_empty());
SimilarArtist {
name: a.name,
mbid,
match_score: a.match_score.parse().unwrap_or(0.0),
}
})
.collect());
}
}
Ok(results)
}
pub fn get_top_tracks(
@@ -155,7 +189,7 @@ impl LastfmClient {
};
let resp: TopTracksResponse = serde_json::from_str(&body)?;
Ok(resp
let results: Vec<TopTrack> = resp
.toptracks
.track
.into_iter()
@@ -165,6 +199,25 @@ impl LastfmClient {
playcount: t.playcount.parse().unwrap_or(0),
listeners: t.listeners.parse().unwrap_or(0),
})
.collect())
.collect();
if results.is_empty() {
if let Some(body) = self.fetch_by_name("artist.getTopTracks", artist_name, "&limit=1000")? {
let resp: TopTracksResponse = serde_json::from_str(&body)?;
return Ok(resp
.toptracks
.track
.into_iter()
.map(|t| TopTrack {
name: t.name,
mbid: t.mbid.filter(|s| !s.is_empty()),
playcount: t.playcount.parse().unwrap_or(0),
listeners: t.listeners.parse().unwrap_or(0),
})
.collect());
}
}
Ok(results)
}
}

View File

@@ -26,7 +26,7 @@ fn db_path() -> PathBuf {
fn usage(program: &str) -> ! {
eprintln!("Usage:");
eprintln!(" {program} index [-v] <directory>");
eprintln!(" {program} index [-v] [-f] <directory>");
eprintln!(" {program} build [-v] [-m] [-s|-r] [-n COUNT] [file]");
std::process::exit(1);
}
@@ -47,10 +47,11 @@ fn main() {
fn cmd_index(args: &[String]) {
let verbose = args.iter().any(|a| a == "-v");
let rest: Vec<&String> = args.iter().skip(2).filter(|a| *a != "-v").collect();
let force = args.iter().any(|a| a == "-f");
let rest: Vec<&String> = args.iter().skip(2).filter(|a| *a != "-v" && *a != "-f").collect();
if rest.len() != 1 {
eprintln!("Usage: {} index [-v] <directory>", args[0]);
eprintln!("Usage: {} index [-v] [-f] <directory>", args[0]);
std::process::exit(1);
}
@@ -89,7 +90,7 @@ fn cmd_index(args: &[String]) {
let artist_name = metadata::read_artist_name(&path).ok().flatten();
let display_name = artist_name.as_deref().unwrap_or(&artist_mbid);
if !already_indexed {
if !already_indexed || force {
if verbose {
println!("Indexing {display_name}...");
}
@@ -318,7 +319,7 @@ fn build_playlist(
.map(|(total, _, _, artist, path)| (total, artist, path))
.collect();
let mut selected = generate_playlist(&candidates, count);
let mut selected = generate_playlist(&candidates, count, seed_name);
if random {
selected.shuffle(&mut rand::rng());
@@ -352,6 +353,7 @@ fn build_playlist(
fn generate_playlist(
candidates: &[(f64, String, String)],
n: usize,
seed_name: &str,
) -> Vec<(f64, String, String)> {
if candidates.is_empty() {
return Vec::new();
@@ -362,6 +364,8 @@ fn generate_playlist(
let mut result: Vec<(f64, String, String)> = Vec::new();
let mut artist_counts: HashMap<String, usize> = HashMap::new();
let seed_min = (n / 10).max(1);
let distinct_artists: usize = {
let mut seen = std::collections::HashSet::new();
for (_, artist, _) in &pool {
@@ -381,12 +385,21 @@ fn generate_playlist(
let artist_cap = ((n + divisor - 1) / divisor).max(1);
while result.len() < n && !pool.is_empty() {
let seed_count = *artist_counts.get(seed_name).unwrap_or(&0);
let remaining = n - result.len();
let seed_deficit = seed_min.saturating_sub(seed_count);
let force_seed = seed_deficit > 0 && remaining <= seed_deficit;
// Find eligible tracks (artist hasn't hit cap)
let eligible: Vec<usize> = pool
.iter()
.enumerate()
.filter(|(_, (_, artist, _))| {
if force_seed {
artist == seed_name
} else {
*artist_counts.get(artist).unwrap_or(&0) < artist_cap
}
})
.map(|(i, _)| i)
.collect();