Hardened the lookups

This commit is contained in:
Connor Johnstone
2026-03-02 23:54:28 -05:00
parent 34977ea54b
commit 09d562fabb
7 changed files with 222 additions and 66 deletions

7
Cargo.lock generated
View File

@@ -361,6 +361,7 @@ dependencies = [
"serde", "serde",
"serde_json", "serde_json",
"ureq", "ureq",
"urlencoding",
"walkdir", "walkdir",
] ]
@@ -676,6 +677,12 @@ dependencies = [
"log", "log",
] ]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]] [[package]]
name = "utf-8" name = "utf-8"
version = "0.7.6" version = "0.7.6"

View File

@@ -13,3 +13,4 @@ ureq = "3"
rand = "0.9" rand = "0.9"
walkdir = "2.5" walkdir = "2.5"
crossterm = "0.28" crossterm = "0.28"
urlencoding = "2.1.3"

View File

@@ -37,6 +37,38 @@ def get_similar(db_path, mbid):
return rows return rows
def get_top_tracks(db_path, mbid):
    """Return the artist's local tracks that appear in its indexed top tracks.

    Args:
        db_path: Path to the SQLite database file.
        mbid: Artist MBID used to select rows from both ``tracks`` and
            ``top_tracks``.

    Returns:
        A list of ``(path, playcount)`` rows, highest playcount first. A local
        track is included when its lowercased title equals
        ``top_tracks.name_lower`` or its ``recording_mbid`` equals the top
        track's ``recording_mbid``. A track that matches several ``top_tracks``
        rows is returned once per match.

    NOTE(review): SQLite's built-in LOWER() presumably folds ASCII only, so
    titles with non-ASCII capitals may fail the title comparison -- confirm
    against how ``name_lower`` is populated by the indexer.
    """
    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(
            "SELECT t.path, tt.playcount FROM tracks t "
            "JOIN top_tracks tt ON tt.artist_mbid = t.artist_mbid "
            " AND (LOWER(t.title) = tt.name_lower "
            " OR t.recording_mbid = tt.recording_mbid) "
            "WHERE t.artist_mbid = ?1 "
            "ORDER BY tt.playcount DESC",
            (mbid,),
        ).fetchall()
    finally:
        # Close even when the query raises (e.g. table missing in an old DB),
        # so the connection is never leaked.
        conn.close()
def get_local_track_count(db_path, mbid):
    """Count an artist's local tracks and how many of them match a top track.

    Args:
        db_path: Path to the SQLite database file.
        mbid: Artist MBID to count tracks for.

    Returns:
        A ``(total, matched)`` tuple. ``total`` is the number of rows in
        ``tracks`` for the artist; ``matched`` is the number of distinct local
        tracks whose lowercased title equals a ``top_tracks.name_lower`` or
        whose ``recording_mbid`` equals a top track's ``recording_mbid``.
    """
    conn = sqlite3.connect(db_path)
    try:
        total = conn.execute(
            "SELECT COUNT(*) FROM tracks WHERE artist_mbid = ?1", (mbid,)
        ).fetchone()[0]
        # DISTINCT: the OR-join can pair one local track with two different
        # top_tracks rows (one by title, one by recording MBID); counting join
        # rows would then report more matches than there are tracks.
        matched = conn.execute(
            "SELECT COUNT(DISTINCT t.path) FROM tracks t "
            "JOIN top_tracks tt ON tt.artist_mbid = t.artist_mbid "
            " AND (LOWER(t.title) = tt.name_lower "
            " OR t.recording_mbid = tt.recording_mbid) "
            "WHERE t.artist_mbid = ?1",
            (mbid,),
        ).fetchone()[0]
    finally:
        # Close even when a query raises so the connection is never leaked.
        conn.close()
    return total, matched
def fuzzy_match(query, name): def fuzzy_match(query, name):
"""Simple fuzzy: all query chars appear in order in name.""" """Simple fuzzy: all query chars appear in order in name."""
name_lower = name.lower() name_lower = name.lower()
@@ -124,28 +156,48 @@ def run_tui(stdscr, db_path):
def show_similar(stdscr, db_path, artist): def show_similar(stdscr, db_path, artist):
mbid, name = artist mbid, name = artist
similar = get_similar(db_path, mbid) similar = get_similar(db_path, mbid)
top = get_top_tracks(db_path, mbid)
total_local, matched_local = get_local_track_count(db_path, mbid)
curses.curs_set(0) curses.curs_set(0)
stdscr.erase() stdscr.erase()
h, w = stdscr.getmaxyx() h, w = stdscr.getmaxyx()
title = f" Similar to {name}" mid = w // 2
stdscr.addnstr(0, 0, title, w, curses.color_pair(2) | curses.A_BOLD)
title_l = f" Similar to {name}"
title_r = f" Top tracks ({matched_local}/{total_local} matched)"
stdscr.addnstr(0, 0, title_l, mid, curses.color_pair(2) | curses.A_BOLD)
stdscr.addnstr(0, mid, title_r, w - mid, curses.color_pair(2) | curses.A_BOLD)
stdscr.addnstr(h - 1, 0, " [q] back", w, curses.color_pair(3)) stdscr.addnstr(h - 1, 0, " [q] back", w, curses.color_pair(3))
scroll = 0 scroll_l = 0
scroll_r = 0
list_h = h - 2 list_h = h - 2
while True: while True:
# Left pane: similar artists
for i in range(list_h): for i in range(list_h):
stdscr.move(i + 1, 0) stdscr.move(i + 1, 0)
stdscr.clrtoeol() stdscr.clrtoeol()
idx = scroll + i idx = scroll_l + i
if idx >= len(similar): if idx < len(similar):
continue
sname, score = similar[idx] sname, score = similar[idx]
line = f" {score:5.2f} {sname}" line = f" {score:5.2f} {sname}"
stdscr.addnstr(i + 1, 0, line, w) stdscr.addnstr(i + 1, 0, line, mid)
# Right pane: top tracks
for i in range(list_h):
idx = scroll_r + i
if idx < len(top):
path, playcount = top[idx]
# Show just the filename without extension
fname = Path(path).stem
# Strip "Artist - " prefix if present
if " - " in fname:
fname = fname.split(" - ", 1)[1]
line = f" {playcount:>8} {fname}"
stdscr.addnstr(i + 1, mid, line, w - mid)
stdscr.refresh() stdscr.refresh()
key = stdscr.get_wch() key = stdscr.get_wch()
@@ -153,10 +205,13 @@ def show_similar(stdscr, db_path, artist):
if key in ("q", "Q", "\x1b"): if key in ("q", "Q", "\x1b"):
return return
elif key == curses.KEY_UP or key == "\x10": elif key == curses.KEY_UP or key == "\x10":
scroll = max(0, scroll - 1) scroll_l = max(0, scroll_l - 1)
scroll_r = max(0, scroll_r - 1)
elif key == curses.KEY_DOWN or key == "\x0e": elif key == curses.KEY_DOWN or key == "\x0e":
if scroll + list_h < len(similar): if scroll_l + list_h < len(similar):
scroll += 1 scroll_l += 1
if scroll_r + list_h < len(top):
scroll_r += 1
def main(): def main():

View File

@@ -1,6 +1,6 @@
use rusqlite::Connection; use rusqlite::Connection;
use crate::lastfm::SimilarArtist; use crate::lastfm::{SimilarArtist, TopTrack};
pub fn open(path: &str) -> Result<Connection, rusqlite::Error> { pub fn open(path: &str) -> Result<Connection, rusqlite::Error> {
let conn = Connection::open(path)?; let conn = Connection::open(path)?;
@@ -19,7 +19,16 @@ pub fn open(path: &str) -> Result<Connection, rusqlite::Error> {
CREATE TABLE IF NOT EXISTS tracks ( CREATE TABLE IF NOT EXISTS tracks (
path TEXT PRIMARY KEY, path TEXT PRIMARY KEY,
artist_mbid TEXT NOT NULL REFERENCES artists(mbid), artist_mbid TEXT NOT NULL REFERENCES artists(mbid),
recording_mbid TEXT recording_mbid TEXT,
title TEXT
);
CREATE TABLE IF NOT EXISTS top_tracks (
artist_mbid TEXT NOT NULL REFERENCES artists(mbid),
recording_mbid TEXT,
name TEXT NOT NULL,
name_lower TEXT NOT NULL,
playcount INTEGER NOT NULL,
PRIMARY KEY (artist_mbid, name_lower)
);", );",
)?; )?;
Ok(conn) Ok(conn)
@@ -53,11 +62,11 @@ pub fn get_available_similar_artists(
pub fn get_local_tracks_for_artist( pub fn get_local_tracks_for_artist(
conn: &Connection, conn: &Connection,
artist_mbid: &str, artist_mbid: &str,
) -> Result<Vec<(String, Option<String>)>, rusqlite::Error> { ) -> Result<Vec<(String, Option<String>, Option<String>)>, rusqlite::Error> {
let mut stmt = conn.prepare( let mut stmt = conn.prepare(
"SELECT path, recording_mbid FROM tracks WHERE artist_mbid = ?1", "SELECT path, recording_mbid, title FROM tracks WHERE artist_mbid = ?1",
)?; )?;
let rows = stmt.query_map([artist_mbid], |row| Ok((row.get(0)?, row.get(1)?)))?; let rows = stmt.query_map([artist_mbid], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))?;
rows.collect() rows.collect()
} }
@@ -69,15 +78,46 @@ pub fn get_all_artists(conn: &Connection) -> Result<Vec<(String, String)>, rusql
rows.collect() rows.collect()
} }
/// Store an artist's top tracks in the `top_tracks` table.
///
/// All rows are written inside a single transaction, so either every track is
/// stored or (on error) none are. Each track's name is also stored lowercased
/// in `name_lower`. Rows whose `(artist_mbid, name_lower)` key already exists
/// are left untouched (`INSERT OR IGNORE`).
///
/// # Errors
///
/// Returns any `rusqlite::Error` raised while opening the transaction,
/// preparing or executing the insert, or committing.
pub fn insert_top_tracks(
    conn: &Connection,
    artist_mbid: &str,
    tracks: &[TopTrack],
) -> Result<(), rusqlite::Error> {
    let tx = conn.unchecked_transaction()?;
    {
        // Compile the INSERT once and reuse it for every row instead of
        // re-preparing the same SQL on each iteration.
        let mut stmt = tx.prepare(
            "INSERT OR IGNORE INTO top_tracks (artist_mbid, recording_mbid, name, name_lower, playcount)
             VALUES (?1, ?2, ?3, ?4, ?5)",
        )?;
        for t in tracks {
            let name_lower = t.name.to_lowercase();
            stmt.execute(rusqlite::params![
                artist_mbid,
                t.mbid,
                t.name,
                name_lower,
                t.playcount
            ])?;
        }
        // `stmt` borrows `tx`; it is dropped at the end of this scope so the
        // transaction can be moved into `commit()` below.
    }
    tx.commit()
}
pub fn get_top_tracks_by_name(
conn: &Connection,
artist_mbid: &str,
) -> Result<Vec<(String, u64)>, rusqlite::Error> {
let mut stmt = conn.prepare(
"SELECT name_lower, playcount FROM top_tracks WHERE artist_mbid = ?1",
)?;
let rows = stmt.query_map([artist_mbid], |row| {
Ok((row.get(0)?, row.get::<_, i64>(1)? as u64))
})?;
rows.collect()
}
pub fn insert_track( pub fn insert_track(
conn: &Connection, conn: &Connection,
path: &str, path: &str,
artist_mbid: &str, artist_mbid: &str,
recording_mbid: Option<&str>, recording_mbid: Option<&str>,
title: Option<&str>,
) -> Result<(), rusqlite::Error> { ) -> Result<(), rusqlite::Error> {
conn.execute( conn.execute(
"INSERT OR IGNORE INTO tracks (path, artist_mbid, recording_mbid) VALUES (?1, ?2, ?3)", "INSERT OR IGNORE INTO tracks (path, artist_mbid, recording_mbid, title) VALUES (?1, ?2, ?3, ?4)",
rusqlite::params![path, artist_mbid, recording_mbid], rusqlite::params![path, artist_mbid, recording_mbid, title],
)?; )?;
Ok(()) Ok(())
} }

View File

@@ -70,20 +70,59 @@ impl LastfmClient {
Self { api_key } Self { api_key }
} }
/// Download `url` and hand back the response body as text.
///
/// Yields `Ok(None)` when the body deserializes as an `ApiError`, i.e. the
/// service answered with an error payload rather than data. Request and
/// body-read failures are propagated as `Err`.
fn fetch_or_none(&self, url: &str) -> Result<Option<String>, Box<dyn std::error::Error>> {
    let mut response = ureq::get(url).call()?;
    let text: String = response.body_mut().read_to_string()?;
    match serde_json::from_str::<ApiError>(&text) {
        // The payload is an API error: signal "no data" to the caller.
        Ok(_) => Ok(None),
        Err(_) => Ok(Some(text)),
    }
}
/// Query `method` by artist MBID, retrying by artist name if that yields nothing.
///
/// `extra_params` is appended verbatim to the query string (callers pass it
/// with its own leading `&`). The artist name is percent-encoded before being
/// placed in the URL. Returns `Ok(None)` when neither lookup produced a body,
/// or when the MBID lookup produced nothing and no name was supplied.
fn fetch_with_fallback(
    &self,
    method: &str,
    artist_mbid: &str,
    artist_name: Option<&str>,
    extra_params: &str,
) -> Result<Option<String>, Box<dyn std::error::Error>> {
    let by_mbid = format!(
        "{}?method={}&mbid={}&api_key={}{}&format=json",
        BASE_URL, method, artist_mbid, self.api_key, extra_params
    );
    let primary = self.fetch_or_none(&by_mbid)?;
    if primary.is_some() {
        return Ok(primary);
    }

    // Nothing for the MBID: only a name lookup can still succeed.
    let Some(name) = artist_name else {
        return Ok(None);
    };
    let by_name = format!(
        "{}?method={}&artist={}&api_key={}{}&format=json",
        BASE_URL,
        method,
        urlencoding::encode(name),
        self.api_key,
        extra_params
    );
    self.fetch_or_none(&by_name)
}
pub fn get_similar_artists( pub fn get_similar_artists(
&self, &self,
artist_mbid: &str, artist_mbid: &str,
artist_name: Option<&str>,
) -> Result<Vec<SimilarArtist>, Box<dyn std::error::Error>> { ) -> Result<Vec<SimilarArtist>, Box<dyn std::error::Error>> {
let url = format!( let Some(body) = self.fetch_with_fallback(
"{}?method=artist.getSimilar&mbid={}&api_key={}&limit=500&format=json", "artist.getSimilar",
BASE_URL, artist_mbid, self.api_key artist_mbid,
); artist_name,
let body: String = ureq::get(&url).call()?.body_mut().read_to_string()?; "&limit=500",
)? else {
if let Ok(err) = serde_json::from_str::<ApiError>(&body) {
eprintln!(" Last.fm: {}", err.message);
return Ok(Vec::new()); return Ok(Vec::new());
} };
let resp: SimilarArtistsResponse = serde_json::from_str(&body)?; let resp: SimilarArtistsResponse = serde_json::from_str(&body)?;
Ok(resp Ok(resp
@@ -104,17 +143,16 @@ impl LastfmClient {
pub fn get_top_tracks( pub fn get_top_tracks(
&self, &self,
artist_mbid: &str, artist_mbid: &str,
artist_name: Option<&str>,
) -> Result<Vec<TopTrack>, Box<dyn std::error::Error>> { ) -> Result<Vec<TopTrack>, Box<dyn std::error::Error>> {
let url = format!( let Some(body) = self.fetch_with_fallback(
"{}?method=artist.getTopTracks&mbid={}&api_key={}&limit=1000&format=json", "artist.getTopTracks",
BASE_URL, artist_mbid, self.api_key artist_mbid,
); artist_name,
let body: String = ureq::get(&url).call()?.body_mut().read_to_string()?; "&limit=1000",
)? else {
if let Ok(err) = serde_json::from_str::<ApiError>(&body) {
eprintln!(" Last.fm: {}", err.message);
return Ok(Vec::new()); return Ok(Vec::new());
} };
let resp: TopTracksResponse = serde_json::from_str(&body)?; let resp: TopTracksResponse = serde_json::from_str(&body)?;
Ok(resp Ok(resp

View File

@@ -82,7 +82,7 @@ fn cmd_index(args: &[String]) {
println!("Indexing {display_name}..."); println!("Indexing {display_name}...");
} }
match lastfm.get_similar_artists(&artist_mbid) { match lastfm.get_similar_artists(&artist_mbid, artist_name.as_deref()) {
Ok(similar) => { Ok(similar) => {
if let Err(e) = db::insert_artist_with_similar( if let Err(e) = db::insert_artist_with_similar(
&conn, &conn,
@@ -99,12 +99,25 @@ fn cmd_index(args: &[String]) {
continue; continue;
} }
} }
match lastfm.get_top_tracks(&artist_mbid, artist_name.as_deref()) {
Ok(top_tracks) => {
if let Err(e) = db::insert_top_tracks(&conn, &artist_mbid, &top_tracks) {
eprintln!("DB error inserting top tracks for {display_name}: {e}");
}
}
Err(e) => {
eprintln!("Last.fm top tracks error for {display_name}: {e}");
}
}
} else if verbose { } else if verbose {
println!("Skipping {display_name} (already indexed)"); println!("Skipping {display_name} (already indexed)");
} }
let track_title = metadata::read_track_title(&path).ok().flatten();
let path_str = path.to_string_lossy(); let path_str = path.to_string_lossy();
if let Err(e) = db::insert_track(&conn, &path_str, &artist_mbid, recording_mbid.as_deref()) { if let Err(e) = db::insert_track(&conn, &path_str, &artist_mbid, recording_mbid.as_deref(), track_title.as_deref()) {
eprintln!("DB error inserting track {}: {e}", path.display()); eprintln!("DB error inserting track {}: {e}", path.display());
} }
} }
@@ -153,14 +166,7 @@ fn cmd_build(args: &[String]) {
} }
dotenvy::dotenv().ok(); dotenvy::dotenv().ok();
let api_key = env::var("LASTFM_API_KEY").unwrap_or_default();
if api_key.is_empty() {
eprintln!("Error: LASTFM_API_KEY not set");
std::process::exit(1);
}
let conn = db::open("playlists.db").expect("failed to open database"); let conn = db::open("playlists.db").expect("failed to open database");
let lastfm = lastfm::LastfmClient::new(api_key);
let (artist_mbid, seed_name) = if let Some(file_arg) = rest.first() { let (artist_mbid, seed_name) = if let Some(file_arg) = rest.first() {
let path = Path::new(file_arg.as_str()); let path = Path::new(file_arg.as_str());
@@ -198,12 +204,11 @@ fn cmd_build(args: &[String]) {
} }
}; };
build_playlist(&conn, &lastfm, &artist_mbid, &seed_name, count, verbose, mpd, shuffle, random); build_playlist(&conn, &artist_mbid, &seed_name, count, verbose, mpd, shuffle, random);
} }
fn build_playlist( fn build_playlist(
conn: &rusqlite::Connection, conn: &rusqlite::Connection,
lastfm: &lastfm::LastfmClient,
artist_mbid: &str, artist_mbid: &str,
seed_name: &str, seed_name: &str,
count: usize, count: usize,
@@ -242,36 +247,35 @@ fn build_playlist(
continue; continue;
} }
// Fetch top tracks from Last.fm for popularity data // Look up pre-indexed top tracks from DB
let top_tracks = match lastfm.get_top_tracks(mbid) { let top_tracks_by_name = match db::get_top_tracks_by_name(conn, mbid) {
Ok(t) => t, Ok(t) => t,
Err(e) => { Err(e) => {
eprintln!("Last.fm error for {name}: {e}"); eprintln!("DB error fetching top tracks for {name}: {e}");
Vec::new() Vec::new()
} }
}; };
// Build a map from recording_mbid -> playcount let playcount_by_name: std::collections::HashMap<String, u64> =
let mut playcount_by_mbid: std::collections::HashMap<String, u64> = top_tracks_by_name.into_iter().collect();
std::collections::HashMap::new();
for tt in &top_tracks {
if let Some(ref mbid) = tt.mbid {
playcount_by_mbid.insert(mbid.clone(), tt.playcount);
}
}
// Find max playcount for this artist to normalize let max_playcount = playcount_by_name
let max_playcount = top_tracks .values()
.iter() .copied()
.map(|t| t.playcount)
.max() .max()
.unwrap_or(1) .unwrap_or(1)
.max(1); .max(1);
for (track_path, recording_mbid) in &local_tracks { for (track_path, _recording_mbid, title) in &local_tracks {
let playcount = recording_mbid // Match by title (lowercased), fall back to recording MBID
let playcount = title
.as_ref() .as_ref()
.and_then(|rec_mbid| playcount_by_mbid.get(rec_mbid).copied()); .and_then(|t| playcount_by_name.get(&t.to_lowercase()).copied())
.or_else(|| {
_recording_mbid
.as_ref()
.and_then(|id| playcount_by_name.get(id).copied())
});
// Skip tracks not in the artist's top 1000 // Skip tracks not in the artist's top 1000
let Some(playcount) = playcount else { continue }; let Some(playcount) = playcount else { continue };

View File

@@ -57,6 +57,17 @@ pub fn read_artist_mbid(path: &Path) -> Result<Option<String>, lofty::error::Lof
Ok(tag.get_string(ItemKey::MusicBrainzArtistId).map(String::from)) Ok(tag.get_string(ItemKey::MusicBrainzArtistId).map(String::from))
} }
/// Read the track title tag from the music file at `path`.
///
/// Looks at the file's primary tag, or its first tag when there is no primary
/// one. Returns `Ok(None)` if the file has no tag at all or the tag carries no
/// title; errors from reading the file are propagated.
pub fn read_track_title(path: &Path) -> Result<Option<String>, lofty::error::LoftyError> {
    let file = lofty::read_from_path(path)?;
    let title = file
        .primary_tag()
        .or_else(|| file.first_tag())
        .and_then(|tag| tag.get_string(ItemKey::TrackTitle))
        .map(String::from);
    Ok(title)
}
/// Extract the MusicBrainz recording ID from a music file. /// Extract the MusicBrainz recording ID from a music file.
pub fn read_track_mbid(path: &Path) -> Result<Option<String>, lofty::error::LoftyError> { pub fn read_track_mbid(path: &Path) -> Result<Option<String>, lofty::error::LoftyError> {
let tagged_file = lofty::read_from_path(path)?; let tagged_file = lofty::read_from_path(path)?;