Hardened the lookups

This commit is contained in:
Connor Johnstone
2026-03-02 23:54:28 -05:00
parent 34977ea54b
commit 09d562fabb
7 changed files with 222 additions and 66 deletions

7
Cargo.lock generated
View File

@@ -361,6 +361,7 @@ dependencies = [
"serde",
"serde_json",
"ureq",
"urlencoding",
"walkdir",
]
@@ -676,6 +677,12 @@ dependencies = [
"log",
]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "utf-8"
version = "0.7.6"

View File

@@ -13,3 +13,4 @@ ureq = "3"
rand = "0.9"
walkdir = "2.5"
crossterm = "0.28"
urlencoding = "2.1.3"

View File

@@ -37,6 +37,38 @@ def get_similar(db_path, mbid):
return rows
def get_top_tracks(db_path, mbid):
    """Return ``(path, playcount)`` rows for an artist's matched local tracks.

    A row of ``tracks`` is paired with a row of ``top_tracks`` for the same
    artist when the lowercased track title equals ``name_lower`` or when both
    rows carry the same ``recording_mbid``. The result is ordered by
    descending playcount.

    Args:
        db_path: Path of the SQLite database file.
        mbid: Artist MBID used to filter ``tracks.artist_mbid``.

    Returns:
        A list of ``(path, playcount)`` tuples.
    """
    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(
            "SELECT t.path, tt.playcount FROM tracks t "
            "JOIN top_tracks tt ON tt.artist_mbid = t.artist_mbid "
            " AND (LOWER(t.title) = tt.name_lower "
            " OR t.recording_mbid = tt.recording_mbid) "
            "WHERE t.artist_mbid = ?1 "
            "ORDER BY tt.playcount DESC",
            (mbid,),
        ).fetchall()
    finally:
        # Release the connection even when the query raises (for example when
        # the tables have not been created yet).
        conn.close()
def get_local_track_count(db_path, mbid):
    """Count an artist's local tracks and how many of them match a top track.

    A local track counts as matched when at least one ``top_tracks`` row of
    the same artist has ``name_lower`` equal to the lowercased track title, or
    shares the track's ``recording_mbid``.

    Args:
        db_path: Path of the SQLite database file.
        mbid: Artist MBID used to filter ``tracks.artist_mbid``.

    Returns:
        A ``(total, matched)`` tuple of integers.
    """
    conn = sqlite3.connect(db_path)
    try:
        total = conn.execute(
            "SELECT COUNT(*) FROM tracks WHERE artist_mbid = ?1", (mbid,)
        ).fetchone()[0]
        # One track can join to two top_tracks rows (one by title, another by
        # recording MBID). Counting distinct paths keeps matched <= total.
        matched = conn.execute(
            "SELECT COUNT(DISTINCT t.path) FROM tracks t "
            "JOIN top_tracks tt ON tt.artist_mbid = t.artist_mbid "
            " AND (LOWER(t.title) = tt.name_lower "
            " OR t.recording_mbid = tt.recording_mbid) "
            "WHERE t.artist_mbid = ?1",
            (mbid,),
        ).fetchone()[0]
    finally:
        # Release the connection even when a query raises.
        conn.close()
    return total, matched
def fuzzy_match(query, name):
"""Simple fuzzy: all query chars appear in order in name."""
name_lower = name.lower()
@@ -124,28 +156,48 @@ def run_tui(stdscr, db_path):
def show_similar(stdscr, db_path, artist):
mbid, name = artist
similar = get_similar(db_path, mbid)
top = get_top_tracks(db_path, mbid)
total_local, matched_local = get_local_track_count(db_path, mbid)
curses.curs_set(0)
stdscr.erase()
h, w = stdscr.getmaxyx()
title = f" Similar to {name}"
stdscr.addnstr(0, 0, title, w, curses.color_pair(2) | curses.A_BOLD)
mid = w // 2
title_l = f" Similar to {name}"
title_r = f" Top tracks ({matched_local}/{total_local} matched)"
stdscr.addnstr(0, 0, title_l, mid, curses.color_pair(2) | curses.A_BOLD)
stdscr.addnstr(0, mid, title_r, w - mid, curses.color_pair(2) | curses.A_BOLD)
stdscr.addnstr(h - 1, 0, " [q] back", w, curses.color_pair(3))
scroll = 0
scroll_l = 0
scroll_r = 0
list_h = h - 2
while True:
# Left pane: similar artists
for i in range(list_h):
stdscr.move(i + 1, 0)
stdscr.clrtoeol()
idx = scroll + i
if idx >= len(similar):
continue
sname, score = similar[idx]
line = f" {score:5.2f} {sname}"
stdscr.addnstr(i + 1, 0, line, w)
idx = scroll_l + i
if idx < len(similar):
sname, score = similar[idx]
line = f" {score:5.2f} {sname}"
stdscr.addnstr(i + 1, 0, line, mid)
# Right pane: top tracks
for i in range(list_h):
idx = scroll_r + i
if idx < len(top):
path, playcount = top[idx]
# Show just the filename without extension
fname = Path(path).stem
# Strip "Artist - " prefix if present
if " - " in fname:
fname = fname.split(" - ", 1)[1]
line = f" {playcount:>8} {fname}"
stdscr.addnstr(i + 1, mid, line, w - mid)
stdscr.refresh()
key = stdscr.get_wch()
@@ -153,10 +205,13 @@ def show_similar(stdscr, db_path, artist):
if key in ("q", "Q", "\x1b"):
return
elif key == curses.KEY_UP or key == "\x10":
scroll = max(0, scroll - 1)
scroll_l = max(0, scroll_l - 1)
scroll_r = max(0, scroll_r - 1)
elif key == curses.KEY_DOWN or key == "\x0e":
if scroll + list_h < len(similar):
scroll += 1
if scroll_l + list_h < len(similar):
scroll_l += 1
if scroll_r + list_h < len(top):
scroll_r += 1
def main():

View File

@@ -1,6 +1,6 @@
use rusqlite::Connection;
use crate::lastfm::SimilarArtist;
use crate::lastfm::{SimilarArtist, TopTrack};
pub fn open(path: &str) -> Result<Connection, rusqlite::Error> {
let conn = Connection::open(path)?;
@@ -19,7 +19,16 @@ pub fn open(path: &str) -> Result<Connection, rusqlite::Error> {
CREATE TABLE IF NOT EXISTS tracks (
path TEXT PRIMARY KEY,
artist_mbid TEXT NOT NULL REFERENCES artists(mbid),
recording_mbid TEXT
recording_mbid TEXT,
title TEXT
);
CREATE TABLE IF NOT EXISTS top_tracks (
artist_mbid TEXT NOT NULL REFERENCES artists(mbid),
recording_mbid TEXT,
name TEXT NOT NULL,
name_lower TEXT NOT NULL,
playcount INTEGER NOT NULL,
PRIMARY KEY (artist_mbid, name_lower)
);",
)?;
Ok(conn)
@@ -53,11 +62,11 @@ pub fn get_available_similar_artists(
pub fn get_local_tracks_for_artist(
conn: &Connection,
artist_mbid: &str,
) -> Result<Vec<(String, Option<String>)>, rusqlite::Error> {
) -> Result<Vec<(String, Option<String>, Option<String>)>, rusqlite::Error> {
let mut stmt = conn.prepare(
"SELECT path, recording_mbid FROM tracks WHERE artist_mbid = ?1",
"SELECT path, recording_mbid, title FROM tracks WHERE artist_mbid = ?1",
)?;
let rows = stmt.query_map([artist_mbid], |row| Ok((row.get(0)?, row.get(1)?)))?;
let rows = stmt.query_map([artist_mbid], |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)))?;
rows.collect()
}
@@ -69,15 +78,46 @@ pub fn get_all_artists(conn: &Connection) -> Result<Vec<(String, String)>, rusql
rows.collect()
}
/// Store an artist's top tracks in the `top_tracks` table.
///
/// All rows are written inside one transaction that is committed only after
/// every insert has succeeded. `name_lower` is the lowercased track name.
/// Because the statement is `INSERT OR IGNORE`, a row whose
/// `(artist_mbid, name_lower)` key is already present is left as it is.
///
/// # Errors
///
/// Returns the first `rusqlite::Error` raised while opening the transaction,
/// preparing or executing the insert, or committing.
pub fn insert_top_tracks(
    conn: &Connection,
    artist_mbid: &str,
    tracks: &[TopTrack],
) -> Result<(), rusqlite::Error> {
    let tx = conn.unchecked_transaction()?;
    {
        // Prepare the insert once and re-run it per track instead of
        // re-preparing the same SQL on every iteration. The inner scope ends
        // the statement's borrow of `tx` before `commit` consumes it.
        let mut stmt = tx.prepare(
            "INSERT OR IGNORE INTO top_tracks (artist_mbid, recording_mbid, name, name_lower, playcount)
             VALUES (?1, ?2, ?3, ?4, ?5)",
        )?;
        for t in tracks {
            let name_lower = t.name.to_lowercase();
            stmt.execute(rusqlite::params![
                artist_mbid,
                t.mbid,
                t.name,
                name_lower,
                t.playcount
            ])?;
        }
    }
    tx.commit()
}
/// Load the stored top tracks of one artist as `(name_lower, playcount)` pairs.
///
/// `playcount` is read as a signed 64-bit integer and converted to `u64`.
/// A negative stored value is clamped to `0` rather than wrapped into a huge
/// unsigned number.
///
/// # Errors
///
/// Returns a `rusqlite::Error` if the statement cannot be prepared or a row
/// cannot be read.
pub fn get_top_tracks_by_name(
    conn: &Connection,
    artist_mbid: &str,
) -> Result<Vec<(String, u64)>, rusqlite::Error> {
    let mut stmt = conn.prepare(
        "SELECT name_lower, playcount FROM top_tracks WHERE artist_mbid = ?1",
    )?;
    let rows = stmt.query_map([artist_mbid], |row| {
        let name_lower: String = row.get(0)?;
        let raw_playcount: i64 = row.get(1)?;
        // `max(0)` makes the cast lossless: every non-negative i64 fits in u64.
        Ok((name_lower, raw_playcount.max(0) as u64))
    })?;
    rows.collect()
}
pub fn insert_track(
conn: &Connection,
path: &str,
artist_mbid: &str,
recording_mbid: Option<&str>,
title: Option<&str>,
) -> Result<(), rusqlite::Error> {
conn.execute(
"INSERT OR IGNORE INTO tracks (path, artist_mbid, recording_mbid) VALUES (?1, ?2, ?3)",
rusqlite::params![path, artist_mbid, recording_mbid],
"INSERT OR IGNORE INTO tracks (path, artist_mbid, recording_mbid, title) VALUES (?1, ?2, ?3, ?4)",
rusqlite::params![path, artist_mbid, recording_mbid, title],
)?;
Ok(())
}

View File

@@ -70,20 +70,59 @@ impl LastfmClient {
Self { api_key }
}
/// Download `url` and hand back the response body as text.
///
/// Yields `Ok(None)` when the body deserializes as an `ApiError`; any failure
/// of the request or of reading the body is propagated as `Err`.
fn fetch_or_none(&self, url: &str) -> Result<Option<String>, Box<dyn std::error::Error>> {
    let mut response = ureq::get(url).call()?;
    let text: String = response.body_mut().read_to_string()?;
    let is_api_error = serde_json::from_str::<ApiError>(&text).is_ok();
    Ok(if is_api_error { None } else { Some(text) })
}
/// Query `method` by artist MBID, then retry by artist name if that yields nothing.
///
/// `extra_params` is spliced verbatim into the query string after the API
/// key. The artist name is percent-encoded before it is placed in the URL.
/// Returns `Ok(None)` when neither lookup produced a body.
fn fetch_with_fallback(
    &self,
    method: &str,
    artist_mbid: &str,
    artist_name: Option<&str>,
    extra_params: &str,
) -> Result<Option<String>, Box<dyn std::error::Error>> {
    // First attempt: look the artist up by MBID.
    let by_mbid = format!(
        "{}?method={}&mbid={}&api_key={}{}&format=json",
        BASE_URL, method, artist_mbid, self.api_key, extra_params
    );
    let first = self.fetch_or_none(&by_mbid)?;
    if first.is_some() {
        return Ok(first);
    }

    // Second attempt is only possible when a name was supplied.
    let Some(name) = artist_name else {
        return Ok(None);
    };
    let by_name = format!(
        "{}?method={}&artist={}&api_key={}{}&format=json",
        BASE_URL,
        method,
        urlencoding::encode(name),
        self.api_key,
        extra_params
    );
    self.fetch_or_none(&by_name)
}
pub fn get_similar_artists(
&self,
artist_mbid: &str,
artist_name: Option<&str>,
) -> Result<Vec<SimilarArtist>, Box<dyn std::error::Error>> {
let url = format!(
"{}?method=artist.getSimilar&mbid={}&api_key={}&limit=500&format=json",
BASE_URL, artist_mbid, self.api_key
);
let body: String = ureq::get(&url).call()?.body_mut().read_to_string()?;
if let Ok(err) = serde_json::from_str::<ApiError>(&body) {
eprintln!(" Last.fm: {}", err.message);
let Some(body) = self.fetch_with_fallback(
"artist.getSimilar",
artist_mbid,
artist_name,
"&limit=500",
)? else {
return Ok(Vec::new());
}
};
let resp: SimilarArtistsResponse = serde_json::from_str(&body)?;
Ok(resp
@@ -104,17 +143,16 @@ impl LastfmClient {
pub fn get_top_tracks(
&self,
artist_mbid: &str,
artist_name: Option<&str>,
) -> Result<Vec<TopTrack>, Box<dyn std::error::Error>> {
let url = format!(
"{}?method=artist.getTopTracks&mbid={}&api_key={}&limit=1000&format=json",
BASE_URL, artist_mbid, self.api_key
);
let body: String = ureq::get(&url).call()?.body_mut().read_to_string()?;
if let Ok(err) = serde_json::from_str::<ApiError>(&body) {
eprintln!(" Last.fm: {}", err.message);
let Some(body) = self.fetch_with_fallback(
"artist.getTopTracks",
artist_mbid,
artist_name,
"&limit=1000",
)? else {
return Ok(Vec::new());
}
};
let resp: TopTracksResponse = serde_json::from_str(&body)?;
Ok(resp

View File

@@ -82,7 +82,7 @@ fn cmd_index(args: &[String]) {
println!("Indexing {display_name}...");
}
match lastfm.get_similar_artists(&artist_mbid) {
match lastfm.get_similar_artists(&artist_mbid, artist_name.as_deref()) {
Ok(similar) => {
if let Err(e) = db::insert_artist_with_similar(
&conn,
@@ -99,12 +99,25 @@ fn cmd_index(args: &[String]) {
continue;
}
}
match lastfm.get_top_tracks(&artist_mbid, artist_name.as_deref()) {
Ok(top_tracks) => {
if let Err(e) = db::insert_top_tracks(&conn, &artist_mbid, &top_tracks) {
eprintln!("DB error inserting top tracks for {display_name}: {e}");
}
}
Err(e) => {
eprintln!("Last.fm top tracks error for {display_name}: {e}");
}
}
} else if verbose {
println!("Skipping {display_name} (already indexed)");
}
let track_title = metadata::read_track_title(&path).ok().flatten();
let path_str = path.to_string_lossy();
if let Err(e) = db::insert_track(&conn, &path_str, &artist_mbid, recording_mbid.as_deref()) {
if let Err(e) = db::insert_track(&conn, &path_str, &artist_mbid, recording_mbid.as_deref(), track_title.as_deref()) {
eprintln!("DB error inserting track {}: {e}", path.display());
}
}
@@ -153,14 +166,7 @@ fn cmd_build(args: &[String]) {
}
dotenvy::dotenv().ok();
let api_key = env::var("LASTFM_API_KEY").unwrap_or_default();
if api_key.is_empty() {
eprintln!("Error: LASTFM_API_KEY not set");
std::process::exit(1);
}
let conn = db::open("playlists.db").expect("failed to open database");
let lastfm = lastfm::LastfmClient::new(api_key);
let (artist_mbid, seed_name) = if let Some(file_arg) = rest.first() {
let path = Path::new(file_arg.as_str());
@@ -198,12 +204,11 @@ fn cmd_build(args: &[String]) {
}
};
build_playlist(&conn, &lastfm, &artist_mbid, &seed_name, count, verbose, mpd, shuffle, random);
build_playlist(&conn, &artist_mbid, &seed_name, count, verbose, mpd, shuffle, random);
}
fn build_playlist(
conn: &rusqlite::Connection,
lastfm: &lastfm::LastfmClient,
artist_mbid: &str,
seed_name: &str,
count: usize,
@@ -242,36 +247,35 @@ fn build_playlist(
continue;
}
// Fetch top tracks from Last.fm for popularity data
let top_tracks = match lastfm.get_top_tracks(mbid) {
// Look up pre-indexed top tracks from DB
let top_tracks_by_name = match db::get_top_tracks_by_name(conn, mbid) {
Ok(t) => t,
Err(e) => {
eprintln!("Last.fm error for {name}: {e}");
eprintln!("DB error fetching top tracks for {name}: {e}");
Vec::new()
}
};
// Build a map from recording_mbid -> playcount
let mut playcount_by_mbid: std::collections::HashMap<String, u64> =
std::collections::HashMap::new();
for tt in &top_tracks {
if let Some(ref mbid) = tt.mbid {
playcount_by_mbid.insert(mbid.clone(), tt.playcount);
}
}
let playcount_by_name: std::collections::HashMap<String, u64> =
top_tracks_by_name.into_iter().collect();
// Find max playcount for this artist to normalize
let max_playcount = top_tracks
.iter()
.map(|t| t.playcount)
let max_playcount = playcount_by_name
.values()
.copied()
.max()
.unwrap_or(1)
.max(1);
for (track_path, recording_mbid) in &local_tracks {
let playcount = recording_mbid
for (track_path, _recording_mbid, title) in &local_tracks {
// Match by title (lowercased), fall back to recording MBID
let playcount = title
.as_ref()
.and_then(|rec_mbid| playcount_by_mbid.get(rec_mbid).copied());
.and_then(|t| playcount_by_name.get(&t.to_lowercase()).copied())
.or_else(|| {
_recording_mbid
.as_ref()
.and_then(|id| playcount_by_name.get(id).copied())
});
// Skip tracks not in the artist's top 1000
let Some(playcount) = playcount else { continue };

View File

@@ -57,6 +57,17 @@ pub fn read_artist_mbid(path: &Path) -> Result<Option<String>, lofty::error::Lof
Ok(tag.get_string(ItemKey::MusicBrainzArtistId).map(String::from))
}
/// Read the title tag of the music file at `path`.
///
/// The primary tag is consulted first, then the first available tag.
/// Returns `Ok(None)` when the file has no tag or the tag has no title.
pub fn read_track_title(path: &Path) -> Result<Option<String>, lofty::error::LoftyError> {
    let tagged_file = lofty::read_from_path(path)?;
    let title = tagged_file
        .primary_tag()
        .or_else(|| tagged_file.first_tag())
        .and_then(|tag| tag.get_string(ItemKey::TrackTitle))
        .map(String::from);
    Ok(title)
}
/// Extract the MusicBrainz recording ID from a music file.
pub fn read_track_mbid(path: &Path) -> Result<Option<String>, lofty::error::LoftyError> {
let tagged_file = lofty::read_from_path(path)?;