From 4a388c66375f75d8c3c17767f27ef7f93b8d6c7c Mon Sep 17 00:00:00 2001 From: Connor Johnstone Date: Mon, 2 Mar 2026 22:01:43 -0500 Subject: [PATCH] Building up a little db of "similar artists" --- .gitignore | 1 + Cargo.lock | 86 ++++++++++++++++++++++++ Cargo.toml | 1 + scripts/search.py | 168 ++++++++++++++++++++++++++++++++++++++++++++++ src/db.rs | 50 ++++++++++++++ src/lastfm.rs | 63 +++++++---------- src/main.rs | 94 ++++++++++++++------------ src/metadata.rs | 11 +++ 8 files changed, 395 insertions(+), 79 deletions(-) create mode 100755 scripts/search.py create mode 100644 src/db.rs diff --git a/.gitignore b/.gitignore index fedaa2b..76e7bee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /target .env +playlists.db diff --git a/Cargo.lock b/Cargo.lock index 92eb93d..fa2f740 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,6 +14,12 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + [[package]] name = "byteorder" version = "1.5.0" @@ -63,6 +69,18 @@ version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "fallible-iterator" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -79,6 +97,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "getrandom" version = "0.2.17" @@ -90,6 +114,24 @@ dependencies = [ "wasi", ] +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown", +] + [[package]] name = "http" version = "1.4.0" @@ -118,6 +160,17 @@ version = "0.2.182" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" +[[package]] +name = "libsqlite3-sys" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbb8270bb4060bd76c6e96f20c52d80620f1d82a3470885694e41e0f81ef6fe7" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + [[package]] name = "lofty" version = "0.23.2" @@ -193,12 +246,19 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pkg-config" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" + [[package]] name = "playlists" version = "0.1.0" dependencies = [ "dotenvy", "lofty", + "rusqlite", "serde", "serde_json", "ureq", @@ -237,6 +297,20 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rusqlite" +version = "0.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e34486da88d8e051c7c0e23c3f15fd806ea8546260aa2fec247e97242ec143" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rustls" version = "0.23.37" @@ -336,6 +410,12 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + [[package]] name = "subtle" version = "2.6.1" @@ -400,6 +480,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 6122bf6..f018344 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,5 +8,6 @@ dotenvy = "0.15" lofty = "0.23" serde = { version = "1", features = ["derive"] } serde_json = "1" +rusqlite = { version = "0.34", features = ["bundled"] } ureq = "3" walkdir = "2.5" diff --git a/scripts/search.py b/scripts/search.py new file mode 100755 index 0000000..6c77c8b --- /dev/null +++ b/scripts/search.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +"""Fuzzy search artists in playlists.db and show their similar artists.""" + +import curses +import sqlite3 +import sys +from pathlib import Path + + +def find_db(): + """Look for playlists.db in cwd, then script's parent dir.""" + for base in [Path.cwd(), Path(__file__).resolve().parent.parent]: + p = base / "playlists.db" + if p.exists(): + return str(p) + print("Could not find playlists.db", file=sys.stderr) + sys.exit(1) + + +def load_artists(db_path): + conn = sqlite3.connect(db_path) + rows = conn.execute( + "SELECT mbid, COALESCE(name, mbid) FROM artists ORDER BY name" + ).fetchall() + conn.close() + return rows # [(mbid, display_name), ...] + + +def get_similar(db_path, mbid): + conn = sqlite3.connect(db_path) + rows = conn.execute( + "SELECT similar_name, match_score FROM similar_artists " + "WHERE artist_mbid = ?1 ORDER BY match_score DESC", + (mbid,), + ).fetchall() + conn.close() + return rows + + +def fuzzy_match(query, name): + """Simple fuzzy: all query chars appear in order in name.""" + name_lower = name.lower() + qi = 0 + for ch in name_lower: + if qi < len(query) and ch == query[qi]: + qi += 1 + return qi == len(query) + + +def run_tui(stdscr, db_path): + curses.curs_set(0) + curses.use_default_colors() + curses.init_pair(1, curses.COLOR_BLACK, curses.COLOR_CYAN) + curses.init_pair(2, curses.COLOR_CYAN, -1) + curses.init_pair(3, curses.COLOR_WHITE, -1) + + artists = load_artists(db_path) + query = "" + selected = 0 + scroll = 0 + + while True: + stdscr.erase() + h, w = stdscr.getmaxyx() + + # filter + q = query.lower() + if q: + filtered = [(m, n) for m, n in artists if fuzzy_match(q, n)] + else: + filtered = artists + + selected = max(0, min(selected, len(filtered) - 1)) + + # prompt + prompt = f" > {query}" + stdscr.addnstr(0, 0, prompt, w, curses.color_pair(2) | curses.A_BOLD) + count_str = f" {len(filtered)}/{len(artists)}" + if len(prompt) + len(count_str) < w: + stdscr.addstr(0, len(prompt), count_str, curses.color_pair(3)) + + # artist list + list_h = h - 1 + if list_h < 1: + stdscr.refresh() + continue + + if selected < scroll: + scroll = selected + if selected >= scroll + list_h: + scroll = selected - list_h + 1 + + for i in range(list_h): + idx = scroll + i + if idx >= len(filtered): + break + _, name = filtered[idx] + attr = curses.color_pair(1) if idx == selected else curses.A_NORMAL + stdscr.addnstr(i + 1, 0, f" {name}", w, attr) + + stdscr.refresh() + + key = stdscr.get_wch() + + if key == "\x1b": # Esc + return + elif key == curses.KEY_UP or key == "\x10": # Up / Ctrl-P + selected = max(0, selected - 1) + elif key == curses.KEY_DOWN or key == "\x0e": # Down / Ctrl-N + selected = min(len(filtered) - 1, selected + 1) + elif key == "\n" or key == curses.KEY_ENTER: + if filtered: + show_similar(stdscr, db_path, filtered[selected]) + elif key in (curses.KEY_BACKSPACE, "\x7f", "\x08"): + query = query[:-1] + selected = 0 + scroll = 0 + elif isinstance(key, str) and key.isprintable(): + query += key + selected = 0 + scroll = 0 + + +def show_similar(stdscr, db_path, artist): + mbid, name = artist + similar = get_similar(db_path, mbid) + + curses.curs_set(0) + stdscr.erase() + h, w = stdscr.getmaxyx() + + title = f" Similar to {name}" + stdscr.addnstr(0, 0, title, w, curses.color_pair(2) | curses.A_BOLD) + stdscr.addnstr(h - 1, 0, " [q] back", w, curses.color_pair(3)) + + scroll = 0 + list_h = h - 2 + + while True: + for i in range(list_h): + stdscr.move(i + 1, 0) + stdscr.clrtoeol() + idx = scroll + i + if idx >= len(similar): + continue + sname, score = similar[idx] + line = f" {score:5.2f} {sname}" + stdscr.addnstr(i + 1, 0, line, w) + + stdscr.refresh() + key = stdscr.get_wch() + + if key in ("q", "Q", "\x1b"): + return + elif key == curses.KEY_UP or key == "\x10": + scroll = max(0, scroll - 1) + elif key == curses.KEY_DOWN or key == "\x0e": + if scroll + list_h < len(similar): + scroll += 1 + + +def main(): + db_path = find_db() + curses.wrapper(run_tui, db_path) + + +if __name__ == "__main__": + main() diff --git a/src/db.rs b/src/db.rs new file mode 100644 index 0000000..34a1d5c --- /dev/null +++ b/src/db.rs @@ -0,0 +1,50 @@ +use rusqlite::Connection; + +use crate::lastfm::SimilarArtist; + +pub fn open(path: &str) -> Result { + let conn = Connection::open(path)?; + conn.execute_batch( + "CREATE TABLE IF NOT EXISTS artists ( + mbid TEXT PRIMARY KEY, + name TEXT + ); + CREATE TABLE IF NOT EXISTS similar_artists ( + artist_mbid TEXT NOT NULL REFERENCES artists(mbid), + similar_mbid TEXT, + similar_name TEXT NOT NULL, + match_score REAL NOT NULL, + PRIMARY KEY (artist_mbid, similar_name) + );", + )?; + Ok(conn) +} + +pub fn artist_exists(conn: &Connection, mbid: &str) -> Result { + let count: i64 = + conn.query_row("SELECT COUNT(*) FROM artists WHERE mbid = ?1", [mbid], |row| { + row.get(0) + })?; + Ok(count > 0) +} + +pub fn insert_artist_with_similar( + conn: &Connection, + mbid: &str, + name: Option<&str>, + similar: &[SimilarArtist], +) -> Result<(), rusqlite::Error> { + let tx = conn.unchecked_transaction()?; + tx.execute( + "INSERT OR IGNORE INTO artists (mbid, name) VALUES (?1, ?2)", + rusqlite::params![mbid, name], + )?; + for s in similar { + tx.execute( + "INSERT OR IGNORE INTO similar_artists (artist_mbid, similar_mbid, similar_name, match_score) + VALUES (?1, ?2, ?3, ?4)", + rusqlite::params![mbid, s.mbid, s.name, s.match_score], + )?; + } + tx.commit() +} diff --git a/src/lastfm.rs b/src/lastfm.rs index f61e9b9..fbebb88 100644 --- a/src/lastfm.rs +++ b/src/lastfm.rs @@ -1,12 +1,9 @@ -use std::collections::HashMap; - use serde::Deserialize; const BASE_URL: &str = "https://ws.audioscrobbler.com/2.0/"; pub struct LastfmClient { api_key: String, - artist_cache: HashMap>, } pub struct SimilarArtist { @@ -18,6 +15,7 @@ pub struct SimilarArtist { // Last.fm returns {"error": N, "message": "..."} on failure #[derive(Deserialize)] struct ApiError { + #[allow(dead_code)] error: u32, message: String, } @@ -44,46 +42,37 @@ struct ArtistEntry { impl LastfmClient { pub fn new(api_key: String) -> Self { - Self { - api_key, - artist_cache: HashMap::new(), - } + Self { api_key } } pub fn get_similar_artists( - &mut self, + &self, artist_mbid: &str, - ) -> Result<&[SimilarArtist], Box> { - if !self.artist_cache.contains_key(artist_mbid) { - let url = format!( - "{}?method=artist.getSimilar&mbid={}&api_key={}&format=json", - BASE_URL, artist_mbid, self.api_key - ); - let body: String = ureq::get(&url).call()?.body_mut().read_to_string()?; + ) -> Result, Box> { + let url = format!( + "{}?method=artist.getSimilar&mbid={}&api_key={}&format=json", + BASE_URL, artist_mbid, self.api_key + ); + let body: String = ureq::get(&url).call()?.body_mut().read_to_string()?; - let artists = if let Ok(err) = serde_json::from_str::(&body) { - eprintln!(" Last.fm: {}", err.message); - Vec::new() - } else { - let resp: SimilarArtistsResponse = serde_json::from_str(&body)?; - resp.similarartists - .artist - .into_iter() - .map(|a| { - let mbid = a.mbid.filter(|s| !s.is_empty()); - SimilarArtist { - name: a.name, - mbid, - match_score: a.match_score.parse().unwrap_or(0.0), - } - }) - .collect() - }; - - self.artist_cache.insert(artist_mbid.to_string(), artists); + if let Ok(err) = serde_json::from_str::(&body) { + eprintln!(" Last.fm: {}", err.message); + return Ok(Vec::new()); } - Ok(self.artist_cache.get(artist_mbid).unwrap()) + let resp: SimilarArtistsResponse = serde_json::from_str(&body)?; + Ok(resp + .similarartists + .artist + .into_iter() + .map(|a| { + let mbid = a.mbid.filter(|s| !s.is_empty()); + SimilarArtist { + name: a.name, + mbid, + match_score: a.match_score.parse().unwrap_or(0.0), + } + }) + .collect()) } - } diff --git a/src/main.rs b/src/main.rs index 8244e46..a9bedde 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,4 @@ +mod db; mod filesystem; mod lastfm; mod metadata; @@ -6,62 +7,71 @@ use std::env; use std::path::Path; fn main() { - dotenvy::dotenv().ok(); - let args: Vec = env::args().collect(); - if args.len() != 2 { - eprintln!("Usage: {} ", args[0]); + let verbose = args.iter().any(|a| a == "-v"); + let rest: Vec<&String> = args.iter().skip(1).filter(|a| *a != "-v").collect(); + + if rest.len() != 2 || rest[0] != "index" { + eprintln!("Usage: {} index [-v] ", args[0]); std::process::exit(1); } - let api_key = env::var("LASTFM_API_KEY").unwrap_or_default(); - let mut lastfm = if api_key.is_empty() { - eprintln!("Warning: LASTFM_API_KEY not set, skipping similar artist lookups"); - None - } else { - Some(lastfm::LastfmClient::new(api_key)) - }; + dotenvy::dotenv().ok(); - let dir = Path::new(&args[1]); + let api_key = env::var("LASTFM_API_KEY").unwrap_or_default(); + if api_key.is_empty() { + eprintln!("Error: LASTFM_API_KEY not set"); + std::process::exit(1); + } + + let conn = db::open("playlists.db").expect("failed to open database"); + let lastfm = lastfm::LastfmClient::new(api_key); + let dir = Path::new(rest[1].as_str()); for path in filesystem::walk_music_files(dir) { - match metadata::read_all_metadata(&path) { - Ok(Some(entries)) => { - println!("{}", path.display()); - for entry in &entries { - println!(" {:30} {}", entry.key, entry.value); - } - } - Ok(None) => { - println!("{}", path.display()); - println!(" (no metadata tags found)"); - } + let artist_mbid = match metadata::read_artist_mbid(&path) { + Ok(Some(mbid)) => mbid, + Ok(None) => continue, Err(e) => { - eprintln!("{}: could not read metadata: {e}", path.display()); + eprintln!("{}: could not read artist MBID: {e}", path.display()); + continue; } + }; + + let already_indexed = match db::artist_exists(&conn, &artist_mbid) { + Ok(exists) => exists, + Err(e) => { + eprintln!("DB error checking artist {artist_mbid}: {e}"); + continue; + } + }; + + let artist_name = metadata::read_artist_name(&path).ok().flatten(); + let display_name = artist_name.as_deref().unwrap_or(&artist_mbid); + + if already_indexed { + if verbose { + println!("Skipping {display_name} (already indexed)"); + } + continue; } - if let Some(client) = lastfm.as_mut() { - let artist_mbid = match metadata::read_artist_mbid(&path) { - Ok(Some(mbid)) => mbid, - Ok(None) => continue, - Err(e) => { - eprintln!("{}: could not read artist MBID: {e}", path.display()); - continue; - } - }; + if verbose { + println!("Indexing {display_name}..."); + } - match client.get_similar_artists(&artist_mbid) { - Ok(similar) => { - if !similar.is_empty() { - println!(" Similar artists:"); - for a in similar.iter().take(50) { - println!(" {:.2} {}", a.match_score, a.name); - } - } + match lastfm.get_similar_artists(&artist_mbid) { + Ok(similar) => { + if let Err(e) = db::insert_artist_with_similar( + &conn, + &artist_mbid, + artist_name.as_deref(), + &similar, + ) { + eprintln!("DB error inserting artist {artist_mbid}: {e}"); } - Err(e) => eprintln!(" Warning: similar artists lookup failed: {e}"), } + Err(e) => eprintln!("Last.fm error for {artist_mbid}: {e}"), } } } diff --git a/src/metadata.rs b/src/metadata.rs index 9a13b21..5195bb8 100644 --- a/src/metadata.rs +++ b/src/metadata.rs @@ -35,6 +35,17 @@ pub fn read_all_metadata(path: &Path) -> Result>, lofty::er Ok(Some(entries)) } +/// Extract the artist name from a music file. +pub fn read_artist_name(path: &Path) -> Result, lofty::error::LoftyError> { + let tagged_file = lofty::read_from_path(path)?; + + let Some(tag) = tagged_file.primary_tag().or_else(|| tagged_file.first_tag()) else { + return Ok(None); + }; + + Ok(tag.get_string(ItemKey::TrackArtist).map(String::from)) +} + /// Extract the MusicBrainz artist ID from a music file. pub fn read_artist_mbid(path: &Path) -> Result, lofty::error::LoftyError> { let tagged_file = lofty::read_from_path(path)?;