Building up a little db of "similar artists"

This commit is contained in:
Connor Johnstone
2026-03-02 22:01:43 -05:00
parent 16e8962be1
commit 4a388c6637
8 changed files with 395 additions and 79 deletions

1
.gitignore vendored
View File

@@ -1,2 +1,3 @@
/target
.env
playlists.db

86
Cargo.lock generated
View File

@@ -14,6 +14,12 @@ version = "0.22.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
[[package]]
name = "bitflags"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
[[package]]
name = "byteorder"
version = "1.5.0"
@@ -63,6 +69,18 @@ version = "0.15.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b"
[[package]]
name = "fallible-iterator"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2acce4a10f12dc2fb14a218589d4f1f62ef011b2d0cc4b3cb1bba8e94da14649"
[[package]]
name = "fallible-streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "find-msvc-tools"
version = "0.1.9"
@@ -79,6 +97,12 @@ dependencies = [
"miniz_oxide",
]
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "getrandom"
version = "0.2.17"
@@ -90,6 +114,24 @@ dependencies = [
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"foldhash",
]
[[package]]
name = "hashlink"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
dependencies = [
"hashbrown",
]
[[package]]
name = "http"
version = "1.4.0"
@@ -118,6 +160,17 @@ version = "0.2.182"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
[[package]]
name = "libsqlite3-sys"
version = "0.32.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fbb8270bb4060bd76c6e96f20c52d80620f1d82a3470885694e41e0f81ef6fe7"
dependencies = [
"cc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "lofty"
version = "0.23.2"
@@ -193,12 +246,19 @@ version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
[[package]]
name = "pkg-config"
version = "0.3.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
name = "playlists"
version = "0.1.0"
dependencies = [
"dotenvy",
"lofty",
"rusqlite",
"serde",
"serde_json",
"ureq",
@@ -237,6 +297,20 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "rusqlite"
version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37e34486da88d8e051c7c0e23c3f15fd806ea8546260aa2fec247e97242ec143"
dependencies = [
"bitflags",
"fallible-iterator",
"fallible-streaming-iterator",
"hashlink",
"libsqlite3-sys",
"smallvec",
]
[[package]]
name = "rustls"
version = "0.23.37"
@@ -336,6 +410,12 @@ version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "subtle"
version = "2.6.1"
@@ -400,6 +480,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "walkdir"
version = "2.5.0"

View File

@@ -8,5 +8,6 @@ dotenvy = "0.15"
lofty = "0.23"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
rusqlite = { version = "0.34", features = ["bundled"] }
ureq = "3"
walkdir = "2.5"

168
scripts/search.py Executable file
View File

@@ -0,0 +1,168 @@
#!/usr/bin/env python3
"""Fuzzy search artists in playlists.db and show their similar artists."""
import curses
import sqlite3
import sys
from pathlib import Path
def find_db():
    """Locate ``playlists.db`` and return its path as a string.

    Two places are searched, in order: the current working directory, then
    the directory two levels above this script file (the parent of the
    directory that contains it).

    If neither location holds the database, an error is printed to stderr and
    the process exits with status 1.
    """
    candidates = (Path.cwd(), Path(__file__).resolve().parent.parent)
    for base in candidates:
        p = base / "playlists.db"
        # is_file() rather than exists(): a directory that merely happens to
        # be called playlists.db cannot be opened by sqlite3 later on.
        if p.is_file():
            return str(p)
    print("Could not find playlists.db", file=sys.stderr)
    sys.exit(1)
def load_artists(db_path):
    """Return every artist in the database as ``[(mbid, display_name), ...]``.

    ``display_name`` is the stored name, or the MBID when the name is NULL.
    Rows are ordered by the raw ``name`` column, so artists without a name
    (NULL) come first.

    Raises ``sqlite3.Error`` if the database cannot be queried.
    """
    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(
            "SELECT mbid, COALESCE(name, mbid) FROM artists ORDER BY name"
        ).fetchall()
    finally:
        # Close even when the query raises (e.g. the table is missing), so the
        # connection is never leaked.
        conn.close()
def get_similar(db_path, mbid):
    """Return the similar artists recorded for ``mbid``.

    The result is a list of ``(similar_name, match_score)`` tuples sorted by
    score, highest first. It is empty when nothing is stored for that artist.

    Raises ``sqlite3.Error`` if the database cannot be queried.
    """
    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(
            "SELECT similar_name, match_score FROM similar_artists "
            "WHERE artist_mbid = ?1 ORDER BY match_score DESC",
            (mbid,),
        ).fetchall()
    finally:
        # Close even when the query raises, so the connection is never leaked.
        conn.close()
def fuzzy_match(query, name):
    """Return True if the characters of ``query`` appear, in order, in ``name``.

    The comparison is case-insensitive on both sides, and the characters do
    not have to be adjacent ("rdh" matches "Radiohead"). An empty query
    matches every name.
    """
    # A single shared iterator makes each `in` test resume where the previous
    # one stopped, which is exactly the "in order" (subsequence) requirement.
    remaining = iter(name.lower())
    return all(ch in remaining for ch in query.lower())
def run_tui(stdscr, db_path):
    """Interactive artist picker; intended to be run through ``curses.wrapper``.

    Row 0 shows the search prompt and a "matches/total" counter; the rows
    below show the artists that fuzzy-match the query.

    Keys:
      * printable characters / Backspace -- edit the query
      * Up / Ctrl-P, Down / Ctrl-N       -- move the selection
      * Enter                            -- show similar artists for the selection
      * Esc                              -- quit
    """
    curses.curs_set(0)
    curses.use_default_colors()
    curses.init_pair(1, curses.COLOR_BLACK, curses.COLOR_CYAN)
    curses.init_pair(2, curses.COLOR_CYAN, -1)
    curses.init_pair(3, curses.COLOR_WHITE, -1)

    artists = load_artists(db_path)
    query = ""
    selected = 0
    scroll = 0

    while True:
        stdscr.erase()
        h, w = stdscr.getmaxyx()
        # Writing into the bottom-right cell makes curses raise, so never draw
        # into the last column.
        cols = max(w - 1, 0)

        # filter
        q = query.lower()
        if q:
            filtered = [(m, n) for m, n in artists if fuzzy_match(q, n)]
        else:
            filtered = artists
        selected = max(0, min(selected, len(filtered) - 1))

        # prompt
        prompt = f" > {query}"
        stdscr.addnstr(0, 0, prompt, cols, curses.color_pair(2) | curses.A_BOLD)
        count_str = f" {len(filtered)}/{len(artists)}"
        if len(prompt) + len(count_str) < w:
            stdscr.addstr(0, len(prompt), count_str, curses.color_pair(3))

        # artist list (skipped entirely when the terminal is a single row)
        list_h = h - 1
        if list_h >= 1:
            # Keep the selected row inside the visible window.
            if selected < scroll:
                scroll = selected
            if selected >= scroll + list_h:
                scroll = selected - list_h + 1

            visible = filtered[scroll:scroll + list_h]
            for row, (_, name) in enumerate(visible, start=1):
                idx = scroll + row - 1
                attr = curses.color_pair(1) if idx == selected else curses.A_NORMAL
                stdscr.addnstr(row, 0, f" {name}", cols, attr)

        stdscr.refresh()

        # Always block for a key, even when there was no room for the list;
        # otherwise a one-row terminal would spin without ever reading input.
        key = stdscr.get_wch()

        if key == "\x1b":  # Esc
            return
        elif key == curses.KEY_UP or key == "\x10":  # Up / Ctrl-P
            selected = max(0, selected - 1)
        elif key == curses.KEY_DOWN or key == "\x0e":  # Down / Ctrl-N
            selected = max(0, min(len(filtered) - 1, selected + 1))
        elif key == "\n" or key == curses.KEY_ENTER:
            if filtered:
                show_similar(stdscr, db_path, filtered[selected])
        elif key in (curses.KEY_BACKSPACE, "\x7f", "\x08"):
            query = query[:-1]
            selected = 0
            scroll = 0
        elif isinstance(key, str) and key.isprintable():
            query += key
            selected = 0
            scroll = 0
def show_similar(stdscr, db_path, artist):
    """Show a scrollable list of the artists similar to ``artist``.

    ``artist`` is an ``(mbid, display_name)`` pair. Each row shows the match
    score followed by the similar artist's name.

    Keys: Up / Ctrl-P and Down / Ctrl-N scroll; ``q``, ``Q`` or Esc return to
    the caller.
    """
    mbid, name = artist
    similar = get_similar(db_path, mbid)

    curses.curs_set(0)
    title = f" Similar to {name}"
    scroll = 0

    while True:
        # Re-read the geometry on every pass so a terminal resize is honoured
        # instead of drawing with stale dimensions.
        stdscr.erase()
        h, w = stdscr.getmaxyx()
        # Writing into the bottom-right cell makes curses raise, so never draw
        # into the last column.
        cols = max(w - 1, 0)
        list_h = max(h - 2, 0)
        # A resize may have enlarged the window; pull the scroll offset back
        # so the list still ends at the bottom.
        scroll = max(0, min(scroll, len(similar) - list_h))

        stdscr.addnstr(0, 0, title, cols, curses.color_pair(2) | curses.A_BOLD)
        if h > 1:
            stdscr.addnstr(h - 1, 0, " [q] back", cols, curses.color_pair(3))

        visible = similar[scroll:scroll + list_h]
        for row, (sname, score) in enumerate(visible, start=1):
            stdscr.addnstr(row, 0, f" {score:5.2f} {sname}", cols)

        stdscr.refresh()
        key = stdscr.get_wch()

        if key in ("q", "Q", "\x1b"):
            return
        elif key == curses.KEY_UP or key == "\x10":
            scroll = max(0, scroll - 1)
        elif key == curses.KEY_DOWN or key == "\x0e":
            if scroll + list_h < len(similar):
                scroll += 1
def main():
    """Find the database, then hand control to the curses UI."""
    # find_db() is evaluated before curses.wrapper() touches the terminal, so
    # its "not found" message is printed to a normal screen.
    curses.wrapper(run_tui, find_db())
if __name__ == "__main__":
main()

50
src/db.rs Normal file
View File

@@ -0,0 +1,50 @@
use rusqlite::Connection;
use crate::lastfm::SimilarArtist;
/// Opens the SQLite database at `path` and ensures the schema is present.
///
/// Two tables are created when missing: `artists`, keyed by MBID, and
/// `similar_artists`, keyed by `(artist_mbid, similar_name)`. Both statements
/// use `IF NOT EXISTS`, so calling this on an existing database leaves its
/// contents untouched.
///
/// # Errors
///
/// Returns the underlying [`rusqlite::Error`] if the database cannot be opened
/// or the schema batch fails.
pub fn open(path: &str) -> Result<Connection, rusqlite::Error> {
    const SCHEMA: &str = "CREATE TABLE IF NOT EXISTS artists (
            mbid TEXT PRIMARY KEY,
            name TEXT
        );
        CREATE TABLE IF NOT EXISTS similar_artists (
            artist_mbid TEXT NOT NULL REFERENCES artists(mbid),
            similar_mbid TEXT,
            similar_name TEXT NOT NULL,
            match_score REAL NOT NULL,
            PRIMARY KEY (artist_mbid, similar_name)
        );";

    let db = Connection::open(path)?;
    db.execute_batch(SCHEMA)?;
    Ok(db)
}
/// Reports whether the `artists` table already has a row for `mbid`.
///
/// # Errors
///
/// Returns the underlying [`rusqlite::Error`] if the lookup query fails.
pub fn artist_exists(conn: &Connection, mbid: &str) -> Result<bool, rusqlite::Error> {
    conn.query_row(
        "SELECT COUNT(*) FROM artists WHERE mbid = ?1",
        [mbid],
        |row| row.get::<_, i64>(0),
    )
    .map(|matches| matches > 0)
}
/// Stores an artist together with its similar-artist list in one transaction.
///
/// The artist row and every `similar` row are written with `INSERT OR IGNORE`,
/// so rows that already exist are left as they are. Either all rows are
/// committed or, if any statement fails, the function returns early without
/// committing.
///
/// * `mbid` - MusicBrainz ID of the artist being indexed.
/// * `name` - display name, stored as NULL when `None`.
/// * `similar` - similar artists to record for `mbid`; may be empty.
///
/// # Errors
///
/// Returns the underlying [`rusqlite::Error`] from the first statement (or the
/// commit) that fails.
pub fn insert_artist_with_similar(
    conn: &Connection,
    mbid: &str,
    name: Option<&str>,
    similar: &[SimilarArtist],
) -> Result<(), rusqlite::Error> {
    let tx = conn.unchecked_transaction()?;
    tx.execute(
        "INSERT OR IGNORE INTO artists (mbid, name) VALUES (?1, ?2)",
        rusqlite::params![mbid, name],
    )?;

    {
        // Compile the INSERT once and re-bind it per row instead of having
        // `execute` re-prepare the same SQL for every similar artist.
        let mut insert_similar = tx.prepare(
            "INSERT OR IGNORE INTO similar_artists (artist_mbid, similar_mbid, similar_name, match_score)
             VALUES (?1, ?2, ?3, ?4)",
        )?;
        for s in similar {
            insert_similar.execute(rusqlite::params![mbid, s.mbid, s.name, s.match_score])?;
        }
        // The statement borrows `tx`; this scope ends that borrow so the
        // transaction can be consumed by `commit` below.
    }

    tx.commit()
}

View File

@@ -1,12 +1,9 @@
use std::collections::HashMap;
use serde::Deserialize;
const BASE_URL: &str = "https://ws.audioscrobbler.com/2.0/";
pub struct LastfmClient {
api_key: String,
artist_cache: HashMap<String, Vec<SimilarArtist>>,
}
pub struct SimilarArtist {
@@ -18,6 +15,7 @@ pub struct SimilarArtist {
// Last.fm returns {"error": N, "message": "..."} on failure
// The response body is tried against this shape first; when it parses, the
// message is printed and an empty similar-artist list is returned.
#[derive(Deserialize)]
struct ApiError {
    // Never read by the code, but as a required field it keeps ordinary
    // (non-error) responses from deserializing as an `ApiError`.
    #[allow(dead_code)]
    error: u32,
    message: String,
}
@@ -44,46 +42,37 @@ struct ArtistEntry {
impl LastfmClient {
pub fn new(api_key: String) -> Self {
Self {
api_key,
artist_cache: HashMap::new(),
}
Self { api_key }
}
pub fn get_similar_artists(
&mut self,
&self,
artist_mbid: &str,
) -> Result<&[SimilarArtist], Box<dyn std::error::Error>> {
if !self.artist_cache.contains_key(artist_mbid) {
let url = format!(
"{}?method=artist.getSimilar&mbid={}&api_key={}&format=json",
BASE_URL, artist_mbid, self.api_key
);
let body: String = ureq::get(&url).call()?.body_mut().read_to_string()?;
) -> Result<Vec<SimilarArtist>, Box<dyn std::error::Error>> {
let url = format!(
"{}?method=artist.getSimilar&mbid={}&api_key={}&format=json",
BASE_URL, artist_mbid, self.api_key
);
let body: String = ureq::get(&url).call()?.body_mut().read_to_string()?;
let artists = if let Ok(err) = serde_json::from_str::<ApiError>(&body) {
eprintln!(" Last.fm: {}", err.message);
Vec::new()
} else {
let resp: SimilarArtistsResponse = serde_json::from_str(&body)?;
resp.similarartists
.artist
.into_iter()
.map(|a| {
let mbid = a.mbid.filter(|s| !s.is_empty());
SimilarArtist {
name: a.name,
mbid,
match_score: a.match_score.parse().unwrap_or(0.0),
}
})
.collect()
};
self.artist_cache.insert(artist_mbid.to_string(), artists);
if let Ok(err) = serde_json::from_str::<ApiError>(&body) {
eprintln!(" Last.fm: {}", err.message);
return Ok(Vec::new());
}
Ok(self.artist_cache.get(artist_mbid).unwrap())
let resp: SimilarArtistsResponse = serde_json::from_str(&body)?;
Ok(resp
.similarartists
.artist
.into_iter()
.map(|a| {
let mbid = a.mbid.filter(|s| !s.is_empty());
SimilarArtist {
name: a.name,
mbid,
match_score: a.match_score.parse().unwrap_or(0.0),
}
})
.collect())
}
}

View File

@@ -1,3 +1,4 @@
mod db;
mod filesystem;
mod lastfm;
mod metadata;
@@ -6,62 +7,71 @@ use std::env;
use std::path::Path;
fn main() {
dotenvy::dotenv().ok();
let args: Vec<String> = env::args().collect();
if args.len() != 2 {
eprintln!("Usage: {} <directory>", args[0]);
let verbose = args.iter().any(|a| a == "-v");
let rest: Vec<&String> = args.iter().skip(1).filter(|a| *a != "-v").collect();
if rest.len() != 2 || rest[0] != "index" {
eprintln!("Usage: {} index [-v] <directory>", args[0]);
std::process::exit(1);
}
let api_key = env::var("LASTFM_API_KEY").unwrap_or_default();
let mut lastfm = if api_key.is_empty() {
eprintln!("Warning: LASTFM_API_KEY not set, skipping similar artist lookups");
None
} else {
Some(lastfm::LastfmClient::new(api_key))
};
dotenvy::dotenv().ok();
let dir = Path::new(&args[1]);
let api_key = env::var("LASTFM_API_KEY").unwrap_or_default();
if api_key.is_empty() {
eprintln!("Error: LASTFM_API_KEY not set");
std::process::exit(1);
}
let conn = db::open("playlists.db").expect("failed to open database");
let lastfm = lastfm::LastfmClient::new(api_key);
let dir = Path::new(rest[1].as_str());
for path in filesystem::walk_music_files(dir) {
match metadata::read_all_metadata(&path) {
Ok(Some(entries)) => {
println!("{}", path.display());
for entry in &entries {
println!(" {:30} {}", entry.key, entry.value);
}
}
Ok(None) => {
println!("{}", path.display());
println!(" (no metadata tags found)");
}
let artist_mbid = match metadata::read_artist_mbid(&path) {
Ok(Some(mbid)) => mbid,
Ok(None) => continue,
Err(e) => {
eprintln!("{}: could not read metadata: {e}", path.display());
eprintln!("{}: could not read artist MBID: {e}", path.display());
continue;
}
};
let already_indexed = match db::artist_exists(&conn, &artist_mbid) {
Ok(exists) => exists,
Err(e) => {
eprintln!("DB error checking artist {artist_mbid}: {e}");
continue;
}
};
let artist_name = metadata::read_artist_name(&path).ok().flatten();
let display_name = artist_name.as_deref().unwrap_or(&artist_mbid);
if already_indexed {
if verbose {
println!("Skipping {display_name} (already indexed)");
}
continue;
}
if let Some(client) = lastfm.as_mut() {
let artist_mbid = match metadata::read_artist_mbid(&path) {
Ok(Some(mbid)) => mbid,
Ok(None) => continue,
Err(e) => {
eprintln!("{}: could not read artist MBID: {e}", path.display());
continue;
}
};
if verbose {
println!("Indexing {display_name}...");
}
match client.get_similar_artists(&artist_mbid) {
Ok(similar) => {
if !similar.is_empty() {
println!(" Similar artists:");
for a in similar.iter().take(50) {
println!(" {:.2} {}", a.match_score, a.name);
}
}
match lastfm.get_similar_artists(&artist_mbid) {
Ok(similar) => {
if let Err(e) = db::insert_artist_with_similar(
&conn,
&artist_mbid,
artist_name.as_deref(),
&similar,
) {
eprintln!("DB error inserting artist {artist_mbid}: {e}");
}
Err(e) => eprintln!(" Warning: similar artists lookup failed: {e}"),
}
Err(e) => eprintln!("Last.fm error for {artist_mbid}: {e}"),
}
}
}

View File

@@ -35,6 +35,17 @@ pub fn read_all_metadata(path: &Path) -> Result<Option<Vec<TagEntry>>, lofty::er
Ok(Some(entries))
}
/// Reads the track-artist name from the tags of the music file at `path`.
///
/// The file's primary tag is consulted, falling back to the first tag present.
/// Returns `Ok(None)` when the file carries no tag at all or the chosen tag
/// has no track-artist entry.
///
/// # Errors
///
/// Propagates the [`lofty::error::LoftyError`] raised while reading the file.
pub fn read_artist_name(path: &Path) -> Result<Option<String>, lofty::error::LoftyError> {
    let file = lofty::read_from_path(path)?;
    let artist = file
        .primary_tag()
        .or_else(|| file.first_tag())
        .and_then(|tag| tag.get_string(ItemKey::TrackArtist))
        .map(String::from);
    Ok(artist)
}
/// Extract the MusicBrainz artist ID from a music file.
pub fn read_artist_mbid(path: &Path) -> Result<Option<String>, lofty::error::LoftyError> {
let tagged_file = lofty::read_from_path(path)?;