Building up a little db of "similar artists"

This commit is contained in:
Connor Johnstone
2026-03-02 22:01:43 -05:00
parent 16e8962be1
commit 4a388c6637
8 changed files with 395 additions and 79 deletions

50
src/db.rs Normal file
View File

@@ -0,0 +1,50 @@
use rusqlite::Connection;
use crate::lastfm::SimilarArtist;
/// Opens the SQLite database at `path` and makes sure the indexer's schema exists.
///
/// Two tables are created when they are missing:
/// - `artists`: one row per artist, keyed by `mbid`, with an optional `name`.
/// - `similar_artists`: one row per `(artist_mbid, similar_name)` pair holding the
///   similar artist's optional `similar_mbid` and its `match_score`;
///   `artist_mbid` references `artists(mbid)`.
///
/// Foreign-key enforcement is switched on for the returned connection so that the
/// `REFERENCES artists(mbid)` clause is actually checked regardless of how the
/// linked SQLite library was compiled.
///
/// # Errors
/// Returns the `rusqlite::Error` produced by opening the database or by running
/// the schema statements.
pub fn open(path: &str) -> Result<Connection, rusqlite::Error> {
    let conn = Connection::open(path)?;
    conn.execute_batch(
        // The pragma is per-connection and must run outside a transaction, so it
        // goes first in the batch, before any table is touched.
        "PRAGMA foreign_keys = ON;
        CREATE TABLE IF NOT EXISTS artists (
            mbid TEXT PRIMARY KEY,
            name TEXT
        );
        CREATE TABLE IF NOT EXISTS similar_artists (
            artist_mbid TEXT NOT NULL REFERENCES artists(mbid),
            similar_mbid TEXT,
            similar_name TEXT NOT NULL,
            match_score REAL NOT NULL,
            PRIMARY KEY (artist_mbid, similar_name)
        );",
    )?;
    Ok(conn)
}
/// Reports whether the `artists` table already holds a row for `mbid`.
///
/// # Errors
/// Propagates any `rusqlite::Error` raised while running the lookup query.
pub fn artist_exists(conn: &Connection, mbid: &str) -> Result<bool, rusqlite::Error> {
    const QUERY: &str = "SELECT COUNT(*) FROM artists WHERE mbid = ?1";
    conn.query_row(QUERY, [mbid], |row| row.get::<_, i64>(0))
        .map(|matches| matches > 0)
}
/// Stores an artist and its list of similar artists in a single transaction.
///
/// The artist row is inserted first, followed by one `similar_artists` row per
/// entry in `similar`. Both inserts use `INSERT OR IGNORE`, so rows whose primary
/// key already exists are left untouched rather than reported as errors.
///
/// # Parameters
/// - `conn`: open database connection.
/// - `mbid`: key of the artist being stored.
/// - `name`: artist name, stored as NULL when `None`.
/// - `similar`: similar artists to record for `mbid`; may be empty.
///
/// # Errors
/// Returns the first `rusqlite::Error` encountered. On an early return the
/// transaction is dropped without being committed (rusqlite's default drop
/// behaviour is to roll back), so a failed call should leave no partial rows.
pub fn insert_artist_with_similar(
    conn: &Connection,
    mbid: &str,
    name: Option<&str>,
    similar: &[SimilarArtist],
) -> Result<(), rusqlite::Error> {
    // Only a shared `&Connection` is available here, hence the unchecked variant.
    let tx = conn.unchecked_transaction()?;
    tx.execute(
        "INSERT OR IGNORE INTO artists (mbid, name) VALUES (?1, ?2)",
        rusqlite::params![mbid, name],
    )?;
    {
        // Prepare the statement once and re-bind it per row instead of having
        // SQLite re-parse identical SQL for every similar artist.
        let mut insert_similar = tx.prepare(
            "INSERT OR IGNORE INTO similar_artists (artist_mbid, similar_mbid, similar_name, match_score)
             VALUES (?1, ?2, ?3, ?4)",
        )?;
        for s in similar {
            insert_similar.execute(rusqlite::params![mbid, s.mbid, s.name, s.match_score])?;
        }
        // The statement borrows `tx`; it is dropped at the end of this scope so
        // that `commit` can take ownership of the transaction.
    }
    tx.commit()
}

View File

@@ -1,12 +1,9 @@
use std::collections::HashMap;
use serde::Deserialize;
const BASE_URL: &str = "https://ws.audioscrobbler.com/2.0/";
pub struct LastfmClient {
api_key: String,
artist_cache: HashMap<String, Vec<SimilarArtist>>,
}
pub struct SimilarArtist {
@@ -18,6 +15,7 @@ pub struct SimilarArtist {
// Last.fm returns {"error": N, "message": "..."} on failure
#[derive(Deserialize)]
struct ApiError {
#[allow(dead_code)]
error: u32,
message: String,
}
@@ -44,46 +42,37 @@ struct ArtistEntry {
impl LastfmClient {
pub fn new(api_key: String) -> Self {
Self {
api_key,
artist_cache: HashMap::new(),
}
Self { api_key }
}
pub fn get_similar_artists(
&mut self,
&self,
artist_mbid: &str,
) -> Result<&[SimilarArtist], Box<dyn std::error::Error>> {
if !self.artist_cache.contains_key(artist_mbid) {
let url = format!(
"{}?method=artist.getSimilar&mbid={}&api_key={}&format=json",
BASE_URL, artist_mbid, self.api_key
);
let body: String = ureq::get(&url).call()?.body_mut().read_to_string()?;
) -> Result<Vec<SimilarArtist>, Box<dyn std::error::Error>> {
let url = format!(
"{}?method=artist.getSimilar&mbid={}&api_key={}&format=json",
BASE_URL, artist_mbid, self.api_key
);
let body: String = ureq::get(&url).call()?.body_mut().read_to_string()?;
let artists = if let Ok(err) = serde_json::from_str::<ApiError>(&body) {
eprintln!(" Last.fm: {}", err.message);
Vec::new()
} else {
let resp: SimilarArtistsResponse = serde_json::from_str(&body)?;
resp.similarartists
.artist
.into_iter()
.map(|a| {
let mbid = a.mbid.filter(|s| !s.is_empty());
SimilarArtist {
name: a.name,
mbid,
match_score: a.match_score.parse().unwrap_or(0.0),
}
})
.collect()
};
self.artist_cache.insert(artist_mbid.to_string(), artists);
if let Ok(err) = serde_json::from_str::<ApiError>(&body) {
eprintln!(" Last.fm: {}", err.message);
return Ok(Vec::new());
}
Ok(self.artist_cache.get(artist_mbid).unwrap())
let resp: SimilarArtistsResponse = serde_json::from_str(&body)?;
Ok(resp
.similarartists
.artist
.into_iter()
.map(|a| {
let mbid = a.mbid.filter(|s| !s.is_empty());
SimilarArtist {
name: a.name,
mbid,
match_score: a.match_score.parse().unwrap_or(0.0),
}
})
.collect())
}
}

View File

@@ -1,3 +1,4 @@
mod db;
mod filesystem;
mod lastfm;
mod metadata;
@@ -6,62 +7,71 @@ use std::env;
use std::path::Path;
fn main() {
dotenvy::dotenv().ok();
let args: Vec<String> = env::args().collect();
if args.len() != 2 {
eprintln!("Usage: {} <directory>", args[0]);
let verbose = args.iter().any(|a| a == "-v");
let rest: Vec<&String> = args.iter().skip(1).filter(|a| *a != "-v").collect();
if rest.len() != 2 || rest[0] != "index" {
eprintln!("Usage: {} index [-v] <directory>", args[0]);
std::process::exit(1);
}
let api_key = env::var("LASTFM_API_KEY").unwrap_or_default();
let mut lastfm = if api_key.is_empty() {
eprintln!("Warning: LASTFM_API_KEY not set, skipping similar artist lookups");
None
} else {
Some(lastfm::LastfmClient::new(api_key))
};
dotenvy::dotenv().ok();
let dir = Path::new(&args[1]);
let api_key = env::var("LASTFM_API_KEY").unwrap_or_default();
if api_key.is_empty() {
eprintln!("Error: LASTFM_API_KEY not set");
std::process::exit(1);
}
let conn = db::open("playlists.db").expect("failed to open database");
let lastfm = lastfm::LastfmClient::new(api_key);
let dir = Path::new(rest[1].as_str());
for path in filesystem::walk_music_files(dir) {
match metadata::read_all_metadata(&path) {
Ok(Some(entries)) => {
println!("{}", path.display());
for entry in &entries {
println!(" {:30} {}", entry.key, entry.value);
}
}
Ok(None) => {
println!("{}", path.display());
println!(" (no metadata tags found)");
}
let artist_mbid = match metadata::read_artist_mbid(&path) {
Ok(Some(mbid)) => mbid,
Ok(None) => continue,
Err(e) => {
eprintln!("{}: could not read metadata: {e}", path.display());
eprintln!("{}: could not read artist MBID: {e}", path.display());
continue;
}
};
let already_indexed = match db::artist_exists(&conn, &artist_mbid) {
Ok(exists) => exists,
Err(e) => {
eprintln!("DB error checking artist {artist_mbid}: {e}");
continue;
}
};
let artist_name = metadata::read_artist_name(&path).ok().flatten();
let display_name = artist_name.as_deref().unwrap_or(&artist_mbid);
if already_indexed {
if verbose {
println!("Skipping {display_name} (already indexed)");
}
continue;
}
if let Some(client) = lastfm.as_mut() {
let artist_mbid = match metadata::read_artist_mbid(&path) {
Ok(Some(mbid)) => mbid,
Ok(None) => continue,
Err(e) => {
eprintln!("{}: could not read artist MBID: {e}", path.display());
continue;
}
};
if verbose {
println!("Indexing {display_name}...");
}
match client.get_similar_artists(&artist_mbid) {
Ok(similar) => {
if !similar.is_empty() {
println!(" Similar artists:");
for a in similar.iter().take(50) {
println!(" {:.2} {}", a.match_score, a.name);
}
}
match lastfm.get_similar_artists(&artist_mbid) {
Ok(similar) => {
if let Err(e) = db::insert_artist_with_similar(
&conn,
&artist_mbid,
artist_name.as_deref(),
&similar,
) {
eprintln!("DB error inserting artist {artist_mbid}: {e}");
}
Err(e) => eprintln!(" Warning: similar artists lookup failed: {e}"),
}
Err(e) => eprintln!("Last.fm error for {artist_mbid}: {e}"),
}
}
}

View File

@@ -35,6 +35,17 @@ pub fn read_all_metadata(path: &Path) -> Result<Option<Vec<TagEntry>>, lofty::er
Ok(Some(entries))
}
/// Read the track-artist string from a music file's tags.
///
/// The file's primary tag is consulted, falling back to the first tag present.
/// Yields `Ok(None)` when the file carries no tag at all or the tag has no
/// `TrackArtist` item.
///
/// # Errors
/// Returns the `LoftyError` produced while reading the file at `path`.
pub fn read_artist_name(path: &Path) -> Result<Option<String>, lofty::error::LoftyError> {
    let tagged_file = lofty::read_from_path(path)?;
    let artist = tagged_file
        .primary_tag()
        .or_else(|| tagged_file.first_tag())
        .and_then(|tag| tag.get_string(ItemKey::TrackArtist))
        .map(str::to_owned);
    Ok(artist)
}
/// Extract the MusicBrainz artist ID from a music file.
pub fn read_artist_mbid(path: &Path) -> Result<Option<String>, lofty::error::LoftyError> {
let tagged_file = lofty::read_from_path(path)?;