Added the mb db download. Big upsides and downsides
CI / check (push) Successful in 1m11s
CI / docker (push) Successful in 2m21s

This commit is contained in:
Connor Johnstone
2026-03-21 23:22:49 -04:00
parent 31d54651e6
commit 51f2c2ae8f
9 changed files with 2181 additions and 142 deletions
+583
View File
@@ -0,0 +1,583 @@
//! Local MusicBrainz database fetcher.
//!
//! Implements [`MetadataFetcher`] backed by a local SQLite database (populated
//! via [`crate::mb_import`]). All queries are instant local lookups — no rate
//! limiting needed.
use std::sync::Mutex;
use rusqlite::Connection;
use crate::error::{DataError, DataResult};
use crate::traits::MetadataFetcher;
use crate::types::{
ArtistInfo, ArtistSearchResult, ArtistUrl, DiscographyEntry, RecordingDetails, RecordingMatch,
ReleaseGroupEntry, ReleaseMatch, ReleaseRef, ReleaseTrack,
};
/// Statistics about the local MusicBrainz database.
///
/// Produced by `LocalMusicBrainzFetcher::stats`, which reads every value from
/// the `mb_import_meta` key/value table rather than counting rows in the data
/// tables. A count whose metadata entry is missing or does not parse as an
/// integer is reported as `0`.
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct LocalMbStats {
/// Value stored under the `artist_count` metadata key.
pub artists: u64,
/// Value stored under the `release_group_count` metadata key.
pub release_groups: u64,
/// Value stored under the `release_count` metadata key.
pub releases: u64,
/// Value stored under the `recording_count` metadata key.
pub recordings: u64,
/// Value stored under the `track_count` metadata key.
pub tracks: u64,
/// Raw string stored under the `last_import_date` metadata key, or `None`
/// when that key cannot be read.
pub last_import_date: Option<String>,
}
/// A [`MetadataFetcher`] backed by a local SQLite database.
///
/// Holds a single SQLite connection; every method locks it for the duration
/// of its queries.
pub struct LocalMusicBrainzFetcher {
// The one shared connection. Methods call `lock().unwrap()`, so a poisoned
// mutex makes them panic.
conn: Mutex<Connection>,
}
impl LocalMusicBrainzFetcher {
/// Open the SQLite database at `db_path`, creating the file if it does not exist.
///
/// The connection is switched to WAL journaling and given a `cache_size` of
/// `-16000` before it is wrapped in the fetcher.
///
/// # Errors
/// Returns the underlying error if the database cannot be opened or the
/// PRAGMA batch fails.
pub fn new(db_path: &str) -> Result<Self, Box<dyn std::error::Error>> {
    let db = Connection::open(db_path)?;
    db.execute_batch("PRAGMA journal_mode = WAL; PRAGMA cache_size = -16000;")?;
    let conn = Mutex::new(db);
    Ok(Self { conn })
}
/// Check whether the database has been populated with data.
///
/// Returns `true` only when the `mb_artists` table exists and contains at
/// least one row. Any SQLite error along the way is treated as "not
/// available" and yields `false`.
///
/// Both probes use `EXISTS`, so SQLite can stop at the first matching row
/// instead of counting the whole artist table.
///
/// # Panics
/// Panics if the connection mutex is poisoned.
pub fn is_available(&self) -> bool {
    let conn = self.conn.lock().unwrap();

    // Guard first: querying a table that was never created would only
    // produce an error we would have to swallow anyway.
    let table_exists = conn
        .query_row(
            "SELECT EXISTS(SELECT 1 FROM sqlite_master WHERE type='table' AND name='mb_artists')",
            [],
            |row| row.get::<_, bool>(0),
        )
        .unwrap_or(false);
    if !table_exists {
        return false;
    }

    conn.query_row("SELECT EXISTS(SELECT 1 FROM mb_artists)", [], |row| {
        row.get::<_, bool>(0)
    })
    .unwrap_or(false)
}
/// Report the import statistics recorded in the `mb_import_meta` table.
///
/// Each count is looked up by key and parsed as an integer; a key that is
/// missing, unreadable, or not numeric contributes `0`. `last_import_date`
/// is passed through as the stored string, or `None` if it cannot be read.
///
/// # Panics
/// Panics if the connection mutex is poisoned.
pub fn stats(&self) -> LocalMbStats {
    let conn = self.conn.lock().unwrap();

    // Raw metadata lookup; every failure collapses to `None`.
    let meta = |key: &str| -> Option<String> {
        conn.query_row(
            "SELECT value FROM mb_import_meta WHERE key = ?1",
            rusqlite::params![key],
            |row| row.get(0),
        )
        .ok()
    };
    // Numeric metadata lookup with the "absent means zero" fallback.
    let count = |key: &str| -> u64 { meta(key).and_then(|raw| raw.parse().ok()).unwrap_or(0) };

    LocalMbStats {
        artists: count("artist_count"),
        release_groups: count("release_group_count"),
        releases: count("release_count"),
        recordings: count("recording_count"),
        tracks: count("track_count"),
        last_import_date: meta("last_import_date"),
    }
}
/// Look up an artist by MBID (returns name and disambiguation).
pub fn get_artist_by_mbid_sync(&self, mbid: &str) -> DataResult<(String, Option<String>)> {
let conn = self.conn.lock().unwrap();
let result = conn.query_row(
"SELECT name, disambiguation FROM mb_artists WHERE mbid = ?1",
rusqlite::params![mbid],
|row| {
let name: String = row.get(0)?;
let disambiguation: Option<String> = row.get(1)?;
Ok((name, disambiguation.filter(|s| !s.is_empty())))
},
);
match result {
Ok(r) => Ok(r),
Err(rusqlite::Error::QueryReturnedNoRows) => {
Err(DataError::Other(format!("artist {mbid} not found locally")))
}
Err(e) => Err(DataError::Other(e.to_string())),
}
}
/// Look up detailed artist info by MBID, including URLs.
pub fn get_artist_info_sync(&self, mbid: &str) -> DataResult<ArtistInfo> {
let conn = self.conn.lock().unwrap();
let artist = conn.query_row(
"SELECT name, disambiguation, country, artist_type, begin_year FROM mb_artists WHERE mbid = ?1",
rusqlite::params![mbid],
|row| {
Ok((
row.get::<_, String>(0)?,
row.get::<_, Option<String>>(1)?,
row.get::<_, Option<String>>(2)?,
row.get::<_, Option<String>>(3)?,
row.get::<_, Option<i32>>(4)?,
))
},
);
let (name, disambiguation, country, artist_type, begin_year) = match artist {
Ok(a) => a,
Err(rusqlite::Error::QueryReturnedNoRows) => {
return Err(DataError::Other(format!("artist {mbid} not found locally")));
}
Err(e) => return Err(DataError::Other(e.to_string())),
};
// Fetch URLs
let mut url_stmt = conn
.prepare("SELECT url, link_type FROM mb_artist_urls WHERE artist_mbid = ?1")
.map_err(|e| DataError::Other(e.to_string()))?;
let urls: Vec<ArtistUrl> = url_stmt
.query_map(rusqlite::params![mbid], |row| {
Ok(ArtistUrl {
url: row.get(0)?,
link_type: row.get(1)?,
})
})
.map_err(|e| DataError::Other(e.to_string()))?
.filter_map(|r| r.ok())
.collect();
Ok(ArtistInfo {
name,
mbid: Some(mbid.to_string()),
disambiguation: disambiguation.filter(|s| !s.is_empty()),
country: country.filter(|s| !s.is_empty()),
artist_type,
begin_year: begin_year.map(|y| y.to_string()),
urls,
})
}
}
impl MetadataFetcher for LocalMusicBrainzFetcher {
/// Search recordings whose title contains `title`, optionally restricted to
/// artists whose name contains `artist`.
///
/// Matching is a case-insensitive substring `LIKE`. The characters `%`, `_`
/// and `\` in the search terms are escaped so they match literally instead of
/// acting as `LIKE` wildcards. An empty `artist` disables the artist filter.
///
/// At most 10 rows are returned, in whatever order SQLite yields them. Every
/// match carries `score` 100 and an empty `releases` list; a recording with
/// no joined artist row is labelled "Unknown Artist". Rows that fail to
/// decode are skipped.
///
/// # Errors
/// Returns [`DataError::Other`] if the statement cannot be prepared or run.
///
/// # Panics
/// Panics if the connection mutex is poisoned.
async fn search_recording(&self, artist: &str, title: &str) -> DataResult<Vec<RecordingMatch>> {
    /// Build a `%term%` pattern with LIKE metacharacters backslash-escaped.
    fn contains_pattern(term: &str) -> String {
        let mut pattern = String::with_capacity(term.len() + 2);
        pattern.push('%');
        for ch in term.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                pattern.push('\\');
            }
            pattern.push(ch);
        }
        pattern.push('%');
        pattern
    }

    /// Decode one `(mbid, title, artist_mbid, artist name)` row.
    fn to_match(row: &rusqlite::Row<'_>) -> rusqlite::Result<RecordingMatch> {
        Ok(RecordingMatch {
            mbid: row.get(0)?,
            title: row.get(1)?,
            artist_mbid: row.get(2)?,
            artist: row
                .get::<_, Option<String>>(3)?
                .unwrap_or_else(|| "Unknown Artist".into()),
            releases: vec![],
            score: 100,
        })
    }

    let conn = self.conn.lock().unwrap();
    let title_pattern = contains_pattern(title);

    let matches = if artist.is_empty() {
        let mut stmt = conn
            .prepare(
                "SELECT r.mbid, r.title, r.artist_mbid, a.name
FROM mb_recordings r
LEFT JOIN mb_artists a ON r.artist_mbid = a.mbid
WHERE r.title LIKE ?1 COLLATE NOCASE ESCAPE '\\'
LIMIT 10",
            )
            .map_err(|e| DataError::Other(e.to_string()))?;
        stmt.query_map(rusqlite::params![title_pattern], to_match)
            .map_err(|e| DataError::Other(e.to_string()))?
            .filter_map(|r| r.ok())
            .collect()
    } else {
        let artist_pattern = contains_pattern(artist);
        let mut stmt = conn
            .prepare(
                "SELECT r.mbid, r.title, r.artist_mbid, a.name
FROM mb_recordings r
LEFT JOIN mb_artists a ON r.artist_mbid = a.mbid
WHERE r.title LIKE ?1 COLLATE NOCASE ESCAPE '\\'
AND a.name LIKE ?2 COLLATE NOCASE ESCAPE '\\'
LIMIT 10",
            )
            .map_err(|e| DataError::Other(e.to_string()))?;
        stmt.query_map(rusqlite::params![title_pattern, artist_pattern], to_match)
            .map_err(|e| DataError::Other(e.to_string()))?
            .filter_map(|r| r.ok())
            .collect()
    };
    Ok(matches)
}
/// Search releases whose title contains `album`, optionally restricted to
/// artists whose name contains `artist`.
///
/// Matching is a case-insensitive substring `LIKE`. The characters `%`, `_`
/// and `\` in the search terms are escaped so they match literally instead of
/// acting as `LIKE` wildcards. An empty `artist` disables the artist filter.
///
/// At most 10 rows are returned, in whatever order SQLite yields them. Every
/// match carries `score` 100 and no `track_count`; a release with no joined
/// artist row is labelled "Unknown Artist". Rows that fail to decode are
/// skipped.
///
/// # Errors
/// Returns [`DataError::Other`] if the statement cannot be prepared or run.
///
/// # Panics
/// Panics if the connection mutex is poisoned.
async fn search_release(&self, artist: &str, album: &str) -> DataResult<Vec<ReleaseMatch>> {
    /// Build a `%term%` pattern with LIKE metacharacters backslash-escaped.
    fn contains_pattern(term: &str) -> String {
        let mut pattern = String::with_capacity(term.len() + 2);
        pattern.push('%');
        for ch in term.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                pattern.push('\\');
            }
            pattern.push(ch);
        }
        pattern.push('%');
        pattern
    }

    /// Decode one `(mbid, title, artist_mbid, artist name, date)` row.
    fn to_match(row: &rusqlite::Row<'_>) -> rusqlite::Result<ReleaseMatch> {
        Ok(ReleaseMatch {
            mbid: row.get(0)?,
            title: row.get(1)?,
            artist_mbid: row.get(2)?,
            artist: row
                .get::<_, Option<String>>(3)?
                .unwrap_or_else(|| "Unknown Artist".into()),
            date: row.get(4)?,
            track_count: None,
            score: 100,
        })
    }

    let conn = self.conn.lock().unwrap();
    let album_pattern = contains_pattern(album);

    let results = if artist.is_empty() {
        let mut stmt = conn
            .prepare(
                "SELECT r.mbid, r.title, r.artist_mbid, a.name, r.date
FROM mb_releases r
LEFT JOIN mb_artists a ON r.artist_mbid = a.mbid
WHERE r.title LIKE ?1 COLLATE NOCASE ESCAPE '\\'
LIMIT 10",
            )
            .map_err(|e| DataError::Other(e.to_string()))?;
        stmt.query_map(rusqlite::params![album_pattern], to_match)
            .map_err(|e| DataError::Other(e.to_string()))?
            .filter_map(|r| r.ok())
            .collect()
    } else {
        let artist_pattern = contains_pattern(artist);
        let mut stmt = conn
            .prepare(
                "SELECT r.mbid, r.title, r.artist_mbid, a.name, r.date
FROM mb_releases r
LEFT JOIN mb_artists a ON r.artist_mbid = a.mbid
WHERE r.title LIKE ?1 COLLATE NOCASE ESCAPE '\\'
AND a.name LIKE ?2 COLLATE NOCASE ESCAPE '\\'
LIMIT 10",
            )
            .map_err(|e| DataError::Other(e.to_string()))?;
        stmt.query_map(rusqlite::params![album_pattern, artist_pattern], to_match)
            .map_err(|e| DataError::Other(e.to_string()))?
            .filter_map(|r| r.ok())
            .collect()
    };
    Ok(results)
}
async fn get_recording(&self, mbid: &str) -> DataResult<RecordingDetails> {
let conn = self.conn.lock().unwrap();
let recording = conn.query_row(
"SELECT r.mbid, r.title, r.artist_mbid, r.duration_ms, a.name
FROM mb_recordings r
LEFT JOIN mb_artists a ON r.artist_mbid = a.mbid
WHERE r.mbid = ?1",
rusqlite::params![mbid],
|row| {
Ok(RecordingDetails {
mbid: row.get(0)?,
title: row.get(1)?,
artist_mbid: row.get(2)?,
duration_ms: row.get(3)?,
artist: row
.get::<_, Option<String>>(4)?
.unwrap_or_else(|| "Unknown Artist".into()),
releases: vec![],
genres: vec![],
secondary_artists: vec![],
})
},
);
match recording {
Ok(mut r) => {
// Fetch releases that contain this recording
let mut stmt = conn
.prepare(
"SELECT DISTINCT rel.mbid, rel.title, rel.date
FROM mb_tracks t
JOIN mb_releases rel ON t.release_mbid = rel.mbid
WHERE t.recording_mbid = ?1
LIMIT 10",
)
.map_err(|e| DataError::Other(e.to_string()))?;
r.releases = stmt
.query_map(rusqlite::params![mbid], |row| {
Ok(ReleaseRef {
mbid: row.get(0)?,
title: row.get(1)?,
date: row.get(2)?,
track_number: None,
})
})
.map_err(|e| DataError::Other(e.to_string()))?
.filter_map(|r| r.ok())
.collect();
Ok(r)
}
Err(rusqlite::Error::QueryReturnedNoRows) => Err(DataError::Other(format!(
"recording {mbid} not found locally"
))),
Err(e) => Err(DataError::Other(e.to_string())),
}
}
/// Search artists whose name contains `query`, returning at most `limit` rows.
///
/// Matching is a case-insensitive substring `LIKE`. The characters `%`, `_`
/// and `\` in `query` are escaped so they match literally instead of acting
/// as `LIKE` wildcards. Results come back in whatever order SQLite yields
/// them, each with `score` 100; an empty disambiguation is normalised to
/// `None`. Rows that fail to decode are skipped.
///
/// # Errors
/// Returns [`DataError::Other`] if the statement cannot be prepared or run.
///
/// # Panics
/// Panics if the connection mutex is poisoned.
async fn search_artist(&self, query: &str, limit: u32) -> DataResult<Vec<ArtistSearchResult>> {
    /// Build a `%term%` pattern with LIKE metacharacters backslash-escaped.
    fn contains_pattern(term: &str) -> String {
        let mut pattern = String::with_capacity(term.len() + 2);
        pattern.push('%');
        for ch in term.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                pattern.push('\\');
            }
            pattern.push(ch);
        }
        pattern.push('%');
        pattern
    }

    let conn = self.conn.lock().unwrap();
    let pattern = contains_pattern(query);
    let mut stmt = conn
        .prepare(
            "SELECT mbid, name, disambiguation, country, artist_type
FROM mb_artists
WHERE name LIKE ?1 COLLATE NOCASE ESCAPE '\\'
LIMIT ?2",
        )
        .map_err(|e| DataError::Other(e.to_string()))?;
    let results: Vec<ArtistSearchResult> = stmt
        .query_map(rusqlite::params![pattern, limit], |row| {
            Ok(ArtistSearchResult {
                mbid: row.get(0)?,
                name: row.get(1)?,
                disambiguation: row.get::<_, Option<String>>(2)?.filter(|s| !s.is_empty()),
                country: row.get(3)?,
                artist_type: row.get(4)?,
                score: 100,
            })
        })
        .map_err(|e| DataError::Other(e.to_string()))?
        .filter_map(|r| r.ok())
        .collect();
    Ok(results)
}
/// List up to `limit` releases credited to `artist_mbid`.
///
/// `track_count` is never filled in. Rows that fail to decode are skipped.
///
/// NOTE(review): `release_type` is populated from the `status` column —
/// confirm that is the intended source. The query has no `ORDER BY`, so
/// which rows survive the `LIMIT` is up to SQLite.
///
/// # Errors
/// Returns [`DataError::Other`] if the statement cannot be prepared or run.
///
/// # Panics
/// Panics if the connection mutex is poisoned.
async fn get_artist_releases(
    &self,
    artist_mbid: &str,
    limit: u32,
) -> DataResult<Vec<DiscographyEntry>> {
    let conn = self.conn.lock().unwrap();
    let mut stmt = conn
        .prepare(
            "SELECT mbid, title, date, status
FROM mb_releases
WHERE artist_mbid = ?1
LIMIT ?2",
        )
        .map_err(|e| DataError::Other(e.to_string()))?;

    let rows = stmt
        .query_map(rusqlite::params![artist_mbid, limit], |row| {
            Ok(DiscographyEntry {
                mbid: row.get(0)?,
                title: row.get(1)?,
                date: row.get(2)?,
                release_type: row.get(3)?,
                track_count: None,
            })
        })
        .map_err(|e| DataError::Other(e.to_string()))?;

    let entries: Vec<DiscographyEntry> = rows.filter_map(Result::ok).collect();
    Ok(entries)
}
/// List the tracks of `release_mbid`, ordered by disc and then track number.
///
/// Rows that fail to decode are skipped.
///
/// # Errors
/// Returns [`DataError::Other`] if the statement cannot be prepared or run,
/// or with a "no tracks found for release …" message when the resulting
/// list is empty.
///
/// # Panics
/// Panics if the connection mutex is poisoned.
async fn get_release_tracks(&self, release_mbid: &str) -> DataResult<Vec<ReleaseTrack>> {
    let conn = self.conn.lock().unwrap();
    let mut stmt = conn
        .prepare(
            "SELECT recording_mbid, title, track_number, disc_number, duration_ms
FROM mb_tracks
WHERE release_mbid = ?1
ORDER BY disc_number, track_number",
        )
        .map_err(|e| DataError::Other(e.to_string()))?;

    let rows = stmt
        .query_map(rusqlite::params![release_mbid], |row| {
            Ok(ReleaseTrack {
                recording_mbid: row.get(0)?,
                title: row.get(1)?,
                track_number: row.get(2)?,
                disc_number: row.get(3)?,
                duration_ms: row.get(4)?,
            })
        })
        .map_err(|e| DataError::Other(e.to_string()))?;
    let tracks: Vec<ReleaseTrack> = rows.filter_map(Result::ok).collect();

    // An empty track list is reported as an error, not as an empty success.
    if !tracks.is_empty() {
        return Ok(tracks);
    }
    Err(DataError::Other(format!(
        "no tracks found for release {release_mbid}"
    )))
}
/// List the release groups credited to `artist_mbid`, ordered by first
/// release date.
///
/// `secondary_types` is stored as a JSON array string; a NULL or unparsable
/// value becomes an empty list. `first_release_mbid` is whichever single
/// release the correlated subquery returns for the group (the subquery has
/// no ordering of its own). Rows that fail to decode are skipped.
///
/// # Errors
/// Returns [`DataError::Other`] if the statement cannot be prepared or run.
///
/// # Panics
/// Panics if the connection mutex is poisoned.
async fn get_artist_release_groups(
    &self,
    artist_mbid: &str,
) -> DataResult<Vec<ReleaseGroupEntry>> {
    let conn = self.conn.lock().unwrap();
    let mut stmt = conn
        .prepare(
            "SELECT rg.mbid, rg.title, rg.primary_type, rg.secondary_types, rg.first_release_date,
(SELECT r.mbid FROM mb_releases r WHERE r.release_group_mbid = rg.mbid LIMIT 1) as first_release_mbid
FROM mb_release_groups rg
WHERE rg.artist_mbid = ?1
ORDER BY rg.first_release_date",
        )
        .map_err(|e| DataError::Other(e.to_string()))?;

    let rows = stmt
        .query_map(rusqlite::params![artist_mbid], |row| {
            // Decode the JSON-encoded secondary types up front; anything
            // that is absent or malformed degrades to "no secondary types".
            let raw_secondary: Option<String> = row.get(3)?;
            let secondary_types: Vec<String> = match raw_secondary {
                Some(json) => serde_json::from_str(&json).ok().unwrap_or_default(),
                None => Vec::new(),
            };
            Ok(ReleaseGroupEntry {
                mbid: row.get(0)?,
                title: row.get(1)?,
                primary_type: row.get(2)?,
                secondary_types,
                first_release_date: row.get(4)?,
                first_release_mbid: row.get(5)?,
            })
        })
        .map_err(|e| DataError::Other(e.to_string()))?;

    let groups: Vec<ReleaseGroupEntry> = rows.filter_map(Result::ok).collect();
    Ok(groups)
}
async fn resolve_release_from_group(&self, release_group_mbid: &str) -> DataResult<String> {
let conn = self.conn.lock().unwrap();
let result = conn.query_row(
"SELECT mbid FROM mb_releases WHERE release_group_mbid = ?1 LIMIT 1",
rusqlite::params![release_group_mbid],
|row| row.get::<_, String>(0),
);
match result {
Ok(mbid) => Ok(mbid),
Err(rusqlite::Error::QueryReturnedNoRows) => Err(DataError::Other(format!(
"no releases for release-group {release_group_mbid}"
))),
Err(e) => Err(DataError::Other(e.to_string())),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::mb_import;
/// Build an in-memory database with the import schema and one row in each
/// of the artist, URL, release-group, release, track and recording tables,
/// plus an `artist_count` metadata entry.
fn setup_test_db() -> Connection {
    let conn = Connection::open_in_memory().unwrap();
    mb_import::create_schema(&conn).unwrap();

    // Fixture rows, executed in this order; the last one is import metadata.
    let fixtures = [
        "INSERT INTO mb_artists (mbid, name, sort_name, disambiguation, artist_type, country, begin_year) VALUES ('a-1', 'Test Artist', 'Artist, Test', 'test', 'Person', 'US', 1990)",
        "INSERT INTO mb_artist_urls (artist_mbid, url, link_type) VALUES ('a-1', 'https://en.wikipedia.org/wiki/Test', 'wikipedia')",
        "INSERT INTO mb_release_groups (mbid, title, artist_mbid, primary_type, secondary_types, first_release_date) VALUES ('rg-1', 'Test Album', 'a-1', 'Album', NULL, '2020-01-15')",
        "INSERT INTO mb_releases (mbid, title, release_group_mbid, artist_mbid, date, country, status) VALUES ('r-1', 'Test Album', 'rg-1', 'a-1', '2020-01-15', 'US', 'Official')",
        "INSERT INTO mb_tracks (release_mbid, recording_mbid, title, track_number, disc_number, duration_ms, position) VALUES ('r-1', 'rec-1', 'Track One', 1, 1, 240000, 1)",
        "INSERT INTO mb_recordings (mbid, title, artist_mbid, duration_ms) VALUES ('rec-1', 'Track One', 'a-1', 240000)",
        "INSERT INTO mb_import_meta (key, value) VALUES ('artist_count', '1')",
    ];
    for statement in fixtures {
        conn.execute(statement, []).unwrap();
    }

    conn
}
/// Exercises `get_artist_info_sync` itself, not just the SQL behind it.
///
/// The fetcher's `conn` field is private to the parent module, which this
/// child module may access, so the fetcher is built straight from the
/// in-memory fixture database.
#[test]
fn test_get_artist_info_sync() {
    let fetcher = LocalMusicBrainzFetcher {
        conn: Mutex::new(setup_test_db()),
    };

    let Ok(info) = fetcher.get_artist_info_sync("a-1") else {
        panic!("fixture artist a-1 should be found");
    };
    assert_eq!(info.name, "Test Artist");
    assert_eq!(info.mbid.as_deref(), Some("a-1"));
    assert_eq!(info.disambiguation.as_deref(), Some("test"));
    assert_eq!(info.country.as_deref(), Some("US"));
    assert_eq!(info.artist_type.as_deref(), Some("Person"));
    // The integer year column is surfaced as a string.
    assert_eq!(info.begin_year.as_deref(), Some("1990"));
    assert_eq!(info.urls.len(), 1);

    // An unknown MBID is an error, not an empty record.
    assert!(fetcher.get_artist_info_sync("missing").is_err());
}
/// Exercises `resolve_release_from_group` itself, not just the SQL behind it.
///
/// The method is `async` but never awaits anything, so a single poll with a
/// do-nothing waker is enough to drive it; no async runtime is required.
#[test]
fn test_resolve_release_from_group() {
    /// Poll `fut` exactly once and return its output, panicking if it is
    /// not ready immediately.
    fn poll_once<F: std::future::Future>(fut: F) -> F::Output {
        struct NoopWake;
        impl std::task::Wake for NoopWake {
            fn wake(self: std::sync::Arc<Self>) {}
        }

        let waker = std::task::Waker::from(std::sync::Arc::new(NoopWake));
        let mut cx = std::task::Context::from_waker(&waker);
        let fut = std::pin::pin!(fut);
        match std::future::Future::poll(fut, &mut cx) {
            std::task::Poll::Ready(output) => output,
            std::task::Poll::Pending => panic!("local lookup should complete on the first poll"),
        }
    }

    let fetcher = LocalMusicBrainzFetcher {
        conn: Mutex::new(setup_test_db()),
    };

    match poll_once(fetcher.resolve_release_from_group("rg-1")) {
        Ok(mbid) => assert_eq!(mbid, "r-1"),
        Err(_) => panic!("release group rg-1 should resolve to release r-1"),
    }

    // A group with no releases is reported as an error.
    assert!(poll_once(fetcher.resolve_release_from_group("rg-missing")).is_err());
}
}