From 2280e9564d7241f5053d73d2cf71de405bdc8d21 Mon Sep 17 00:00:00 2001 From: Connor Johnstone Date: Fri, 20 Mar 2026 14:52:16 -0400 Subject: [PATCH] Re-organized providers and added a few --- Cargo.toml | 1 + src/error.rs | 3 + src/file_tags.rs | 2 +- src/lib.rs | 18 +- src/matcher.rs | 2 +- src/musicbrainz.rs | 503 --------------------------------------------- src/provider.rs | 156 -------------- src/tagger.rs | 2 +- 8 files changed, 21 insertions(+), 666 deletions(-) delete mode 100644 src/musicbrainz.rs delete mode 100644 src/provider.rs diff --git a/Cargo.toml b/Cargo.toml index 7ba67c1..165d687 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ description = "Metadata tagging via online databases for Shanty" repository = "ssh://connor@git.rcjohnstone.com:2222/Shanty/tag.git" [dependencies] +shanty-data = { path = "../shanty-data" } shanty-db = { path = "../shanty-db" } sea-orm = { version = "1", features = ["sqlx-sqlite", "runtime-tokio-native-tls"] } clap = { version = "4", features = ["derive"] } diff --git a/src/error.rs b/src/error.rs index 7feb55c..c42504a 100644 --- a/src/error.rs +++ b/src/error.rs @@ -11,6 +11,9 @@ pub enum TagError { #[error("HTTP error: {0}")] Http(#[from] reqwest::Error), + #[error("data error: {0}")] + Data(#[from] shanty_data::DataError), + #[error("metadata error: {0}")] Metadata(String), diff --git a/src/file_tags.rs b/src/file_tags.rs index ed80785..2339868 100644 --- a/src/file_tags.rs +++ b/src/file_tags.rs @@ -6,7 +6,7 @@ use lofty::probe::Probe; use lofty::tag::{Accessor, TagExt, TagType}; use crate::error::TagResult; -use crate::provider::{RecordingDetails, ReleaseRef}; +use shanty_data::{RecordingDetails, ReleaseRef}; /// Infer the best tag type for a given file type. fn tag_type_for_file(ft: FileType) -> TagType { diff --git a/src/lib.rs b/src/lib.rs index 5c734b3..5e6cb1e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,11 +8,21 @@ pub mod cleaning; pub mod error; pub mod file_tags; pub mod matcher; -pub mod musicbrainz; -pub mod provider; pub mod tagger; pub use error::{TagError, TagResult}; -pub use musicbrainz::MusicBrainzClient; -pub use provider::MetadataProvider; pub use tagger::{TagConfig, TagStats, run_tagging}; + +// Re-export from shanty-data for backward compatibility. +pub use shanty_data::MetadataFetcher as MetadataProvider; +pub use shanty_data::MusicBrainzFetcher as MusicBrainzClient; + +/// Re-export provider types from shanty-data for backward compatibility. +pub mod provider { + pub use shanty_data::{ + ArtistInfo, ArtistSearchResult, ArtistUrl, DiscographyEntry, RecordingDetails, + RecordingMatch, ReleaseGroupEntry, ReleaseMatch, ReleaseRef, ReleaseTrack, + }; + + pub use shanty_data::MetadataFetcher as MetadataProvider; +} diff --git a/src/matcher.rs b/src/matcher.rs index b8d3909..33c6de0 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -1,7 +1,7 @@ use shanty_db::entities::track; use crate::cleaning::normalize; -use crate::provider::{RecordingMatch, ReleaseRef}; +use shanty_data::{RecordingMatch, ReleaseRef}; /// A scored recording match with the best matching release. #[derive(Debug, Clone)] diff --git a/src/musicbrainz.rs b/src/musicbrainz.rs deleted file mode 100644 index c326df6..0000000 --- a/src/musicbrainz.rs +++ /dev/null @@ -1,503 +0,0 @@ -use serde::Deserialize; -use tokio::sync::Mutex; -use tokio::time::{Duration, Instant}; - -use crate::cleaning::escape_lucene; -use crate::error::{TagError, TagResult}; -use crate::provider::{ - ArtistInfo, ArtistSearchResult, ArtistUrl, DiscographyEntry, MetadataProvider, - RecordingDetails, RecordingMatch, ReleaseGroupEntry, ReleaseMatch, ReleaseRef, ReleaseTrack, -}; - -const BASE_URL: &str = "https://musicbrainz.org/ws/2"; -const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)"; -const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe - -/// MusicBrainz API client with rate limiting. -pub struct MusicBrainzClient { - client: reqwest::Client, - last_request: Mutex, -} - -impl MusicBrainzClient { - pub fn new() -> TagResult { - let client = reqwest::Client::builder() - .user_agent(USER_AGENT) - .timeout(Duration::from_secs(30)) - .build()?; - Ok(Self { - client, - last_request: Mutex::new(Instant::now() - RATE_LIMIT), - }) - } - - /// Enforce rate limiting: wait if needed so we don't exceed 1 req/sec. - async fn rate_limit(&self) { - let mut last = self.last_request.lock().await; - let elapsed = last.elapsed(); - if elapsed < RATE_LIMIT { - tokio::time::sleep(RATE_LIMIT - elapsed).await; - } - *last = Instant::now(); - } - - async fn get_json(&self, url: &str) -> TagResult { - self.rate_limit().await; - tracing::debug!(url = url, "MusicBrainz request"); - let resp = self.client.get(url).send().await?; - let status = resp.status(); - if !status.is_success() { - let body = resp.text().await.unwrap_or_default(); - return Err(TagError::Other(format!( - "MusicBrainz API error {status}: {body}" - ))); - } - Ok(resp.json().await?) - } - - /// Look up an artist directly by MBID. Returns (name, disambiguation). - pub async fn get_artist_by_mbid(&self, mbid: &str) -> TagResult<(String, Option)> { - let url = format!("{BASE_URL}/artist/{mbid}?fmt=json"); - let resp: MbArtistLookup = self.get_json(&url).await?; - Ok((resp.name, resp.disambiguation.filter(|s| !s.is_empty()))) - } - - /// Look up detailed artist info by MBID, including URLs and metadata. - pub async fn get_artist_info(&self, mbid: &str) -> TagResult { - let url = format!("{BASE_URL}/artist/{mbid}?inc=url-rels&fmt=json"); - let resp: MbArtistFull = self.get_json(&url).await?; - - let begin_year = resp - .life_span - .and_then(|ls| ls.begin) - .and_then(|d| d.split('-').next().map(String::from)); - - let urls = resp - .relations - .unwrap_or_default() - .into_iter() - .filter_map(|rel| { - rel.url.map(|u| ArtistUrl { - url: u.resource, - link_type: rel.relation_type, - }) - }) - .collect(); - - Ok(ArtistInfo { - name: resp.name, - disambiguation: resp.disambiguation.filter(|s| !s.is_empty()), - country: resp.country.filter(|s| !s.is_empty()), - artist_type: resp.artist_type, - begin_year, - urls, - }) - } -} - -impl MetadataProvider for MusicBrainzClient { - async fn search_recording(&self, artist: &str, title: &str) -> TagResult> { - let query = if artist.is_empty() { - format!("recording:{}", escape_lucene(title)) - } else { - format!( - "artist:{} AND recording:{}", - escape_lucene(artist), - escape_lucene(title) - ) - }; - let url = format!( - "{BASE_URL}/recording/?query={}&fmt=json&limit=5", - urlencoded(&query) - ); - let resp: MbRecordingSearchResponse = self.get_json(&url).await?; - - Ok(resp - .recordings - .into_iter() - .map(|r| { - let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit); - RecordingMatch { - mbid: r.id, - title: r.title, - artist: artist_name, - artist_mbid, - releases: r - .releases - .unwrap_or_default() - .into_iter() - .map(|rel| ReleaseRef { - mbid: rel.id, - title: rel.title, - date: rel.date, - track_number: None, - }) - .collect(), - score: r.score.unwrap_or(0), - } - }) - .collect()) - } - - async fn search_release(&self, artist: &str, album: &str) -> TagResult> { - let query = if artist.is_empty() { - format!("release:{}", escape_lucene(album)) - } else { - format!( - "artist:{} AND release:{}", - escape_lucene(artist), - escape_lucene(album) - ) - }; - let url = format!( - "{BASE_URL}/release/?query={}&fmt=json&limit=5", - urlencoded(&query) - ); - let resp: MbReleaseSearchResponse = self.get_json(&url).await?; - - Ok(resp - .releases - .into_iter() - .map(|r| { - let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit); - ReleaseMatch { - mbid: r.id, - title: r.title, - artist: artist_name, - artist_mbid, - date: r.date, - track_count: r.track_count, - score: r.score.unwrap_or(0), - } - }) - .collect()) - } - - async fn get_recording(&self, mbid: &str) -> TagResult { - let url = format!("{BASE_URL}/recording/{mbid}?inc=artists+releases+genres&fmt=json"); - let r: MbRecordingDetail = self.get_json(&url).await?; - - let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit); - let secondary_artists = extract_secondary_artists(&r.artist_credit); - Ok(RecordingDetails { - mbid: r.id, - title: r.title, - artist: artist_name, - artist_mbid, - releases: r - .releases - .unwrap_or_default() - .into_iter() - .map(|rel| ReleaseRef { - mbid: rel.id, - title: rel.title, - date: rel.date, - track_number: None, - }) - .collect(), - duration_ms: r.length, - genres: r - .genres - .unwrap_or_default() - .into_iter() - .map(|g| g.name) - .collect(), - secondary_artists, - }) - } - async fn search_artist(&self, query: &str, limit: u32) -> TagResult> { - let url = format!( - "{BASE_URL}/artist/?query={}&fmt=json&limit={limit}", - urlencoded(&escape_lucene(query)) - ); - let resp: MbArtistSearchResponse = self.get_json(&url).await?; - - Ok(resp - .artists - .into_iter() - .map(|a| ArtistSearchResult { - mbid: a.id, - name: a.name, - disambiguation: a.disambiguation.filter(|s| !s.is_empty()), - country: a.country, - artist_type: a.artist_type, - score: a.score.unwrap_or(0), - }) - .collect()) - } - - async fn get_artist_releases( - &self, - artist_mbid: &str, - limit: u32, - ) -> TagResult> { - let url = format!("{BASE_URL}/release/?artist={artist_mbid}&fmt=json&limit={limit}"); - let resp: MbReleaseSearchResponse = self.get_json(&url).await?; - - Ok(resp - .releases - .into_iter() - .map(|r| DiscographyEntry { - mbid: r.id, - title: r.title, - date: r.date, - release_type: None, // release-group type not in this response - track_count: r.track_count, - }) - .collect()) - } - - async fn get_release_tracks(&self, release_mbid: &str) -> TagResult> { - let url = format!("{BASE_URL}/release/{release_mbid}?inc=recordings&fmt=json"); - let resp: MbReleaseDetail = self.get_json(&url).await?; - - let mut tracks = Vec::new(); - for (disc_idx, medium) in resp.media.unwrap_or_default().into_iter().enumerate() { - for track in medium.tracks.unwrap_or_default() { - tracks.push(ReleaseTrack { - recording_mbid: track.recording.map(|r| r.id).unwrap_or_default(), - title: track.title, - track_number: track.position, - disc_number: Some(disc_idx as i32 + 1), - duration_ms: track.length, - }); - } - } - - Ok(tracks) - } - - async fn get_artist_release_groups( - &self, - artist_mbid: &str, - ) -> TagResult> { - // Fetch album, single, and EP release groups - let url = format!( - "{BASE_URL}/release-group?artist={artist_mbid}&type=album|single|ep&fmt=json&limit=100" - ); - let resp: MbReleaseGroupResponse = self.get_json(&url).await?; - - Ok(resp - .release_groups - .unwrap_or_default() - .into_iter() - .map(|rg| ReleaseGroupEntry { - mbid: rg.id, - title: rg.title, - primary_type: rg.primary_type, - secondary_types: rg.secondary_types.unwrap_or_default(), - first_release_date: rg.first_release_date, - first_release_mbid: rg - .releases - .and_then(|r| r.into_iter().next().map(|rel| rel.id)), - }) - .collect()) - } -} - -/// Extract the primary artist from MusicBrainz artist credits. -/// Always returns the first/primary artist only — never concatenates -/// collaborators or featured artists into compound names. -fn extract_artist_credit(credits: &Option>) -> (String, Option) { - match credits { - Some(credits) if !credits.is_empty() => { - let name = credits[0].artist.name.clone(); - let mbid = Some(credits[0].artist.id.clone()); - (name, mbid) - } - _ => ("Unknown Artist".to_string(), None), - } -} - -/// Extract non-featuring secondary artists from MusicBrainz artist credits. -/// Returns (name, mbid) pairs for collaborators that aren't "featuring" credits. -fn extract_secondary_artists(credits: &Option>) -> Vec<(String, String)> { - let Some(credits) = credits else { - return vec![]; - }; - if credits.len() <= 1 { - return vec![]; - } - - // Walk credits after the first. Stop at any "feat"/"ft." joinphrase - // from the PREVIOUS credit (since joinphrase is on the credit BEFORE the next artist). - let mut result = Vec::new(); - for i in 0..credits.len() - 1 { - let jp = credits[i].joinphrase.as_deref().unwrap_or(""); - let lower = jp.to_lowercase(); - if lower.contains("feat") || lower.contains("ft.") { - break; - } - // The next credit is a non-featuring collaborator - let next = &credits[i + 1]; - result.push((next.artist.name.clone(), next.artist.id.clone())); - } - result -} - -fn urlencoded(s: &str) -> String { - s.replace(' ', "+") - .replace('&', "%26") - .replace('=', "%3D") - .replace('#', "%23") -} - -// --- MusicBrainz API response types --- - -#[derive(Deserialize)] -struct MbArtistSearchResponse { - artists: Vec, -} - -#[derive(Deserialize)] -struct MbArtistResult { - id: String, - name: String, - score: Option, - disambiguation: Option, - country: Option, - #[serde(rename = "type")] - artist_type: Option, -} - -#[derive(Deserialize)] -struct MbArtistLookup { - name: String, - disambiguation: Option, -} - -#[derive(Deserialize)] -struct MbArtistFull { - name: String, - disambiguation: Option, - country: Option, - #[serde(rename = "type")] - artist_type: Option, - #[serde(rename = "life-span")] - life_span: Option, - relations: Option>, -} - -#[derive(Deserialize)] -struct MbLifeSpan { - begin: Option, -} - -#[derive(Deserialize)] -struct MbRelation { - #[serde(rename = "type")] - relation_type: String, - url: Option, -} - -#[derive(Deserialize)] -struct MbRelationUrl { - resource: String, -} - -#[derive(Deserialize)] -struct MbRecordingSearchResponse { - recordings: Vec, -} - -#[derive(Deserialize)] -struct MbRecordingResult { - id: String, - title: String, - score: Option, - #[serde(rename = "artist-credit")] - artist_credit: Option>, - releases: Option>, -} - -#[derive(Deserialize)] -struct MbReleaseSearchResponse { - releases: Vec, -} - -#[derive(Deserialize)] -struct MbReleaseResult { - id: String, - title: String, - score: Option, - #[serde(rename = "artist-credit")] - artist_credit: Option>, - date: Option, - #[serde(rename = "track-count")] - track_count: Option, -} - -#[derive(Deserialize)] -struct MbRecordingDetail { - id: String, - title: String, - #[serde(rename = "artist-credit")] - artist_credit: Option>, - releases: Option>, - length: Option, - genres: Option>, -} - -#[derive(Deserialize)] -struct MbArtistCredit { - artist: MbArtist, - joinphrase: Option, -} - -#[derive(Deserialize)] -struct MbArtist { - id: String, - name: String, -} - -#[derive(Deserialize)] -struct MbGenre { - name: String, -} - -#[derive(Deserialize)] -struct MbReleaseDetail { - media: Option>, -} - -#[derive(Deserialize)] -struct MbMedia { - tracks: Option>, -} - -#[derive(Deserialize)] -struct MbTrackEntry { - title: String, - position: Option, - length: Option, - recording: Option, -} - -#[derive(Deserialize)] -struct MbTrackRecording { - id: String, -} - -#[derive(Deserialize)] -struct MbReleaseGroupResponse { - #[serde(rename = "release-groups")] - release_groups: Option>, -} - -#[derive(Deserialize)] -struct MbReleaseGroup { - id: String, - title: String, - #[serde(rename = "primary-type")] - primary_type: Option, - #[serde(rename = "secondary-types", default)] - secondary_types: Option>, - #[serde(rename = "first-release-date")] - first_release_date: Option, - releases: Option>, -} - -#[derive(Deserialize)] -struct MbReleaseGroupRelease { - id: String, -} diff --git a/src/provider.rs b/src/provider.rs deleted file mode 100644 index 6425216..0000000 --- a/src/provider.rs +++ /dev/null @@ -1,156 +0,0 @@ -use serde::{Deserialize, Serialize}; - -use crate::error::TagResult; - -/// A reference to a release (album) that a recording appears on. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ReleaseRef { - pub mbid: String, - pub title: String, - pub date: Option, - pub track_number: Option, -} - -/// A recording match from a search query. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RecordingMatch { - pub mbid: String, - pub title: String, - pub artist: String, - pub artist_mbid: Option, - pub releases: Vec, - /// MusicBrainz API score (0-100). - pub score: u8, -} - -/// A release (album) match from a search query. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ReleaseMatch { - pub mbid: String, - pub title: String, - pub artist: String, - pub artist_mbid: Option, - pub date: Option, - pub track_count: Option, - pub score: u8, -} - -/// Full details for a recording, retrieved by MBID. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RecordingDetails { - pub mbid: String, - pub title: String, - pub artist: String, - pub artist_mbid: Option, - pub releases: Vec, - pub duration_ms: Option, - pub genres: Vec, - /// Non-featuring collaborators beyond the primary artist. - #[serde(default)] - pub secondary_artists: Vec<(String, String)>, -} - -/// Detailed artist info from a direct MBID lookup. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ArtistInfo { - pub name: String, - pub disambiguation: Option, - pub country: Option, - pub artist_type: Option, - pub begin_year: Option, - pub urls: Vec, -} - -/// An external URL linked to an artist on MusicBrainz. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ArtistUrl { - pub url: String, - pub link_type: String, -} - -/// An artist match from a search query. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ArtistSearchResult { - pub mbid: String, - pub name: String, - pub disambiguation: Option, - pub country: Option, - pub artist_type: Option, - pub score: u8, -} - -/// A release entry in an artist's discography. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DiscographyEntry { - pub mbid: String, - pub title: String, - pub date: Option, - pub release_type: Option, - pub track_count: Option, -} - -/// Trait for metadata lookup backends. MusicBrainz is the default implementation; -/// others (Last.fm, Discogs, etc.) can be added later. -pub trait MetadataProvider: Send + Sync { - fn search_recording( - &self, - artist: &str, - title: &str, - ) -> impl std::future::Future>> + Send; - - fn search_release( - &self, - artist: &str, - album: &str, - ) -> impl std::future::Future>> + Send; - - fn get_recording( - &self, - mbid: &str, - ) -> impl std::future::Future> + Send; - - fn search_artist( - &self, - query: &str, - limit: u32, - ) -> impl std::future::Future>> + Send; - - fn get_artist_releases( - &self, - artist_mbid: &str, - limit: u32, - ) -> impl std::future::Future>> + Send; - - fn get_release_tracks( - &self, - release_mbid: &str, - ) -> impl std::future::Future>> + Send; - - /// Get deduplicated release groups (albums, EPs, singles) for an artist. - fn get_artist_release_groups( - &self, - artist_mbid: &str, - ) -> impl std::future::Future>> + Send; -} - -/// A release group (deduplicated album/EP/single concept). -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ReleaseGroupEntry { - pub mbid: String, - pub title: String, - pub primary_type: Option, - pub secondary_types: Vec, - pub first_release_date: Option, - /// MBID of the first release in this group (for fetching tracks). - pub first_release_mbid: Option, -} - -/// A track within a release. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ReleaseTrack { - pub recording_mbid: String, - pub title: String, - pub track_number: Option, - pub disc_number: Option, - pub duration_ms: Option, -} diff --git a/src/tagger.rs b/src/tagger.rs index ec8f29b..0fd1631 100644 --- a/src/tagger.rs +++ b/src/tagger.rs @@ -8,7 +8,7 @@ use shanty_db::queries; use crate::error::TagResult; use crate::file_tags; use crate::matcher::{self, ScoredMatch}; -use crate::provider::MetadataProvider; +use shanty_data::MetadataFetcher as MetadataProvider; /// Configuration for a tagging operation. pub struct TagConfig {