use std::collections::HashMap; use std::sync::Arc; use std::time::Instant; use serde::Deserialize; use tokio::sync::Mutex; use tokio::time::Duration; use crate::error::{DataError, DataResult}; use crate::http::{RateLimiter, escape_lucene, urlencoded}; use crate::traits::MetadataFetcher; use crate::types::{ ArtistInfo, ArtistSearchResult, ArtistUrl, DiscographyEntry, RecordingDetails, RecordingMatch, ReleaseGroupEntry, ReleaseMatch, ReleaseRef, ReleaseTrack, }; const BASE_URL: &str = "https://musicbrainz.org/ws/2"; const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)"; const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe /// How long cached responses stay valid (1 hour). const RESPONSE_CACHE_TTL: Duration = Duration::from_secs(3600); /// A cached HTTP response with a timestamp for expiry. struct CachedResponse { body: String, fetched_at: Instant, } /// MusicBrainz API client with rate limiting and in-memory response caching. pub struct MusicBrainzFetcher { client: reqwest::Client, limiter: RateLimiter, /// In-memory cache: URL -> raw JSON response body. Entries expire after RESPONSE_CACHE_TTL. response_cache: Arc>>, } impl MusicBrainzFetcher { pub fn new() -> DataResult { Self::with_limiter(RateLimiter::new(RATE_LIMIT)) } /// Create a fetcher that shares a rate limiter with other MB clients. pub fn with_limiter(limiter: RateLimiter) -> DataResult { let client = reqwest::Client::builder() .user_agent(USER_AGENT) .timeout(Duration::from_secs(30)) .build()?; Ok(Self { client, limiter, response_cache: Arc::new(Mutex::new(HashMap::new())), }) } /// Get a clone of the rate limiter for sharing with other MB clients. pub fn limiter(&self) -> RateLimiter { self.limiter.clone() } async fn get_json(&self, url: &str) -> DataResult { // Check in-memory cache first { let mut cache = self.response_cache.lock().await; if let Some(entry) = cache.get(url) { if entry.fetched_at.elapsed() < RESPONSE_CACHE_TTL { tracing::debug!(url = url, "MusicBrainz cache hit"); return Ok(serde_json::from_str(&entry.body)?); } else { // Expired — remove it cache.remove(url); } } // Also purge any other expired entries opportunistically (limit to avoid holding lock too long) let expired: Vec = cache .iter() .filter(|(_, v)| v.fetched_at.elapsed() >= RESPONSE_CACHE_TTL) .map(|(k, _)| k.clone()) .take(50) .collect(); for k in expired { cache.remove(&k); } } self.limiter.wait().await; tracing::debug!(url = url, "MusicBrainz request"); let resp = self.client.get(url).send().await?; let status = resp.status(); if !status.is_success() { let body = resp.text().await.unwrap_or_default(); return Err(DataError::Other(format!( "MusicBrainz API error {status}: {body}" ))); } let body = resp.text().await?; // Store in cache { let mut cache = self.response_cache.lock().await; cache.insert( url.to_string(), CachedResponse { body: body.clone(), fetched_at: Instant::now(), }, ); } Ok(serde_json::from_str(&body)?) } /// Look up an artist directly by MBID. Returns (name, disambiguation). pub async fn get_artist_by_mbid(&self, mbid: &str) -> DataResult<(String, Option)> { let url = format!("{BASE_URL}/artist/{mbid}?fmt=json"); let resp: MbArtistLookup = self.get_json(&url).await?; Ok((resp.name, resp.disambiguation.filter(|s| !s.is_empty()))) } /// Look up detailed artist info by MBID, including URLs and metadata. pub async fn get_artist_info(&self, mbid: &str) -> DataResult { let url = format!("{BASE_URL}/artist/{mbid}?inc=url-rels&fmt=json"); let resp: MbArtistFull = self.get_json(&url).await?; let begin_year = resp .life_span .and_then(|ls| ls.begin) .and_then(|d| d.split('-').next().map(String::from)); let urls = resp .relations .unwrap_or_default() .into_iter() .filter_map(|rel| { rel.url.map(|u| ArtistUrl { url: u.resource, link_type: rel.relation_type, }) }) .collect(); Ok(ArtistInfo { name: resp.name, mbid: Some(mbid.to_string()), disambiguation: resp.disambiguation.filter(|s| !s.is_empty()), country: resp.country.filter(|s| !s.is_empty()), artist_type: resp.artist_type, begin_year, urls, }) } /// Resolve a release-group MBID to a release MBID (first release in the group). pub async fn resolve_release_from_group(&self, release_group_mbid: &str) -> DataResult { let url = format!("{BASE_URL}/release?release-group={release_group_mbid}&fmt=json&limit=1"); let resp: serde_json::Value = self.get_json(&url).await?; resp.get("releases") .and_then(|r| r.as_array()) .and_then(|arr| arr.first()) .and_then(|r| r.get("id")) .and_then(|id| id.as_str()) .map(String::from) .ok_or_else(|| { DataError::Other(format!( "no releases for release-group {release_group_mbid}" )) }) } } impl MetadataFetcher for MusicBrainzFetcher { async fn search_recording(&self, artist: &str, title: &str) -> DataResult> { let query = if artist.is_empty() { format!("recording:{}", escape_lucene(title)) } else { format!( "artist:{} AND recording:{}", escape_lucene(artist), escape_lucene(title) ) }; let url = format!( "{BASE_URL}/recording/?query={}&fmt=json&limit=5", urlencoded(&query) ); let resp: MbRecordingSearchResponse = self.get_json(&url).await?; Ok(resp .recordings .into_iter() .map(|r| { let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit); RecordingMatch { mbid: r.id, title: r.title, artist: artist_name, artist_mbid, releases: r .releases .unwrap_or_default() .into_iter() .map(|rel| ReleaseRef { mbid: rel.id, title: rel.title, date: rel.date, track_number: None, }) .collect(), score: r.score.unwrap_or(0), } }) .collect()) } async fn search_release(&self, artist: &str, album: &str) -> DataResult> { let query = if artist.is_empty() { format!("release:{}", escape_lucene(album)) } else { format!( "artist:{} AND release:{}", escape_lucene(artist), escape_lucene(album) ) }; let url = format!( "{BASE_URL}/release/?query={}&fmt=json&limit=5", urlencoded(&query) ); let resp: MbReleaseSearchResponse = self.get_json(&url).await?; Ok(resp .releases .into_iter() .map(|r| { let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit); ReleaseMatch { mbid: r.id, title: r.title, artist: artist_name, artist_mbid, date: r.date, track_count: r.track_count, score: r.score.unwrap_or(0), } }) .collect()) } async fn get_recording(&self, mbid: &str) -> DataResult { let url = format!("{BASE_URL}/recording/{mbid}?inc=artists+releases+genres&fmt=json"); let r: MbRecordingDetail = self.get_json(&url).await?; let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit); Ok(RecordingDetails { mbid: r.id, title: r.title, artist: artist_name, artist_mbid, releases: r .releases .unwrap_or_default() .into_iter() .map(|rel| ReleaseRef { mbid: rel.id, title: rel.title, date: rel.date, track_number: None, }) .collect(), duration_ms: r.length, genres: r .genres .unwrap_or_default() .into_iter() .map(|g| g.name) .collect(), }) } async fn search_artist(&self, query: &str, limit: u32) -> DataResult> { let url = format!( "{BASE_URL}/artist/?query={}&fmt=json&limit={limit}", urlencoded(&escape_lucene(query)) ); let resp: MbArtistSearchResponse = self.get_json(&url).await?; Ok(resp .artists .into_iter() .map(|a| ArtistSearchResult { mbid: a.id, name: a.name, disambiguation: a.disambiguation.filter(|s| !s.is_empty()), country: a.country, artist_type: a.artist_type, score: a.score.unwrap_or(0), }) .collect()) } async fn get_artist_releases( &self, artist_mbid: &str, limit: u32, ) -> DataResult> { let url = format!("{BASE_URL}/release/?artist={artist_mbid}&fmt=json&limit={limit}"); let resp: MbReleaseSearchResponse = self.get_json(&url).await?; Ok(resp .releases .into_iter() .map(|r| DiscographyEntry { mbid: r.id, title: r.title, date: r.date, release_type: None, // release-group type not in this response track_count: r.track_count, }) .collect()) } async fn get_release_tracks(&self, release_mbid: &str) -> DataResult> { let url = format!("{BASE_URL}/release/{release_mbid}?inc=recordings&fmt=json"); let resp: MbReleaseDetail = self.get_json(&url).await?; let mut tracks = Vec::new(); for (disc_idx, medium) in resp.media.unwrap_or_default().into_iter().enumerate() { for track in medium.tracks.unwrap_or_default() { tracks.push(ReleaseTrack { recording_mbid: track.recording.map(|r| r.id).unwrap_or_default(), title: track.title, track_number: track.position, disc_number: Some(disc_idx as i32 + 1), duration_ms: track.length, }); } } Ok(tracks) } async fn get_artist_release_groups( &self, artist_mbid: &str, ) -> DataResult> { // Fetch album, single, and EP release groups let url = format!( "{BASE_URL}/release-group?artist={artist_mbid}&type=album|single|ep&inc=artist-credits&fmt=json&limit=100" ); let resp: MbReleaseGroupResponse = self.get_json(&url).await?; Ok(resp .release_groups .unwrap_or_default() .into_iter() .map(|rg| { let primary = extract_artist_credit(&rg.artist_credit); let featured = primary.1.as_deref() != Some(artist_mbid); ReleaseGroupEntry { mbid: rg.id, title: rg.title, primary_type: rg.primary_type, secondary_types: rg.secondary_types.unwrap_or_default(), first_release_date: rg.first_release_date, first_release_mbid: rg .releases .and_then(|r| r.into_iter().next().map(|rel| rel.id)), featured, } }) .collect()) } async fn resolve_release_from_group(&self, release_group_mbid: &str) -> DataResult { // Delegate to the inherent method MusicBrainzFetcher::resolve_release_from_group(self, release_group_mbid).await } } /// Extract the primary artist from MusicBrainz artist credits. /// Always returns the first/primary artist only -- never concatenates /// collaborators or featured artists into compound names. fn extract_artist_credit(credits: &Option>) -> (String, Option) { match credits { Some(credits) if !credits.is_empty() => { let name = credits[0].artist.name.clone(); let mbid = Some(credits[0].artist.id.clone()); (name, mbid) } _ => ("Unknown Artist".to_string(), None), } } // --- MusicBrainz API response types --- #[derive(Deserialize)] struct MbArtistSearchResponse { artists: Vec, } #[derive(Deserialize)] struct MbArtistResult { id: String, name: String, score: Option, disambiguation: Option, country: Option, #[serde(rename = "type")] artist_type: Option, } #[derive(Deserialize)] struct MbArtistLookup { name: String, disambiguation: Option, } #[derive(Deserialize)] struct MbArtistFull { name: String, disambiguation: Option, country: Option, #[serde(rename = "type")] artist_type: Option, #[serde(rename = "life-span")] life_span: Option, relations: Option>, } #[derive(Deserialize)] struct MbLifeSpan { begin: Option, } #[derive(Deserialize)] struct MbRelation { #[serde(rename = "type")] relation_type: String, url: Option, } #[derive(Deserialize)] struct MbRelationUrl { resource: String, } #[derive(Deserialize)] struct MbRecordingSearchResponse { recordings: Vec, } #[derive(Deserialize)] struct MbRecordingResult { id: String, title: String, score: Option, #[serde(rename = "artist-credit")] artist_credit: Option>, releases: Option>, } #[derive(Deserialize)] struct MbReleaseSearchResponse { releases: Vec, } #[derive(Deserialize)] struct MbReleaseResult { id: String, title: String, score: Option, #[serde(rename = "artist-credit")] artist_credit: Option>, date: Option, #[serde(rename = "track-count")] track_count: Option, } #[derive(Deserialize)] struct MbRecordingDetail { id: String, title: String, #[serde(rename = "artist-credit")] artist_credit: Option>, releases: Option>, length: Option, genres: Option>, } #[derive(Deserialize)] struct MbArtistCredit { artist: MbArtist, } #[derive(Deserialize)] struct MbArtist { id: String, name: String, } #[derive(Deserialize)] struct MbGenre { name: String, } #[derive(Deserialize)] struct MbReleaseDetail { media: Option>, } #[derive(Deserialize)] struct MbMedia { tracks: Option>, } #[derive(Deserialize)] struct MbTrackEntry { title: String, position: Option, length: Option, recording: Option, } #[derive(Deserialize)] struct MbTrackRecording { id: String, } #[derive(Deserialize)] struct MbReleaseGroupResponse { #[serde(rename = "release-groups")] release_groups: Option>, } #[derive(Deserialize)] struct MbReleaseGroup { id: String, title: String, #[serde(rename = "primary-type")] primary_type: Option, #[serde(rename = "secondary-types", default)] secondary_types: Option>, #[serde(rename = "first-release-date")] first_release_date: Option, releases: Option>, #[serde(rename = "artist-credit")] artist_credit: Option>, } #[derive(Deserialize)] struct MbReleaseGroupRelease { id: String, }