diff --git a/Cargo.lock b/Cargo.lock index 9d2ea42..0f86e85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3091,9 +3091,11 @@ dependencies = [ "actix-web", "anyhow", "clap", + "dotenvy", "rand 0.9.2", "serde_json", "shanty-config", + "shanty-data", "shanty-db", "shanty-search", "shanty-tag", @@ -3116,6 +3118,18 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "shanty-data" +version = "0.1.0" +dependencies = [ + "reqwest", + "serde", + "serde_json", + "thiserror", + "tokio", + "tracing", +] + [[package]] name = "shanty-db" version = "0.1.0" @@ -3233,6 +3247,7 @@ dependencies = [ "sea-orm", "serde", "serde_json", + "shanty-data", "shanty-db", "shanty-tag", "thiserror", @@ -3267,6 +3282,7 @@ dependencies = [ "sea-orm", "serde", "serde_json", + "shanty-data", "shanty-db", "strsim", "tempfile", @@ -3319,6 +3335,7 @@ dependencies = [ "serde_json", "serde_yaml", "shanty-config", + "shanty-data", "shanty-db", "shanty-dl", "shanty-index", diff --git a/Cargo.toml b/Cargo.toml index 223661c..3a8bd3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ members = [ "shanty-playlist", "shanty-serve", "shanty-play", - "shanty-web", + "shanty-web", "shanty-data", ] resolver = "3" @@ -55,6 +55,7 @@ path = "src/main.rs" [dependencies] shanty-config = { path = "shanty-config" } +shanty-data = { path = "shanty-data" } shanty-db = { path = "shanty-db" } shanty-web = { path = "shanty-web" } shanty-tag = { path = "shanty-tag" } @@ -71,3 +72,4 @@ tracing = { workspace = true } tracing-subscriber = { workspace = true } anyhow = { workspace = true } serde_json = { workspace = true } +dotenvy = "0.15" diff --git a/shanty-config/src/lib.rs b/shanty-config/src/lib.rs index b7ee423..9a2ae88 100644 --- a/shanty-config/src/lib.rs +++ b/shanty-config/src/lib.rs @@ -32,6 +32,9 @@ pub struct AppConfig { #[serde(default)] pub indexing: IndexingConfig, + + #[serde(default)] + pub metadata: MetadataConfig, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -94,6 +97,39 @@ pub 
struct IndexingConfig { pub concurrency: usize, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MetadataConfig { + /// Source for structured metadata: "musicbrainz" (default). + #[serde(default = "default_metadata_source")] + pub metadata_source: String, + + /// Source for artist images: "wikipedia" (default). + #[serde(default = "default_artist_image_source")] + pub artist_image_source: String, + + /// Source for artist bios: "wikipedia" (default) or "lastfm". + #[serde(default = "default_artist_bio_source")] + pub artist_bio_source: String, + + /// Source for lyrics: "lrclib" (default). + #[serde(default = "default_lyrics_source")] + pub lyrics_source: String, + + /// Source for cover art: "coverartarchive" (default). + #[serde(default = "default_cover_art_source")] + pub cover_art_source: String, + + /// Last.fm API key for fetching artist bios. Set via SHANTY_LASTFM_API_KEY env var. + /// Required if artist_bio_source is "lastfm". + #[serde(skip)] + pub lastfm_api_key: Option, + + /// fanart.tv API key for artist images/banners. Set via SHANTY_FANART_API_KEY env var. + /// Required if artist_image_source is "fanarttv". 
+ #[serde(skip)] + pub fanart_api_key: Option, +} + // --- Defaults --- impl Default for AppConfig { @@ -108,6 +144,7 @@ impl Default for AppConfig { tagging: TaggingConfig::default(), download: DownloadConfig::default(), indexing: IndexingConfig::default(), + metadata: MetadataConfig::default(), } } } @@ -154,6 +191,20 @@ impl Default for IndexingConfig { } } +impl Default for MetadataConfig { + fn default() -> Self { + Self { + metadata_source: default_metadata_source(), + artist_image_source: default_artist_image_source(), + artist_bio_source: default_artist_bio_source(), + lyrics_source: default_lyrics_source(), + cover_art_source: default_cover_art_source(), + lastfm_api_key: None, + fanart_api_key: None, + } + } +} + fn default_library_path() -> PathBuf { dirs::audio_dir().unwrap_or_else(|| PathBuf::from("~/Music")) } @@ -206,6 +257,21 @@ fn default_rate_limit_auth() -> u32 { fn default_concurrency() -> usize { 4 } +fn default_metadata_source() -> String { + "musicbrainz".to_string() +} +fn default_artist_image_source() -> String { + "wikipedia".to_string() +} +fn default_artist_bio_source() -> String { + "wikipedia".to_string() +} +fn default_lyrics_source() -> String { + "lrclib".to_string() +} +fn default_cover_art_source() -> String { + "coverartarchive".to_string() +} fn default_cookie_refresh_hours() -> u32 { 6 } @@ -295,6 +361,12 @@ impl AppConfig { if let Ok(v) = std::env::var("SHANTY_WEB_BIND") { config.web.bind = v; } + if let Ok(v) = std::env::var("SHANTY_LASTFM_API_KEY") { + config.metadata.lastfm_api_key = Some(v); + } + if let Ok(v) = std::env::var("SHANTY_FANART_API_KEY") { + config.metadata.fanart_api_key = Some(v); + } config } } diff --git a/shanty-data/Cargo.toml b/shanty-data/Cargo.toml new file mode 100644 index 0000000..9297a81 --- /dev/null +++ b/shanty-data/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "shanty-data" +version.workspace = true +edition.workspace = true +license.workspace = true +description = "External data providers 
for Shanty — metadata, images, bios, lyrics, cover art" + +[dependencies] +serde = { version = "1", features = ["derive"] } +serde_json = "1" +thiserror = "2" +tracing = "0.1" +tokio = { version = "1", features = ["full"] } +reqwest = { version = "0.12", features = ["json"] } + +[dev-dependencies] +tokio = { version = "1", features = ["full", "test-util"] } diff --git a/shanty-data/src/coverart.rs b/shanty-data/src/coverart.rs new file mode 100644 index 0000000..7770488 --- /dev/null +++ b/shanty-data/src/coverart.rs @@ -0,0 +1,19 @@ +use crate::traits::CoverArtFetcher; + +/// Fetches cover art URLs from the Cover Art Archive. +#[derive(Default)] +pub struct CoverArtArchiveFetcher; + +impl CoverArtArchiveFetcher { + pub fn new() -> Self { + Self + } +} + +impl CoverArtFetcher for CoverArtArchiveFetcher { + fn get_cover_art_url(&self, release_id: &str) -> Option { + Some(format!( + "https://coverartarchive.org/release/{release_id}/front-250" + )) + } +} diff --git a/shanty-data/src/error.rs b/shanty-data/src/error.rs new file mode 100644 index 0000000..f4845cd --- /dev/null +++ b/shanty-data/src/error.rs @@ -0,0 +1,14 @@ +/// Error type for data fetching operations. +#[derive(Debug, thiserror::Error)] +pub enum DataError { + #[error("HTTP error: {0}")] + Http(#[from] reqwest::Error), + + #[error("JSON error: {0}")] + Json(#[from] serde_json::Error), + + #[error("{0}")] + Other(String), +} + +pub type DataResult = Result; diff --git a/shanty-data/src/fanarttv.rs b/shanty-data/src/fanarttv.rs new file mode 100644 index 0000000..1301df5 --- /dev/null +++ b/shanty-data/src/fanarttv.rs @@ -0,0 +1,116 @@ +//! fanart.tv artist image and banner provider. +//! +//! API docs: +//! Endpoint: `GET https://webservice.fanart.tv/v3/music/{musicbrainz_id}?api_key={key}` +//! +//! Returns artist thumbnails, HD backgrounds (banners), logos, and more. 
+ +use crate::error::{DataError, DataResult}; +use crate::http::build_client; +use crate::traits::{ArtistBioFetcher, ArtistImageFetcher}; +use crate::types::ArtistInfo; + +/// fanart.tv image fetcher. Provides artist thumbnails and HD background banners. +pub struct FanartTvFetcher { + client: reqwest::Client, + api_key: String, +} + +impl FanartTvFetcher { + pub fn new(api_key: String) -> DataResult { + let client = build_client("Shanty/0.1.0 (shanty-music-app)", 15)?; + Ok(Self { client, api_key }) + } + + /// Fetch the full fanart.tv response for an artist MBID. + async fn fetch(&self, mbid: &str) -> DataResult> { + let url = format!( + "https://webservice.fanart.tv/v3/music/{mbid}?api_key={}", + self.api_key + ); + + let resp = self.client.get(&url).send().await?; + + if resp.status() == reqwest::StatusCode::NOT_FOUND { + return Ok(None); + } + if !resp.status().is_success() { + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + return Err(DataError::Other(format!( + "fanart.tv API error {status}: {body}" + ))); + } + + Ok(Some(resp.json().await?)) + } +} + +impl ArtistImageFetcher for FanartTvFetcher { + async fn get_artist_image(&self, artist_info: &ArtistInfo) -> DataResult> { + let Some(ref mbid) = artist_info.mbid else { + return Ok(None); + }; + + let Some(data) = self.fetch(mbid).await? else { + return Ok(None); + }; + + // Prefer artistthumb, fall back to hdmusiclogo + let url = data + .artistthumb + .as_ref() + .and_then(|imgs| imgs.first()) + .or_else(|| data.hdmusiclogo.as_ref().and_then(|imgs| imgs.first())) + .map(|img| img.url.clone()); + + Ok(url) + } + + async fn get_artist_banner(&self, artist_info: &ArtistInfo) -> DataResult> { + let Some(ref mbid) = artist_info.mbid else { + return Ok(None); + }; + + let Some(data) = self.fetch(mbid).await? 
else { + return Ok(None); + }; + + // Prefer artistbackground (1920x1080), fall back to musicbanner (1000x185) + let url = data + .artistbackground + .as_ref() + .and_then(|imgs| imgs.first()) + .or_else(|| data.musicbanner.as_ref().and_then(|imgs| imgs.first())) + .map(|img| img.url.clone()); + + Ok(url) + } +} + +// fanart.tv doesn't provide bios — this is a no-op so it can be used as +// a combined image+bio provider without errors. +impl ArtistBioFetcher for FanartTvFetcher { + async fn get_artist_bio(&self, _artist_info: &ArtistInfo) -> DataResult> { + Ok(None) + } +} + +// --- fanart.tv API response types --- + +#[derive(serde::Deserialize)] +struct FanartResponse { + #[serde(default)] + artistthumb: Option>, + #[serde(default)] + artistbackground: Option>, + #[serde(default)] + musicbanner: Option>, + #[serde(default)] + hdmusiclogo: Option>, +} + +#[derive(serde::Deserialize)] +struct FanartImage { + url: String, +} diff --git a/shanty-data/src/http.rs b/shanty-data/src/http.rs new file mode 100644 index 0000000..124b504 --- /dev/null +++ b/shanty-data/src/http.rs @@ -0,0 +1,61 @@ +use std::time::Duration; + +use tokio::sync::Mutex; +use tokio::time::Instant; + +/// A simple rate limiter that enforces a minimum interval between requests. +pub struct RateLimiter { + last_request: Mutex, + interval: Duration, +} + +impl RateLimiter { + pub fn new(interval: Duration) -> Self { + Self { + last_request: Mutex::new(Instant::now() - interval), + interval, + } + } + + /// Wait if needed so we don't exceed the rate limit. + pub async fn wait(&self) { + let mut last = self.last_request.lock().await; + let elapsed = last.elapsed(); + if elapsed < self.interval { + tokio::time::sleep(self.interval - elapsed).await; + } + *last = Instant::now(); + } +} + +/// Build a reqwest client with a custom user agent and timeout. 
/// Percent-encode a string for use as a single URL query-parameter value.
///
/// Follows the `application/x-www-form-urlencoded` convention:
///
/// * ASCII letters, digits and `-`, `_`, `.`, `~` are passed through unchanged;
/// * a space becomes `+`;
/// * every other byte of the UTF-8 encoding becomes `%XX` (uppercase hex).
///
/// Unlike a replace-a-few-characters approach, this also encodes a literal `+`
/// (which a server would otherwise decode as a space) and a literal `%`
/// (which would otherwise produce an invalid or misread escape sequence).
pub fn urlencoded(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    let mut out = String::with_capacity(s.len());
    for &byte in s.as_bytes() {
        match byte {
            // Unreserved characters are always safe inside a query value.
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                out.push(char::from(byte));
            }
            // Form encoding represents a space as '+'.
            b' ' => out.push('+'),
            // Everything else (including '+', '%', '&', '=', '#' and all
            // non-ASCII bytes) is percent-encoded.
            _ => {
                out.push('%');
                out.push(char::from(HEX[usize::from(byte >> 4)]));
                out.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    out
}
+pub struct LastFmBioFetcher { + api_key: String, + client: reqwest::Client, +} + +impl LastFmBioFetcher { + pub fn new(api_key: String) -> DataResult { + let client = build_client(USER_AGENT, 30)?; + Ok(Self { api_key, client }) + } +} + +impl ArtistBioFetcher for LastFmBioFetcher { + async fn get_artist_bio(&self, artist_info: &ArtistInfo) -> DataResult> { + if self.api_key.is_empty() { + return Ok(None); + } + + let url = format!( + "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist={}&api_key={}&format=json", + urlencoded(&artist_info.name), + &self.api_key, + ); + + let resp = match self.client.get(&url).send().await { + Ok(r) if r.status().is_success() => r, + _ => return Ok(None), + }; + + let body: serde_json::Value = match resp.json().await { + Ok(v) => v, + Err(_) => return Ok(None), + }; + + let summary = body + .get("artist") + .and_then(|a| a.get("bio")) + .and_then(|b| b.get("summary")) + .and_then(|s| s.as_str()) + .map(strip_html_tags); + + Ok(summary) + } +} + +/// Strip HTML tags from a string with a simple approach. 
/// Remove HTML tags from `s` and trim surrounding whitespace.
///
/// This is a small state machine, not an HTML parser: everything from a `<`
/// up to the next `>` is discarded, and all other characters are kept.
/// A `>` that appears while no tag is open is ordinary text and is preserved.
/// HTML entities (e.g. `&amp;`) are not decoded, and an unterminated `<`
/// causes the remainder of the input to be dropped.
fn strip_html_tags(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut in_tag = false;
    for c in s.chars() {
        match c {
            '<' => in_tag = true,
            // Only treat '>' as a tag terminator when a tag is actually open;
            // otherwise it is literal text and falls through to the push below.
            '>' if in_tag => in_tag = false,
            _ if !in_tag => result.push(c),
            _ => {}
        }
    }
    result.trim().to_string()
}
+pub struct LrclibFetcher { + client: reqwest::Client, +} + +impl LrclibFetcher { + pub fn new() -> DataResult { + let client = build_client(USER_AGENT, 30)?; + Ok(Self { client }) + } +} + +impl LyricsFetcher for LrclibFetcher { + async fn get_lyrics(&self, artist: &str, title: &str) -> DataResult { + let url = format!( + "https://lrclib.net/api/search?artist_name={}&track_name={}", + urlencoded(artist), + urlencoded(title), + ); + + let resp = self.client.get(&url).send().await?; + + if !resp.status().is_success() { + return Ok(LyricsResult { + found: false, + lyrics: None, + synced_lyrics: None, + }); + } + + let results: Vec = resp.json().await?; + + if let Some(entry) = results.first() { + let plain = entry + .get("plainLyrics") + .and_then(|v| v.as_str()) + .map(String::from); + let synced = entry + .get("syncedLyrics") + .and_then(|v| v.as_str()) + .map(String::from); + + Ok(LyricsResult { + found: plain.is_some() || synced.is_some(), + lyrics: plain, + synced_lyrics: synced, + }) + } else { + Ok(LyricsResult { + found: false, + lyrics: None, + synced_lyrics: None, + }) + } + } +} diff --git a/shanty-data/src/musicbrainz.rs b/shanty-data/src/musicbrainz.rs new file mode 100644 index 0000000..f5b221e --- /dev/null +++ b/shanty-data/src/musicbrainz.rs @@ -0,0 +1,488 @@ +use serde::Deserialize; +use tokio::time::Duration; + +use crate::error::{DataError, DataResult}; +use crate::http::{RateLimiter, escape_lucene, urlencoded}; +use crate::traits::MetadataFetcher; +use crate::types::{ + ArtistInfo, ArtistSearchResult, ArtistUrl, DiscographyEntry, RecordingDetails, RecordingMatch, + ReleaseGroupEntry, ReleaseMatch, ReleaseRef, ReleaseTrack, +}; + +const BASE_URL: &str = "https://musicbrainz.org/ws/2"; +const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)"; +const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe + +/// MusicBrainz API client with rate limiting. 
+pub struct MusicBrainzFetcher { + client: reqwest::Client, + limiter: RateLimiter, +} + +impl MusicBrainzFetcher { + pub fn new() -> DataResult { + let client = reqwest::Client::builder() + .user_agent(USER_AGENT) + .timeout(Duration::from_secs(30)) + .build()?; + Ok(Self { + client, + limiter: RateLimiter::new(RATE_LIMIT), + }) + } + + async fn get_json(&self, url: &str) -> DataResult { + self.limiter.wait().await; + tracing::debug!(url = url, "MusicBrainz request"); + let resp = self.client.get(url).send().await?; + let status = resp.status(); + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + return Err(DataError::Other(format!( + "MusicBrainz API error {status}: {body}" + ))); + } + Ok(resp.json().await?) + } + + /// Look up an artist directly by MBID. Returns (name, disambiguation). + pub async fn get_artist_by_mbid(&self, mbid: &str) -> DataResult<(String, Option)> { + let url = format!("{BASE_URL}/artist/{mbid}?fmt=json"); + let resp: MbArtistLookup = self.get_json(&url).await?; + Ok((resp.name, resp.disambiguation.filter(|s| !s.is_empty()))) + } + + /// Look up detailed artist info by MBID, including URLs and metadata. 
+ pub async fn get_artist_info(&self, mbid: &str) -> DataResult { + let url = format!("{BASE_URL}/artist/{mbid}?inc=url-rels&fmt=json"); + let resp: MbArtistFull = self.get_json(&url).await?; + + let begin_year = resp + .life_span + .and_then(|ls| ls.begin) + .and_then(|d| d.split('-').next().map(String::from)); + + let urls = resp + .relations + .unwrap_or_default() + .into_iter() + .filter_map(|rel| { + rel.url.map(|u| ArtistUrl { + url: u.resource, + link_type: rel.relation_type, + }) + }) + .collect(); + + Ok(ArtistInfo { + name: resp.name, + mbid: Some(mbid.to_string()), + disambiguation: resp.disambiguation.filter(|s| !s.is_empty()), + country: resp.country.filter(|s| !s.is_empty()), + artist_type: resp.artist_type, + begin_year, + urls, + }) + } +} + +impl MetadataFetcher for MusicBrainzFetcher { + async fn search_recording(&self, artist: &str, title: &str) -> DataResult> { + let query = if artist.is_empty() { + format!("recording:{}", escape_lucene(title)) + } else { + format!( + "artist:{} AND recording:{}", + escape_lucene(artist), + escape_lucene(title) + ) + }; + let url = format!( + "{BASE_URL}/recording/?query={}&fmt=json&limit=5", + urlencoded(&query) + ); + let resp: MbRecordingSearchResponse = self.get_json(&url).await?; + + Ok(resp + .recordings + .into_iter() + .map(|r| { + let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit); + RecordingMatch { + mbid: r.id, + title: r.title, + artist: artist_name, + artist_mbid, + releases: r + .releases + .unwrap_or_default() + .into_iter() + .map(|rel| ReleaseRef { + mbid: rel.id, + title: rel.title, + date: rel.date, + track_number: None, + }) + .collect(), + score: r.score.unwrap_or(0), + } + }) + .collect()) + } + + async fn search_release(&self, artist: &str, album: &str) -> DataResult> { + let query = if artist.is_empty() { + format!("release:{}", escape_lucene(album)) + } else { + format!( + "artist:{} AND release:{}", + escape_lucene(artist), + escape_lucene(album) + ) + }; + let 
url = format!( + "{BASE_URL}/release/?query={}&fmt=json&limit=5", + urlencoded(&query) + ); + let resp: MbReleaseSearchResponse = self.get_json(&url).await?; + + Ok(resp + .releases + .into_iter() + .map(|r| { + let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit); + ReleaseMatch { + mbid: r.id, + title: r.title, + artist: artist_name, + artist_mbid, + date: r.date, + track_count: r.track_count, + score: r.score.unwrap_or(0), + } + }) + .collect()) + } + + async fn get_recording(&self, mbid: &str) -> DataResult { + let url = format!("{BASE_URL}/recording/{mbid}?inc=artists+releases+genres&fmt=json"); + let r: MbRecordingDetail = self.get_json(&url).await?; + + let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit); + let secondary_artists = extract_secondary_artists(&r.artist_credit); + Ok(RecordingDetails { + mbid: r.id, + title: r.title, + artist: artist_name, + artist_mbid, + releases: r + .releases + .unwrap_or_default() + .into_iter() + .map(|rel| ReleaseRef { + mbid: rel.id, + title: rel.title, + date: rel.date, + track_number: None, + }) + .collect(), + duration_ms: r.length, + genres: r + .genres + .unwrap_or_default() + .into_iter() + .map(|g| g.name) + .collect(), + secondary_artists, + }) + } + + async fn search_artist(&self, query: &str, limit: u32) -> DataResult> { + let url = format!( + "{BASE_URL}/artist/?query={}&fmt=json&limit={limit}", + urlencoded(&escape_lucene(query)) + ); + let resp: MbArtistSearchResponse = self.get_json(&url).await?; + + Ok(resp + .artists + .into_iter() + .map(|a| ArtistSearchResult { + mbid: a.id, + name: a.name, + disambiguation: a.disambiguation.filter(|s| !s.is_empty()), + country: a.country, + artist_type: a.artist_type, + score: a.score.unwrap_or(0), + }) + .collect()) + } + + async fn get_artist_releases( + &self, + artist_mbid: &str, + limit: u32, + ) -> DataResult> { + let url = format!("{BASE_URL}/release/?artist={artist_mbid}&fmt=json&limit={limit}"); + let resp: 
MbReleaseSearchResponse = self.get_json(&url).await?; + + Ok(resp + .releases + .into_iter() + .map(|r| DiscographyEntry { + mbid: r.id, + title: r.title, + date: r.date, + release_type: None, // release-group type not in this response + track_count: r.track_count, + }) + .collect()) + } + + async fn get_release_tracks(&self, release_mbid: &str) -> DataResult> { + let url = format!("{BASE_URL}/release/{release_mbid}?inc=recordings&fmt=json"); + let resp: MbReleaseDetail = self.get_json(&url).await?; + + let mut tracks = Vec::new(); + for (disc_idx, medium) in resp.media.unwrap_or_default().into_iter().enumerate() { + for track in medium.tracks.unwrap_or_default() { + tracks.push(ReleaseTrack { + recording_mbid: track.recording.map(|r| r.id).unwrap_or_default(), + title: track.title, + track_number: track.position, + disc_number: Some(disc_idx as i32 + 1), + duration_ms: track.length, + }); + } + } + + Ok(tracks) + } + + async fn get_artist_release_groups( + &self, + artist_mbid: &str, + ) -> DataResult> { + // Fetch album, single, and EP release groups + let url = format!( + "{BASE_URL}/release-group?artist={artist_mbid}&type=album|single|ep&fmt=json&limit=100" + ); + let resp: MbReleaseGroupResponse = self.get_json(&url).await?; + + Ok(resp + .release_groups + .unwrap_or_default() + .into_iter() + .map(|rg| ReleaseGroupEntry { + mbid: rg.id, + title: rg.title, + primary_type: rg.primary_type, + secondary_types: rg.secondary_types.unwrap_or_default(), + first_release_date: rg.first_release_date, + first_release_mbid: rg + .releases + .and_then(|r| r.into_iter().next().map(|rel| rel.id)), + }) + .collect()) + } +} + +/// Extract the primary artist from MusicBrainz artist credits. +/// Always returns the first/primary artist only -- never concatenates +/// collaborators or featured artists into compound names. 
+fn extract_artist_credit(credits: &Option>) -> (String, Option) { + match credits { + Some(credits) if !credits.is_empty() => { + let name = credits[0].artist.name.clone(); + let mbid = Some(credits[0].artist.id.clone()); + (name, mbid) + } + _ => ("Unknown Artist".to_string(), None), + } +} + +/// Extract non-featuring secondary artists from MusicBrainz artist credits. +/// Returns (name, mbid) pairs for collaborators that aren't "featuring" credits. +fn extract_secondary_artists(credits: &Option>) -> Vec<(String, String)> { + let Some(credits) = credits else { + return vec![]; + }; + if credits.len() <= 1 { + return vec![]; + } + + // Walk credits after the first. Stop at any "feat"/"ft." joinphrase + // from the PREVIOUS credit (since joinphrase is on the credit BEFORE the next artist). + let mut result = Vec::new(); + for i in 0..credits.len() - 1 { + let jp = credits[i].joinphrase.as_deref().unwrap_or(""); + let lower = jp.to_lowercase(); + if lower.contains("feat") || lower.contains("ft.") { + break; + } + // The next credit is a non-featuring collaborator + let next = &credits[i + 1]; + result.push((next.artist.name.clone(), next.artist.id.clone())); + } + result +} + +// --- MusicBrainz API response types --- + +#[derive(Deserialize)] +struct MbArtistSearchResponse { + artists: Vec, +} + +#[derive(Deserialize)] +struct MbArtistResult { + id: String, + name: String, + score: Option, + disambiguation: Option, + country: Option, + #[serde(rename = "type")] + artist_type: Option, +} + +#[derive(Deserialize)] +struct MbArtistLookup { + name: String, + disambiguation: Option, +} + +#[derive(Deserialize)] +struct MbArtistFull { + name: String, + disambiguation: Option, + country: Option, + #[serde(rename = "type")] + artist_type: Option, + #[serde(rename = "life-span")] + life_span: Option, + relations: Option>, +} + +#[derive(Deserialize)] +struct MbLifeSpan { + begin: Option, +} + +#[derive(Deserialize)] +struct MbRelation { + #[serde(rename = "type")] + 
relation_type: String, + url: Option, +} + +#[derive(Deserialize)] +struct MbRelationUrl { + resource: String, +} + +#[derive(Deserialize)] +struct MbRecordingSearchResponse { + recordings: Vec, +} + +#[derive(Deserialize)] +struct MbRecordingResult { + id: String, + title: String, + score: Option, + #[serde(rename = "artist-credit")] + artist_credit: Option>, + releases: Option>, +} + +#[derive(Deserialize)] +struct MbReleaseSearchResponse { + releases: Vec, +} + +#[derive(Deserialize)] +struct MbReleaseResult { + id: String, + title: String, + score: Option, + #[serde(rename = "artist-credit")] + artist_credit: Option>, + date: Option, + #[serde(rename = "track-count")] + track_count: Option, +} + +#[derive(Deserialize)] +struct MbRecordingDetail { + id: String, + title: String, + #[serde(rename = "artist-credit")] + artist_credit: Option>, + releases: Option>, + length: Option, + genres: Option>, +} + +#[derive(Deserialize)] +struct MbArtistCredit { + artist: MbArtist, + joinphrase: Option, +} + +#[derive(Deserialize)] +struct MbArtist { + id: String, + name: String, +} + +#[derive(Deserialize)] +struct MbGenre { + name: String, +} + +#[derive(Deserialize)] +struct MbReleaseDetail { + media: Option>, +} + +#[derive(Deserialize)] +struct MbMedia { + tracks: Option>, +} + +#[derive(Deserialize)] +struct MbTrackEntry { + title: String, + position: Option, + length: Option, + recording: Option, +} + +#[derive(Deserialize)] +struct MbTrackRecording { + id: String, +} + +#[derive(Deserialize)] +struct MbReleaseGroupResponse { + #[serde(rename = "release-groups")] + release_groups: Option>, +} + +#[derive(Deserialize)] +struct MbReleaseGroup { + id: String, + title: String, + #[serde(rename = "primary-type")] + primary_type: Option, + #[serde(rename = "secondary-types", default)] + secondary_types: Option>, + #[serde(rename = "first-release-date")] + first_release_date: Option, + releases: Option>, +} + +#[derive(Deserialize)] +struct MbReleaseGroupRelease { + id: 
String, +} diff --git a/shanty-data/src/traits.rs b/shanty-data/src/traits.rs new file mode 100644 index 0000000..cb5bc8e --- /dev/null +++ b/shanty-data/src/traits.rs @@ -0,0 +1,92 @@ +use std::future::Future; + +use crate::error::DataResult; +use crate::types::{ + ArtistInfo, ArtistSearchResult, DiscographyEntry, LyricsResult, RecordingDetails, + RecordingMatch, ReleaseGroupEntry, ReleaseMatch, ReleaseTrack, +}; + +/// Trait for metadata lookup backends. MusicBrainz is the default implementation; +/// others (Last.fm, Discogs, etc.) can be added later. +pub trait MetadataFetcher: Send + Sync { + fn search_recording( + &self, + artist: &str, + title: &str, + ) -> impl Future>> + Send; + + fn search_release( + &self, + artist: &str, + album: &str, + ) -> impl Future>> + Send; + + fn get_recording( + &self, + mbid: &str, + ) -> impl Future> + Send; + + fn search_artist( + &self, + query: &str, + limit: u32, + ) -> impl Future>> + Send; + + fn get_artist_releases( + &self, + artist_mbid: &str, + limit: u32, + ) -> impl Future>> + Send; + + fn get_release_tracks( + &self, + release_mbid: &str, + ) -> impl Future>> + Send; + + /// Get deduplicated release groups (albums, EPs, singles) for an artist. + fn get_artist_release_groups( + &self, + artist_mbid: &str, + ) -> impl Future>> + Send; +} + +/// Fetches artist image URLs from an external source. +pub trait ArtistImageFetcher: Send + Sync { + /// Thumbnail/profile image for the artist. + fn get_artist_image( + &self, + artist_info: &ArtistInfo, + ) -> impl Future>> + Send; + + /// Wide banner/background image for the artist (e.g., 1920x1080). + /// Returns None by default — providers that don't support banners need not implement this. + fn get_artist_banner( + &self, + artist_info: &ArtistInfo, + ) -> impl Future>> + Send { + let _ = artist_info; + async { Ok(None) } + } +} + +/// Fetches an artist biography from an external source. 
+pub trait ArtistBioFetcher: Send + Sync { + fn get_artist_bio( + &self, + artist_info: &ArtistInfo, + ) -> impl Future>> + Send; +} + +/// Fetches song lyrics from an external source. +pub trait LyricsFetcher: Send + Sync { + fn get_lyrics( + &self, + artist: &str, + title: &str, + ) -> impl Future> + Send; +} + +/// Fetches cover art URLs for releases. +pub trait CoverArtFetcher: Send + Sync { + fn get_cover_art_url(&self, release_id: &str) -> Option; +} diff --git a/shanty-data/src/types.rs b/shanty-data/src/types.rs new file mode 100644 index 0000000..c8e5389 --- /dev/null +++ b/shanty-data/src/types.rs @@ -0,0 +1,120 @@ +use serde::{Deserialize, Serialize}; + +/// A reference to a release (album) that a recording appears on. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReleaseRef { + pub mbid: String, + pub title: String, + pub date: Option, + pub track_number: Option, +} + +/// A recording match from a search query. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RecordingMatch { + pub mbid: String, + pub title: String, + pub artist: String, + pub artist_mbid: Option, + pub releases: Vec, + /// MusicBrainz API score (0-100). + pub score: u8, +} + +/// A release (album) match from a search query. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReleaseMatch { + pub mbid: String, + pub title: String, + pub artist: String, + pub artist_mbid: Option, + pub date: Option, + pub track_count: Option, + pub score: u8, +} + +/// Full details for a recording, retrieved by MBID. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RecordingDetails { + pub mbid: String, + pub title: String, + pub artist: String, + pub artist_mbid: Option, + pub releases: Vec, + pub duration_ms: Option, + pub genres: Vec, + /// Non-featuring collaborators beyond the primary artist. + #[serde(default)] + pub secondary_artists: Vec<(String, String)>, +} + +/// Detailed artist info from a direct MBID lookup. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ArtistInfo { + pub name: String, + #[serde(default)] + pub mbid: Option, + pub disambiguation: Option, + pub country: Option, + pub artist_type: Option, + pub begin_year: Option, + pub urls: Vec, +} + +/// An external URL linked to an artist on MusicBrainz. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ArtistUrl { + pub url: String, + pub link_type: String, +} + +/// An artist match from a search query. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ArtistSearchResult { + pub mbid: String, + pub name: String, + pub disambiguation: Option, + pub country: Option, + pub artist_type: Option, + pub score: u8, +} + +/// A release entry in an artist's discography. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DiscographyEntry { + pub mbid: String, + pub title: String, + pub date: Option, + pub release_type: Option, + pub track_count: Option, +} + +/// A release group (deduplicated album/EP/single concept). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReleaseGroupEntry { + pub mbid: String, + pub title: String, + pub primary_type: Option, + pub secondary_types: Vec, + pub first_release_date: Option, + /// MBID of the first release in this group (for fetching tracks). + pub first_release_mbid: Option, +} + +/// A track within a release. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ReleaseTrack { + pub recording_mbid: String, + pub title: String, + pub track_number: Option, + pub disc_number: Option, + pub duration_ms: Option, +} + +/// Result from a lyrics lookup. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct LyricsResult { + pub found: bool, + pub lyrics: Option, + pub synced_lyrics: Option, +} diff --git a/shanty-data/src/wikipedia.rs b/shanty-data/src/wikipedia.rs new file mode 100644 index 0000000..373689e --- /dev/null +++ b/shanty-data/src/wikipedia.rs @@ -0,0 +1,122 @@ +use crate::error::DataResult; +use crate::http::build_client; +use crate::traits::{ArtistBioFetcher, ArtistImageFetcher}; +use crate::types::ArtistInfo; + +const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)"; + +/// Fetches artist images and bios from Wikipedia/Wikidata. +pub struct WikipediaFetcher { + client: reqwest::Client, +} + +impl WikipediaFetcher { + pub fn new() -> DataResult { + let client = build_client(USER_AGENT, 30)?; + Ok(Self { client }) + } + + /// Find the Wikipedia URL for an artist from their MusicBrainz URLs. + /// Tries a direct Wikipedia link first, then resolves via Wikidata. + async fn resolve_wiki_url(&self, artist_info: &ArtistInfo) -> Option { + // Direct Wikipedia link + if let Some(u) = artist_info.urls.iter().find(|u| u.link_type == "wikipedia") { + return Some(u.url.clone()); + } + + // Resolve via Wikidata + if let Some(wd) = artist_info.urls.iter().find(|u| u.link_type == "wikidata") { + let entity_id = wd.url.split('/').next_back().unwrap_or(""); + return self.resolve_wikidata_to_wikipedia(entity_id).await; + } + + None + } + + /// Resolve a Wikidata entity ID to an English Wikipedia URL. 
+ async fn resolve_wikidata_to_wikipedia(&self, entity_id: &str) -> Option { + if entity_id.is_empty() { + return None; + } + + let url = format!( + "https://www.wikidata.org/w/api.php?action=wbgetentities&ids={entity_id}&props=sitelinks&sitefilter=enwiki&format=json" + ); + + let resp: serde_json::Value = self.client.get(&url).send().await.ok()?.json().await.ok()?; + + let title = resp + .get("entities") + .and_then(|e| e.get(entity_id)) + .and_then(|e| e.get("sitelinks")) + .and_then(|s| s.get("enwiki")) + .and_then(|w| w.get("title")) + .and_then(|t| t.as_str())?; + + Some(format!( + "https://en.wikipedia.org/wiki/{}", + title.replace(' ', "_") + )) + } + + /// Fetch the Wikipedia summary for a given URL, returning (photo_url, bio). + async fn fetch_summary(&self, wiki_url: &str) -> (Option, Option) { + let title = wiki_url.split("/wiki/").nth(1).unwrap_or("").to_string(); + if title.is_empty() { + return (None, None); + } + + let lang = wiki_url + .split("://") + .nth(1) + .and_then(|s| s.split('.').next()) + .unwrap_or("en"); + + let api_url = format!("https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title}"); + + let resp = match self.client.get(&api_url).send().await { + Ok(r) if r.status().is_success() => r, + _ => return (None, None), + }; + + let body: serde_json::Value = match resp.json().await { + Ok(v) => v, + Err(_) => return (None, None), + }; + + let photo_url = body + .get("thumbnail") + .and_then(|t| t.get("source")) + .and_then(|s| s.as_str()) + .map(String::from); + + let bio = body + .get("extract") + .and_then(|e| e.as_str()) + .map(String::from); + + (photo_url, bio) + } +} + +impl ArtistImageFetcher for WikipediaFetcher { + async fn get_artist_image(&self, artist_info: &ArtistInfo) -> DataResult> { + let wiki_url = match self.resolve_wiki_url(artist_info).await { + Some(url) => url, + None => return Ok(None), + }; + let (photo_url, _) = self.fetch_summary(&wiki_url).await; + Ok(photo_url) + } +} + +impl ArtistBioFetcher for 
WikipediaFetcher { + async fn get_artist_bio(&self, artist_info: &ArtistInfo) -> DataResult> { + let wiki_url = match self.resolve_wiki_url(artist_info).await { + Some(url) => url, + None => return Ok(None), + }; + let (_, bio) = self.fetch_summary(&wiki_url).await; + Ok(bio) + } +} diff --git a/shanty-search b/shanty-search index d358b79..cbd0243 160000 --- a/shanty-search +++ b/shanty-search @@ -1 +1 @@ -Subproject commit d358b79a6bd86dd722d879731b0748e66f49912c +Subproject commit cbd02435160f0eea652f2870c3a54ce3c000a6d6 diff --git a/shanty-tag b/shanty-tag index 3572956..2280e95 160000 --- a/shanty-tag +++ b/shanty-tag @@ -1 +1 @@ -Subproject commit 3572956cde2a222ce4896579fd9e545c5e2b45aa +Subproject commit 2280e9564d7241f5053d73d2cf71de405bdc8d21 diff --git a/shanty-web b/shanty-web index fed86c9..eaaff5f 160000 --- a/shanty-web +++ b/shanty-web @@ -1 +1 @@ -Subproject commit fed86c9e858f55809550ce6c3e421e4c0f4bf5fc +Subproject commit eaaff5f98f8a85ff2572727fc2799ad52bb71520 diff --git a/src/main.rs b/src/main.rs index 1480971..4e80f98 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,9 +6,10 @@ use tracing_actix_web::TracingLogger; use tracing_subscriber::EnvFilter; use shanty_config::AppConfig; +use shanty_data::MusicBrainzFetcher; +use shanty_data::WikipediaFetcher; use shanty_db::Database; use shanty_search::MusicBrainzSearch; -use shanty_tag::MusicBrainzClient; use shanty_web::routes; use shanty_web::state::AppState; @@ -32,6 +33,9 @@ struct Cli { #[actix_web::main] async fn main() -> anyhow::Result<()> { + // Load .env file if present (before anything reads env vars) + dotenvy::dotenv().ok(); + let cli = Cli::parse(); let filter = match cli.verbose { @@ -53,8 +57,9 @@ async fn main() -> anyhow::Result<()> { tracing::info!(url = %config.database_url, "connecting to database"); let db = Database::new(&config.database_url).await?; - let mb_client = MusicBrainzClient::new()?; + let mb_client = MusicBrainzFetcher::new()?; let search = 
MusicBrainzSearch::new()?; + let wiki_fetcher = WikipediaFetcher::new()?; let bind = format!("{}:{}", config.web.bind, config.web.port); tracing::info!(bind = %bind, "starting server"); @@ -64,6 +69,7 @@ async fn main() -> anyhow::Result<()> { db, mb_client, search, + wiki_fetcher, config: std::sync::Arc::new(tokio::sync::RwLock::new(config)), config_path, tasks: TaskManager::new(),