Re-organized providers and added a few

2026-03-20 14:52:16 -04:00
parent d3f4dc33d5
commit 4ec47252d9
19 changed files with 1302 additions and 6 deletions
@@ -3091,9 +3091,11 @@ dependencies = [
 "actix-web",
 "anyhow",
 "clap",
 "dotenvy",
 "rand 0.9.2",
 "serde_json",
 "shanty-config",
 "shanty-data",
 "shanty-db",
 "shanty-search",
 "shanty-tag",
@@ -3116,6 +3118,18 @@ dependencies = [
 "tracing-subscriber",
 ]
 [[package]]
 name = "shanty-data"
 version = "0.1.0"
 dependencies = [
 "reqwest",
 "serde",
 "serde_json",
 "thiserror",
 "tokio",
 "tracing",
 ]
 [[package]]
 name = "shanty-db"
 version = "0.1.0"
@@ -3233,6 +3247,7 @@ dependencies = [
 "sea-orm",
 "serde",
 "serde_json",
 "shanty-data",
 "shanty-db",
 "shanty-tag",
 "thiserror",
@@ -3267,6 +3282,7 @@ dependencies = [
 "sea-orm",
 "serde",
 "serde_json",
 "shanty-data",
 "shanty-db",
 "strsim",
 "tempfile",
@@ -3319,6 +3335,7 @@ dependencies = [
 "serde_json",
 "serde_yaml",
 "shanty-config",
 "shanty-data",
 "shanty-db",
 "shanty-dl",
 "shanty-index",
@@ -13,7 +13,7 @@ members = [
    "shanty-playlist",
    "shanty-serve",
    "shanty-play",
-    "shanty-web",
+    "shanty-web", "shanty-data",
 ]
 resolver = "3"
@@ -55,6 +55,7 @@ path = "src/main.rs"
 [dependencies]
 shanty-config = { path = "shanty-config" }
 shanty-data = { path = "shanty-data" }
 shanty-db = { path = "shanty-db" }
 shanty-web = { path = "shanty-web" }
 shanty-tag = { path = "shanty-tag" }
@@ -71,3 +72,4 @@ tracing = { workspace = true }
 tracing-subscriber = { workspace = true }
 anyhow = { workspace = true }
 serde_json = { workspace = true }
 dotenvy = "0.15"
@@ -32,6 +32,9 @@ pub struct AppConfig {
    #[serde(default)]
    pub indexing: IndexingConfig,
    #[serde(default)]
    pub metadata: MetadataConfig,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -94,6 +97,39 @@ pub struct IndexingConfig {
    pub concurrency: usize,
 }
 /// Selects which external provider supplies each kind of metadata and carries
 /// the API keys that some of those providers require.
 ///
 /// Every provider field falls back to its `default_*` function when it is
 /// missing from the deserialized input.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct MetadataConfig {
    /// Source for structured metadata: "musicbrainz" (default).
    #[serde(default = "default_metadata_source")]
    pub metadata_source: String,
    /// Source for artist images: "wikipedia" (default) or "fanarttv".
    #[serde(default = "default_artist_image_source")]
    pub artist_image_source: String,
    /// Source for artist bios: "wikipedia" (default) or "lastfm".
    #[serde(default = "default_artist_bio_source")]
    pub artist_bio_source: String,
    /// Source for lyrics: "lrclib" (default).
    #[serde(default = "default_lyrics_source")]
    pub lyrics_source: String,
    /// Source for cover art: "coverartarchive" (default).
    #[serde(default = "default_cover_art_source")]
    pub cover_art_source: String,
    /// Last.fm API key for fetching artist bios. Set via SHANTY_LASTFM_API_KEY env var.
    /// Required if artist_bio_source is "lastfm".
    /// Skipped by serde, so it is never serialized or deserialized with the rest
    /// of the config.
    #[serde(skip)]
    pub lastfm_api_key: Option<String>,
    /// fanart.tv API key for artist images/banners. Set via SHANTY_FANART_API_KEY env var.
    /// Required if artist_image_source is "fanarttv".
    /// Skipped by serde, so it is never serialized or deserialized with the rest
    /// of the config.
    #[serde(skip)]
    pub fanart_api_key: Option<String>,
 }
 // --- Defaults ---
 impl Default for AppConfig {
@@ -108,6 +144,7 @@ impl Default for AppConfig {
            tagging: TaggingConfig::default(),
            download: DownloadConfig::default(),
            indexing: IndexingConfig::default(),
            metadata: MetadataConfig::default(),
        }
    }
 }
@@ -154,6 +191,20 @@ impl Default for IndexingConfig {
    }
 }
 impl Default for MetadataConfig {
    /// Build the configuration used when no metadata section is supplied:
    /// each provider comes from its `default_*` helper and both API keys
    /// start out unset.
    fn default() -> Self {
        let (lastfm_api_key, fanart_api_key) = (None, None);
        Self {
            lastfm_api_key,
            fanart_api_key,
            cover_art_source: default_cover_art_source(),
            lyrics_source: default_lyrics_source(),
            artist_bio_source: default_artist_bio_source(),
            artist_image_source: default_artist_image_source(),
            metadata_source: default_metadata_source(),
        }
    }
 }
 fn default_library_path() -> PathBuf {
    dirs::audio_dir().unwrap_or_else(|| PathBuf::from("~/Music"))
 }
@@ -206,6 +257,21 @@ fn default_rate_limit_auth() -> u32 {
 /// Default concurrency level: 4.
 ///
 /// NOTE(review): presumably the serde default for `IndexingConfig::concurrency`;
 /// the attribute that references it is outside this view, so confirm there.
 fn default_concurrency() -> usize {
    4
 }
 /// Serde default for `MetadataConfig::metadata_source`.
 fn default_metadata_source() -> String {
    String::from("musicbrainz")
 }
 /// Serde default for `MetadataConfig::artist_image_source`.
 fn default_artist_image_source() -> String {
    String::from("wikipedia")
 }
 /// Serde default for `MetadataConfig::artist_bio_source`.
 fn default_artist_bio_source() -> String {
    String::from("wikipedia")
 }
 /// Serde default for `MetadataConfig::lyrics_source`.
 fn default_lyrics_source() -> String {
    String::from("lrclib")
 }
 /// Serde default for `MetadataConfig::cover_art_source`.
 fn default_cover_art_source() -> String {
    String::from("coverartarchive")
 }
 /// Default cookie refresh interval: 6 (hours, going by the function name).
 ///
 /// NOTE(review): the config field that uses this default is outside this
 /// view; confirm the unit against it.
 fn default_cookie_refresh_hours() -> u32 {
    6
 }
@@ -295,6 +361,12 @@ impl AppConfig {
        if let Ok(v) = std::env::var("SHANTY_WEB_BIND") {
            config.web.bind = v;
        }
        if let Ok(v) = std::env::var("SHANTY_LASTFM_API_KEY") {
            config.metadata.lastfm_api_key = Some(v);
        }
        if let Ok(v) = std::env::var("SHANTY_FANART_API_KEY") {
            config.metadata.fanart_api_key = Some(v);
        }
        config
    }
 }
@@ -0,0 +1,17 @@
 [package]
 name = "shanty-data"
 version.workspace = true
 edition.workspace = true
 license.workspace = true
 description = "External data providers for Shanty — metadata, images, bios, lyrics, cover art"
 [dependencies]
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 thiserror = "2"
 tracing = "0.1"
 tokio = { version = "1", features = ["full"] }
 reqwest = { version = "0.12", features = ["json"] }
 [dev-dependencies]
 tokio = { version = "1", features = ["full", "test-util"] }
@@ -0,0 +1,19 @@
 use crate::traits::CoverArtFetcher;
 /// Produces cover art URLs that point at the Cover Art Archive.
 ///
 /// The type carries no state, so it is free to construct and copy around.
 #[derive(Default)]
 pub struct CoverArtArchiveFetcher;
 impl CoverArtArchiveFetcher {
    /// Create a fetcher. Equivalent to `CoverArtArchiveFetcher::default()`.
    pub fn new() -> Self {
        Self::default()
    }
 }
 impl CoverArtFetcher for CoverArtArchiveFetcher {
    /// Build the URL of the 250px front cover for `release_id`.
    ///
    /// No request is made, so the result is always `Some` and says nothing
    /// about whether the archive actually holds an image for that release.
    fn get_cover_art_url(&self, release_id: &str) -> Option<String> {
        let front_cover = format!(
            "https://coverartarchive.org/release/{release_id}/front-250"
        );
        Some(front_cover)
    }
 }
@@ -0,0 +1,14 @@
 /// Error type for data fetching operations.
 ///
 /// The `#[from]` attributes let `?` convert `reqwest::Error` and
 /// `serde_json::Error` into this type automatically.
 #[derive(Debug, thiserror::Error)]
 pub enum DataError {
    /// Failure reported by the HTTP client.
    #[error("HTTP error: {0}")]
    Http(#[from] reqwest::Error),
    /// Failure while parsing or producing JSON with `serde_json`.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),
    /// Any other failure, described by a free-form message that is displayed as-is.
    #[error("{0}")]
    Other(String),
 }
 /// Shorthand for a `Result` whose error type is [`DataError`].
 pub type DataResult<T> = Result<T, DataError>;
@@ -0,0 +1,116 @@
 //! fanart.tv artist image and banner provider.
 //!
 //! API docs: <https://fanart.tv/get-an-api-key/>
 //! Endpoint: `GET https://webservice.fanart.tv/v3/music/{musicbrainz_id}?api_key={key}`
 //!
 //! Returns artist thumbnails, HD backgrounds (banners), logos, and more.
 use crate::error::{DataError, DataResult};
 use crate::http::build_client;
 use crate::traits::{ArtistBioFetcher, ArtistImageFetcher};
 use crate::types::ArtistInfo;
 /// fanart.tv image fetcher. Provides artist thumbnails and HD background banners.
 pub struct FanartTvFetcher {
    /// HTTP client used for every fanart.tv request.
    client: reqwest::Client,
    /// API key appended to each request URL as the `api_key` query parameter.
    api_key: String,
 }
 impl FanartTvFetcher {
    /// Create a fetcher that authenticates with `api_key`.
    ///
    /// The HTTP client is built with a 15 second timeout; a client build
    /// failure is returned as an error.
    pub fn new(api_key: String) -> DataResult<Self> {
        Ok(Self {
            client: build_client("Shanty/0.1.0 (shanty-music-app)", 15)?,
            api_key,
        })
    }
    /// Fetch the full fanart.tv response for an artist MBID.
    ///
    /// Returns `Ok(None)` on HTTP 404, and an error carrying the status and
    /// response body for any other non-success status.
    async fn fetch(&self, mbid: &str) -> DataResult<Option<FanartResponse>> {
        let endpoint = format!(
            "https://webservice.fanart.tv/v3/music/{mbid}?api_key={}",
            self.api_key
        );
        let response = self.client.get(&endpoint).send().await?;
        let status = response.status();
        if status == reqwest::StatusCode::NOT_FOUND {
            return Ok(None);
        }
        if status.is_success() {
            let parsed: FanartResponse = response.json().await?;
            return Ok(Some(parsed));
        }
        // Keep whatever body could be read; an unreadable body becomes empty.
        let body = response.text().await.unwrap_or_default();
        Err(DataError::Other(format!(
            "fanart.tv API error {status}: {body}"
        )))
    }
 }
 impl ArtistImageFetcher for FanartTvFetcher {
    async fn get_artist_image(&self, artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        let Some(ref mbid) = artist_info.mbid else {
            return Ok(None);
        };
        let Some(data) = self.fetch(mbid).await? else {
            return Ok(None);
        };
        // Prefer artistthumb, fall back to hdmusiclogo
        let url = data
            .artistthumb
            .as_ref()
            .and_then(|imgs| imgs.first())
            .or_else(|| data.hdmusiclogo.as_ref().and_then(|imgs| imgs.first()))
            .map(|img| img.url.clone());
        Ok(url)
    }
    async fn get_artist_banner(&self, artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        let Some(ref mbid) = artist_info.mbid else {
            return Ok(None);
        };
        let Some(data) = self.fetch(mbid).await? else {
            return Ok(None);
        };
        // Prefer artistbackground (1920x1080), fall back to musicbanner (1000x185)
        let url = data
            .artistbackground
            .as_ref()
            .and_then(|imgs| imgs.first())
            .or_else(|| data.musicbanner.as_ref().and_then(|imgs| imgs.first()))
            .map(|img| img.url.clone());
        Ok(url)
    }
 }
 // fanart.tv doesn't provide bios — this is a no-op so it can be used as
 // a combined image+bio provider without errors.
 impl ArtistBioFetcher for FanartTvFetcher {
    /// Always returns `Ok(None)`; no request is made and `_artist_info` is ignored.
    async fn get_artist_bio(&self, _artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        Ok(None)
    }
 }
 // --- fanart.tv API response types ---
 /// Subset of the fanart.tv artist response that this provider reads.
 /// Each list is optional and defaults to `None` when the key is absent.
 #[derive(serde::Deserialize)]
 struct FanartResponse {
    /// First choice for the artist image.
    #[serde(default)]
    artistthumb: Option<Vec<FanartImage>>,
    /// First choice for the artist banner (1920x1080).
    #[serde(default)]
    artistbackground: Option<Vec<FanartImage>>,
    /// Fallback for the artist banner (1000x185).
    #[serde(default)]
    musicbanner: Option<Vec<FanartImage>>,
    /// Fallback for the artist image.
    #[serde(default)]
    hdmusiclogo: Option<Vec<FanartImage>>,
 }
 /// One image entry in a fanart.tv response; only its URL is read.
 #[derive(serde::Deserialize)]
 struct FanartImage {
    /// Location of the image, returned to callers unchanged.
    url: String,
 }
@@ -0,0 +1,61 @@
 use std::time::Duration;
 use tokio::sync::Mutex;
 use tokio::time::Instant;
 /// A simple rate limiter that enforces a minimum interval between requests.
 ///
 /// Callers are serialized: the internal lock is held while one caller sleeps,
 /// so concurrent callers are released one at a time, each at least `interval`
 /// after the previous one.
 pub struct RateLimiter {
    /// When the most recent request was released, or `None` before the first one.
    last_request: Mutex<Option<Instant>>,
    /// Minimum spacing between two consecutive requests.
    interval: Duration,
 }
 impl RateLimiter {
    /// Create a limiter that spaces requests at least `interval` apart.
    ///
    /// The first call to [`RateLimiter::wait`] never sleeps. "No request yet"
    /// is stored as `None` rather than as `Instant::now() - interval`, because
    /// that subtraction panics when the clock's origin is more recent than
    /// `interval` ago.
    pub fn new(interval: Duration) -> Self {
        Self {
            last_request: Mutex::new(None),
            interval,
        }
    }
    /// Wait if needed so we don't exceed the rate limit.
    pub async fn wait(&self) {
        // Holding the guard across the sleep is deliberate: it queues callers.
        let mut last = self.last_request.lock().await;
        if let Some(previous) = *last {
            let elapsed = previous.elapsed();
            if elapsed < self.interval {
                tokio::time::sleep(self.interval - elapsed).await;
            }
        }
        *last = Some(Instant::now());
    }
 }
 /// Construct a `reqwest::Client` that sends `user_agent` as its User-Agent
 /// and applies a timeout of `timeout_secs` seconds.
 ///
 /// Returns the builder's error if the client cannot be constructed.
 pub fn build_client(user_agent: &str, timeout_secs: u64) -> reqwest::Result<reqwest::Client> {
    let timeout = Duration::from_secs(timeout_secs);
    let builder = reqwest::Client::builder()
        .user_agent(user_agent)
        .timeout(timeout);
    builder.build()
 }
 /// URL-encode a value for use inside a query string.
 ///
 /// Spaces become `+` (form encoding). The unreserved characters
 /// `A-Z a-z 0-9 - _ . ~` pass through unchanged. Every other byte of the
 /// UTF-8 representation is written as `%XX` with uppercase hex digits.
 ///
 /// Encoding `+` and `%` themselves matters: left as-is, a literal `+` in the
 /// input would be decoded by the server as a space, and a literal `%` would
 /// be read as the start of an escape sequence.
 pub fn urlencoded(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";
    let mut encoded = String::with_capacity(s.len());
    for &byte in s.as_bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                encoded.push(char::from(byte));
            }
            b' ' => encoded.push('+'),
            _ => {
                encoded.push('%');
                encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    encoded
 }
 /// Backslash-escape the characters that are special in MusicBrainz Lucene
 /// query syntax.
 ///
 /// Each of ``+ - & | ! ( ) { } [ ] ^ " ~ * ? : \ /`` is prefixed with a
 /// backslash; every other character is copied through unchanged.
 pub fn escape_lucene(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let is_special = matches!(
            ch,
            '+' | '-'
                | '&'
                | '|'
                | '!'
                | '('
                | ')'
                | '{'
                | '}'
                | '['
                | ']'
                | '^'
                | '"'
                | '~'
                | '*'
                | '?'
                | ':'
                | '\\'
                | '/'
        );
        if is_special {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
 }
@@ -0,0 +1,67 @@
 use crate::error::DataResult;
 use crate::http::{build_client, urlencoded};
 use crate::traits::ArtistBioFetcher;
 use crate::types::ArtistInfo;
 /// User-Agent string passed to `build_client` for the Last.fm HTTP client.
 const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
 /// Fetches artist bios from Last.fm.
 pub struct LastFmBioFetcher {
    /// Last.fm API key; when empty, bio lookups return `None` without a request.
    api_key: String,
    /// HTTP client used for every Last.fm request.
    client: reqwest::Client,
 }
 impl LastFmBioFetcher {
    /// Create a fetcher that authenticates with `api_key`.
    ///
    /// The HTTP client is built with a 30 second timeout; a client build
    /// failure is returned as an error.
    pub fn new(api_key: String) -> DataResult<Self> {
        Ok(Self {
            client: build_client(USER_AGENT, 30)?,
            api_key,
        })
    }
 }
 impl ArtistBioFetcher for LastFmBioFetcher {
    /// Look up the bio summary for `artist_info.name` via Last.fm's
    /// `artist.getinfo` method and return it with HTML tags removed.
    ///
    /// This is best-effort: an empty API key, a transport failure, a
    /// non-success status, an unparseable body, or a missing
    /// `artist.bio.summary` string all yield `Ok(None)` rather than an error.
    async fn get_artist_bio(&self, artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        if self.api_key.is_empty() {
            return Ok(None);
        }
        let url = format!(
            "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist={}&api_key={}&format=json",
            urlencoded(&artist_info.name),
            &self.api_key,
        );
        let Ok(resp) = self.client.get(&url).send().await else {
            return Ok(None);
        };
        if !resp.status().is_success() {
            return Ok(None);
        }
        let Ok(body) = resp.json::<serde_json::Value>().await else {
            return Ok(None);
        };
        // Indexing a `Value` yields `Null` for a missing key, so a missing
        // level anywhere along the path ends up as `None` here.
        let summary = body["artist"]["bio"]["summary"]
            .as_str()
            .map(strip_html_tags);
        Ok(summary)
    }
 }
 /// Remove HTML tags from `s` using a simple character scan.
 ///
 /// Everything from a `<` up to the next `>` is dropped, as is any stray `>`.
 /// The remaining text is trimmed of leading and trailing whitespace.
 /// Entities such as `&amp;` are left untouched.
 fn strip_html_tags(s: &str) -> String {
    let mut inside_tag = false;
    let mut text = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '<' {
            inside_tag = true;
        } else if ch == '>' {
            inside_tag = false;
        } else if !inside_tag {
            text.push(ch);
        }
    }
    text.trim().to_string()
 }
@@ -0,0 +1,20 @@
 pub mod coverart;
 pub mod error;
 pub mod fanarttv;
 pub mod http;
 pub mod lastfm;
 pub mod lrclib;
 pub mod musicbrainz;
 pub mod traits;
 pub mod types;
 pub mod wikipedia;
 pub use coverart::CoverArtArchiveFetcher;
 pub use error::{DataError, DataResult};
 pub use fanarttv::FanartTvFetcher;
 pub use lastfm::LastFmBioFetcher;
 pub use lrclib::LrclibFetcher;
 pub use musicbrainz::MusicBrainzFetcher;
 pub use traits::*;
 pub use types::*;
 pub use wikipedia::WikipediaFetcher;
@@ -0,0 +1,63 @@
 use crate::error::DataResult;
 use crate::http::{build_client, urlencoded};
 use crate::traits::LyricsFetcher;
 use crate::types::LyricsResult;
 /// User-Agent string passed to `build_client` for the LRCLIB HTTP client.
 const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
 /// Fetches lyrics from LRCLIB.
 pub struct LrclibFetcher {
    /// HTTP client used for every LRCLIB request.
    client: reqwest::Client,
 }
 impl LrclibFetcher {
    /// Create a fetcher whose HTTP client uses a 30 second timeout.
    ///
    /// A client build failure is returned as an error.
    pub fn new() -> DataResult<Self> {
        Ok(Self {
            client: build_client(USER_AGENT, 30)?,
        })
    }
 }
 impl LyricsFetcher for LrclibFetcher {
    /// Search LRCLIB for `artist` / `title` and return the lyrics of the
    /// first hit.
    ///
    /// A non-success HTTP status or an empty result list yields a
    /// `LyricsResult` with `found == false`. Transport and JSON decoding
    /// failures are returned as errors.
    async fn get_lyrics(&self, artist: &str, title: &str) -> DataResult<LyricsResult> {
        let url = format!(
            "https://lrclib.net/api/search?artist_name={}&track_name={}",
            urlencoded(artist),
            urlencoded(title),
        );
        let resp = self.client.get(&url).send().await?;
        if !resp.status().is_success() {
            return Ok(LyricsResult {
                found: false,
                lyrics: None,
                synced_lyrics: None,
            });
        }
        let results: Vec<serde_json::Value> = resp.json().await?;
        // Read a string field from the first search hit, if there is one.
        let text_field = |key: &str| {
            results
                .first()
                .and_then(|entry| entry.get(key))
                .and_then(|value| value.as_str())
                .map(String::from)
        };
        let lyrics = text_field("plainLyrics");
        let synced_lyrics = text_field("syncedLyrics");
        Ok(LyricsResult {
            found: lyrics.is_some() || synced_lyrics.is_some(),
            lyrics,
            synced_lyrics,
        })
    }
 }
@@ -0,0 +1,488 @@
 use serde::Deserialize;
 use tokio::time::Duration;
 use crate::error::{DataError, DataResult};
 use crate::http::{RateLimiter, escape_lucene, urlencoded};
 use crate::traits::MetadataFetcher;
 use crate::types::{
    ArtistInfo, ArtistSearchResult, ArtistUrl, DiscographyEntry, RecordingDetails, RecordingMatch,
    ReleaseGroupEntry, ReleaseMatch, ReleaseRef, ReleaseTrack,
 };
 /// Root of the MusicBrainz web service; every request URL is built from it.
 const BASE_URL: &str = "https://musicbrainz.org/ws/2";
 /// User-Agent string sent with every MusicBrainz request.
 const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
 /// Minimum spacing between two MusicBrainz requests, enforced by the rate limiter.
 const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe
 /// MusicBrainz API client with rate limiting.
 pub struct MusicBrainzFetcher {
    /// HTTP client used for every MusicBrainz request.
    client: reqwest::Client,
    /// Awaited before each request so calls stay at least `RATE_LIMIT` apart.
    limiter: RateLimiter,
 }
 impl MusicBrainzFetcher {
    pub fn new() -> DataResult<Self> {
        let client = reqwest::Client::builder()
            .user_agent(USER_AGENT)
            .timeout(Duration::from_secs(30))
            .build()?;
        Ok(Self {
            client,
            limiter: RateLimiter::new(RATE_LIMIT),
        })
    }
    /// GET `url` and decode the JSON response body as `T`.
    ///
    /// Waits on the rate limiter before sending. A non-success status becomes
    /// a `DataError::Other` carrying the status and the response body.
    async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> DataResult<T> {
        self.limiter.wait().await;
        tracing::debug!(url = url, "MusicBrainz request");
        let response = self.client.get(url).send().await?;
        let status = response.status();
        if status.is_success() {
            let parsed = response.json::<T>().await?;
            return Ok(parsed);
        }
        // Keep whatever body could be read; an unreadable body becomes empty.
        let body = response.text().await.unwrap_or_default();
        Err(DataError::Other(format!(
            "MusicBrainz API error {status}: {body}"
        )))
    }
    /// Fetch an artist by MBID and return `(name, disambiguation)`.
    ///
    /// An empty disambiguation string is reported as `None`.
    pub async fn get_artist_by_mbid(&self, mbid: &str) -> DataResult<(String, Option<String>)> {
        let url = format!("{BASE_URL}/artist/{mbid}?fmt=json");
        let lookup = self.get_json::<MbArtistLookup>(&url).await?;
        let disambiguation = lookup.disambiguation.filter(|text| !text.is_empty());
        Ok((lookup.name, disambiguation))
    }
    /// Look up detailed artist info by MBID, including URLs and metadata.
    pub async fn get_artist_info(&self, mbid: &str) -> DataResult<ArtistInfo> {
        let url = format!("{BASE_URL}/artist/{mbid}?inc=url-rels&fmt=json");
        let resp: MbArtistFull = self.get_json(&url).await?;
        let begin_year = resp
            .life_span
            .and_then(|ls| ls.begin)
            .and_then(|d| d.split('-').next().map(String::from));
        let urls = resp
            .relations
            .unwrap_or_default()
            .into_iter()
            .filter_map(|rel| {
                rel.url.map(|u| ArtistUrl {
                    url: u.resource,
                    link_type: rel.relation_type,
                })
            })
            .collect();
        Ok(ArtistInfo {
            name: resp.name,
            mbid: Some(mbid.to_string()),
            disambiguation: resp.disambiguation.filter(|s| !s.is_empty()),
            country: resp.country.filter(|s| !s.is_empty()),
            artist_type: resp.artist_type,
            begin_year,
            urls,
        })
    }
 }
 impl MetadataFetcher for MusicBrainzFetcher {
    /// Search MusicBrainz recordings by `title`, optionally narrowed by `artist`.
    ///
    /// An empty `artist` searches by title only. At most 5 matches are
    /// requested. A missing score is reported as 0, and `track_number` on the
    /// returned release references is always `None`.
    async fn search_recording(&self, artist: &str, title: &str) -> DataResult<Vec<RecordingMatch>> {
        // Each value is grouped in parentheses so that every word is bound to
        // its field. Without grouping, `artist:Pink Floyd` applies the
        // `artist:` field to "Pink" only and searches "Floyd" in the default
        // field. Parentheses inside the values are escaped by `escape_lucene`,
        // so they cannot close the group early.
        let query = if artist.is_empty() {
            format!("recording:({})", escape_lucene(title))
        } else {
            format!(
                "artist:({}) AND recording:({})",
                escape_lucene(artist),
                escape_lucene(title)
            )
        };
        let url = format!(
            "{BASE_URL}/recording/?query={}&fmt=json&limit=5",
            urlencoded(&query)
        );
        let resp: MbRecordingSearchResponse = self.get_json(&url).await?;
        Ok(resp
            .recordings
            .into_iter()
            .map(|r| {
                let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
                RecordingMatch {
                    mbid: r.id,
                    title: r.title,
                    artist: artist_name,
                    artist_mbid,
                    releases: r
                        .releases
                        .unwrap_or_default()
                        .into_iter()
                        .map(|rel| ReleaseRef {
                            mbid: rel.id,
                            title: rel.title,
                            date: rel.date,
                            track_number: None,
                        })
                        .collect(),
                    score: r.score.unwrap_or(0),
                }
            })
            .collect())
    }
    /// Search MusicBrainz releases by `album` title, optionally narrowed by `artist`.
    ///
    /// An empty `artist` searches by release title only. At most 5 matches
    /// are requested, and a missing score is reported as 0.
    async fn search_release(&self, artist: &str, album: &str) -> DataResult<Vec<ReleaseMatch>> {
        // Each value is grouped in parentheses so that every word is bound to
        // its field. Without grouping, `release:Dark Side` applies the
        // `release:` field to "Dark" only and searches "Side" in the default
        // field. Parentheses inside the values are escaped by `escape_lucene`,
        // so they cannot close the group early.
        let query = if artist.is_empty() {
            format!("release:({})", escape_lucene(album))
        } else {
            format!(
                "artist:({}) AND release:({})",
                escape_lucene(artist),
                escape_lucene(album)
            )
        };
        let url = format!(
            "{BASE_URL}/release/?query={}&fmt=json&limit=5",
            urlencoded(&query)
        );
        let resp: MbReleaseSearchResponse = self.get_json(&url).await?;
        Ok(resp
            .releases
            .into_iter()
            .map(|r| {
                let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
                ReleaseMatch {
                    mbid: r.id,
                    title: r.title,
                    artist: artist_name,
                    artist_mbid,
                    date: r.date,
                    track_count: r.track_count,
                    score: r.score.unwrap_or(0),
                }
            })
            .collect())
    }
    /// Fetch one recording by MBID together with its artists, releases and genres.
    ///
    /// `track_number` on the returned release references is always `None`.
    async fn get_recording(&self, mbid: &str) -> DataResult<RecordingDetails> {
        let url = format!("{BASE_URL}/recording/{mbid}?inc=artists+releases+genres&fmt=json");
        let detail = self.get_json::<MbRecordingDetail>(&url).await?;
        let secondary_artists = extract_secondary_artists(&detail.artist_credit);
        let (artist, artist_mbid) = extract_artist_credit(&detail.artist_credit);
        let releases = detail
            .releases
            .unwrap_or_default()
            .into_iter()
            .map(|release| ReleaseRef {
                mbid: release.id,
                title: release.title,
                date: release.date,
                track_number: None,
            })
            .collect();
        let genres = detail
            .genres
            .unwrap_or_default()
            .into_iter()
            .map(|genre| genre.name)
            .collect();
        Ok(RecordingDetails {
            mbid: detail.id,
            title: detail.title,
            artist,
            artist_mbid,
            releases,
            duration_ms: detail.length,
            genres,
            secondary_artists,
        })
    }
    /// Search MusicBrainz artists for `query`, requesting at most `limit` results.
    ///
    /// The query text is Lucene-escaped before it is sent. An empty
    /// disambiguation is reported as `None` and a missing score as 0.
    async fn search_artist(&self, query: &str, limit: u32) -> DataResult<Vec<ArtistSearchResult>> {
        let url = format!(
            "{BASE_URL}/artist/?query={}&fmt=json&limit={limit}",
            urlencoded(&escape_lucene(query))
        );
        let response = self.get_json::<MbArtistSearchResponse>(&url).await?;
        let mut matches = Vec::with_capacity(response.artists.len());
        for artist in response.artists {
            matches.push(ArtistSearchResult {
                mbid: artist.id,
                name: artist.name,
                disambiguation: artist.disambiguation.filter(|text| !text.is_empty()),
                country: artist.country,
                artist_type: artist.artist_type,
                score: artist.score.unwrap_or(0),
            });
        }
        Ok(matches)
    }
    /// List releases credited to `artist_mbid`, requesting at most `limit` entries.
    ///
    /// `release_type` is always `None` because this response carries no
    /// release-group type.
    async fn get_artist_releases(
        &self,
        artist_mbid: &str,
        limit: u32,
    ) -> DataResult<Vec<DiscographyEntry>> {
        let url = format!("{BASE_URL}/release/?artist={artist_mbid}&fmt=json&limit={limit}");
        let response = self.get_json::<MbReleaseSearchResponse>(&url).await?;
        let mut entries = Vec::with_capacity(response.releases.len());
        for release in response.releases {
            entries.push(DiscographyEntry {
                mbid: release.id,
                title: release.title,
                date: release.date,
                release_type: None, // release-group type not in this response
                track_count: release.track_count,
            });
        }
        Ok(entries)
    }
    /// List the tracks of a release across all of its media, in response order.
    ///
    /// `disc_number` is the 1-based position of the medium within the
    /// response. A track without an attached recording gets an empty
    /// `recording_mbid`.
    async fn get_release_tracks(&self, release_mbid: &str) -> DataResult<Vec<ReleaseTrack>> {
        let url = format!("{BASE_URL}/release/{release_mbid}?inc=recordings&fmt=json");
        let release = self.get_json::<MbReleaseDetail>(&url).await?;
        let tracks = release
            .media
            .unwrap_or_default()
            .into_iter()
            .enumerate()
            .flat_map(|(disc_idx, medium)| {
                let disc_number = Some(disc_idx as i32 + 1);
                medium
                    .tracks
                    .unwrap_or_default()
                    .into_iter()
                    .map(move |track| ReleaseTrack {
                        recording_mbid: track.recording.map(|r| r.id).unwrap_or_default(),
                        title: track.title,
                        track_number: track.position,
                        disc_number,
                        duration_ms: track.length,
                    })
            })
            .collect();
        Ok(tracks)
    }
    /// List the album, single and EP release groups of `artist_mbid`.
    ///
    /// One request is made with `limit=100`; no further pages are fetched.
    /// `first_release_mbid` is the id of the first release listed for the
    /// group, if any.
    async fn get_artist_release_groups(
        &self,
        artist_mbid: &str,
    ) -> DataResult<Vec<ReleaseGroupEntry>> {
        // Fetch album, single, and EP release groups
        let url = format!(
            "{BASE_URL}/release-group?artist={artist_mbid}&type=album|single|ep&fmt=json&limit=100"
        );
        let response = self.get_json::<MbReleaseGroupResponse>(&url).await?;
        let mut entries = Vec::new();
        for group in response.release_groups.unwrap_or_default() {
            let first_release_mbid = group
                .releases
                .and_then(|releases| releases.into_iter().next())
                .map(|release| release.id);
            entries.push(ReleaseGroupEntry {
                mbid: group.id,
                title: group.title,
                primary_type: group.primary_type,
                secondary_types: group.secondary_types.unwrap_or_default(),
                first_release_date: group.first_release_date,
                first_release_mbid,
            });
        }
        Ok(entries)
    }
 }
 /// Return the name and MBID of the primary (first) credited artist.
 ///
 /// Only the first credit is used; collaborators and featured artists are
 /// never joined into a compound name. With no credits at all the result is
 /// `("Unknown Artist", None)`.
 fn extract_artist_credit(credits: &Option<Vec<MbArtistCredit>>) -> (String, Option<String>) {
    match credits.as_ref().and_then(|list| list.first()) {
        Some(primary) => (
            primary.artist.name.clone(),
            Some(primary.artist.id.clone()),
        ),
        None => ("Unknown Artist".to_string(), None),
    }
 }
 /// Collect the `(name, mbid)` pairs of collaborators credited after the
 /// primary artist, stopping at the first "featuring" credit.
 ///
 /// The join phrase is stored on the credit that comes before the artist it
 /// introduces, so each credit is examined together with its successor. The
 /// scan ends as soon as a join phrase contains "feat" or "ft."
 /// (case-insensitive); that artist and everything after it are left out.
 fn extract_secondary_artists(credits: &Option<Vec<MbArtistCredit>>) -> Vec<(String, String)> {
    let Some(credits) = credits else {
        return Vec::new();
    };
    // Pair every credit with the one that follows it; zero or one credit
    // produces no pairs and therefore an empty result.
    credits
        .iter()
        .zip(credits.iter().skip(1))
        .take_while(|(previous, _)| {
            let phrase = previous.joinphrase.as_deref().unwrap_or("").to_lowercase();
            !(phrase.contains("feat") || phrase.contains("ft."))
        })
        .map(|(_, collaborator)| {
            (
                collaborator.artist.name.clone(),
                collaborator.artist.id.clone(),
            )
        })
        .collect()
 }
 // --- MusicBrainz API response types ---
 /// Body of an artist search response.
 #[derive(Deserialize)]
 struct MbArtistSearchResponse {
    artists: Vec<MbArtistResult>,
 }
 /// One artist hit in a search response.
 #[derive(Deserialize)]
 struct MbArtistResult {
    id: String,
    name: String,
    /// Search score; callers treat a missing value as 0.
    score: Option<u8>,
    disambiguation: Option<String>,
    country: Option<String>,
    /// Read from the JSON key `type`.
    #[serde(rename = "type")]
    artist_type: Option<String>,
 }
 /// Minimal artist lookup body: only the name and disambiguation are read.
 #[derive(Deserialize)]
 struct MbArtistLookup {
    name: String,
    disambiguation: Option<String>,
 }
 /// Artist lookup body as requested with `inc=url-rels`.
 #[derive(Deserialize)]
 struct MbArtistFull {
    name: String,
    disambiguation: Option<String>,
    country: Option<String>,
    /// Read from the JSON key `type`.
    #[serde(rename = "type")]
    artist_type: Option<String>,
    /// Read from the JSON key `life-span`.
    #[serde(rename = "life-span")]
    life_span: Option<MbLifeSpan>,
    relations: Option<Vec<MbRelation>>,
 }
 /// Life-span of an artist; only the begin date is read.
 #[derive(Deserialize)]
 struct MbLifeSpan {
    /// Begin date string; the part before the first `-` is used as the begin year.
    begin: Option<String>,
 }
 /// One relation attached to an artist.
 #[derive(Deserialize)]
 struct MbRelation {
    /// Read from the JSON key `type`; exposed to callers as the link type.
    #[serde(rename = "type")]
    relation_type: String,
    /// Target of the relation; relations without one are skipped.
    url: Option<MbRelationUrl>,
 }
 /// URL target of a relation.
 #[derive(Deserialize)]
 struct MbRelationUrl {
    resource: String,
 }
 /// Body of a recording search response.
 #[derive(Deserialize)]
 struct MbRecordingSearchResponse {
    recordings: Vec<MbRecordingResult>,
 }
 /// One recording hit in a search response.
 #[derive(Deserialize)]
 struct MbRecordingResult {
    id: String,
    title: String,
    /// Search score; callers treat a missing value as 0.
    score: Option<u8>,
    /// Read from the JSON key `artist-credit`.
    #[serde(rename = "artist-credit")]
    artist_credit: Option<Vec<MbArtistCredit>>,
    releases: Option<Vec<MbReleaseResult>>,
 }
 /// Body of a response that lists releases; used for both the release search
 /// and the releases-by-artist request.
 #[derive(Deserialize)]
 struct MbReleaseSearchResponse {
    releases: Vec<MbReleaseResult>,
 }
 /// One release as it appears in search results and inside recordings.
 #[derive(Deserialize)]
 struct MbReleaseResult {
    id: String,
    title: String,
    /// Search score; callers treat a missing value as 0.
    score: Option<u8>,
    /// Read from the JSON key `artist-credit`.
    #[serde(rename = "artist-credit")]
    artist_credit: Option<Vec<MbArtistCredit>>,
    date: Option<String>,
    /// Read from the JSON key `track-count`.
    #[serde(rename = "track-count")]
    track_count: Option<i32>,
 }
 /// Recording lookup body as requested with `inc=artists+releases+genres`.
 #[derive(Deserialize)]
 struct MbRecordingDetail {
    id: String,
    title: String,
    /// Read from the JSON key `artist-credit`.
    #[serde(rename = "artist-credit")]
    artist_credit: Option<Vec<MbArtistCredit>>,
    releases: Option<Vec<MbReleaseResult>>,
    /// Exposed to callers as `duration_ms`.
    length: Option<u64>,
    genres: Option<Vec<MbGenre>>,
 }
 /// One entry of an artist credit list.
 #[derive(Deserialize)]
 struct MbArtistCredit {
    artist: MbArtist,
    /// Text joining this credit to the next one; checked for "feat"/"ft."
    /// when secondary artists are collected.
    joinphrase: Option<String>,
 }
 /// Artist referenced from an artist credit.
 #[derive(Deserialize)]
 struct MbArtist {
    id: String,
    name: String,
 }
 /// Genre attached to a recording; only its name is read.
 #[derive(Deserialize)]
 struct MbGenre {
    name: String,
 }
 /// Release lookup body as requested with `inc=recordings`.
 #[derive(Deserialize)]
 struct MbReleaseDetail {
    /// Media of the release; their order in the response determines the disc number.
    media: Option<Vec<MbMedia>>,
 }
 /// One medium of a release.
 #[derive(Deserialize)]
 struct MbMedia {
    tracks: Option<Vec<MbTrackEntry>>,
 }
 /// One track on a medium.
 #[derive(Deserialize)]
 struct MbTrackEntry {
    title: String,
    /// Exposed to callers as `track_number`.
    position: Option<i32>,
    /// Exposed to callers as `duration_ms`.
    length: Option<u64>,
    recording: Option<MbTrackRecording>,
 }
 /// Recording referenced by a track; only its id is read.
 #[derive(Deserialize)]
 struct MbTrackRecording {
    id: String,
 }
 /// Body of a release-group browse response.
 #[derive(Deserialize)]
 struct MbReleaseGroupResponse {
    /// Read from the JSON key `release-groups`.
    #[serde(rename = "release-groups")]
    release_groups: Option<Vec<MbReleaseGroup>>,
 }
 /// One release group of an artist.
 #[derive(Deserialize)]
 struct MbReleaseGroup {
    id: String,
    title: String,
    /// Read from the JSON key `primary-type`.
    #[serde(rename = "primary-type")]
    primary_type: Option<String>,
    /// Read from the JSON key `secondary-types`; an absent key becomes `None`.
    #[serde(rename = "secondary-types", default)]
    secondary_types: Option<Vec<String>>,
    /// Read from the JSON key `first-release-date`.
    #[serde(rename = "first-release-date")]
    first_release_date: Option<String>,
    /// Releases in the group; the first one supplies `first_release_mbid`.
    releases: Option<Vec<MbReleaseGroupRelease>>,
 }
 /// Release listed inside a release group; only its id is read.
 #[derive(Deserialize)]
 struct MbReleaseGroupRelease {
    id: String,
 }
@@ -0,0 +1,92 @@
 use std::future::Future;
 use crate::error::DataResult;
 use crate::types::{
    ArtistInfo, ArtistSearchResult, DiscographyEntry, LyricsResult, RecordingDetails,
    RecordingMatch, ReleaseGroupEntry, ReleaseMatch, ReleaseTrack,
 };
 /// Trait for metadata lookup backends. MusicBrainz is the default implementation;
 /// others (Last.fm, Discogs, etc.) can be added later.
 ///
 /// Implementors must be `Send + Sync`, and every method returns a `Send` future
 /// that resolves to a `DataResult`.
 pub trait MetadataFetcher: Send + Sync {
    /// Search for recordings given an artist name and a track title,
    /// resolving to a list of `RecordingMatch` values.
    fn search_recording(
        &self,
        artist: &str,
        title: &str,
    ) -> impl Future<Output = DataResult<Vec<RecordingMatch>>> + Send;
    /// Search for releases given an artist name and an album title,
    /// resolving to a list of `ReleaseMatch` values.
    fn search_release(
        &self,
        artist: &str,
        album: &str,
    ) -> impl Future<Output = DataResult<Vec<ReleaseMatch>>> + Send;
    /// Look up one recording by its MBID, resolving to its `RecordingDetails`.
    fn get_recording(
        &self,
        mbid: &str,
    ) -> impl Future<Output = DataResult<RecordingDetails>> + Send;
    /// Search for artists with a free-form query.
    /// `limit` is presumably the maximum number of results — confirm with implementors.
    fn search_artist(
        &self,
        query: &str,
        limit: u32,
    ) -> impl Future<Output = DataResult<Vec<ArtistSearchResult>>> + Send;
    /// List releases for the artist identified by `artist_mbid` as `DiscographyEntry` values.
    /// `limit` is presumably the maximum number of results — confirm with implementors.
    fn get_artist_releases(
        &self,
        artist_mbid: &str,
        limit: u32,
    ) -> impl Future<Output = DataResult<Vec<DiscographyEntry>>> + Send;
    /// List the tracks of the release identified by `release_mbid`.
    fn get_release_tracks(
        &self,
        release_mbid: &str,
    ) -> impl Future<Output = DataResult<Vec<ReleaseTrack>>> + Send;
    /// Get deduplicated release groups (albums, EPs, singles) for an artist.
    fn get_artist_release_groups(
        &self,
        artist_mbid: &str,
    ) -> impl Future<Output = DataResult<Vec<ReleaseGroupEntry>>> + Send;
 }
 /// Fetches artist image URLs from an external source.
 ///
 /// Both methods resolve to `Ok(None)`-able results: an `Option<String>` inside a
 /// `DataResult`, so "no image" and "lookup failed" can be told apart by implementors.
 pub trait ArtistImageFetcher: Send + Sync {
    /// Thumbnail/profile image for the artist.
    fn get_artist_image(
        &self,
        artist_info: &ArtistInfo,
    ) -> impl Future<Output = DataResult<Option<String>>> + Send;
    /// Wide banner/background image for the artist (e.g., 1920x1080).
    /// Returns None by default — providers that don't support banners need not implement this.
    fn get_artist_banner(
        &self,
        artist_info: &ArtistInfo,
    ) -> impl Future<Output = DataResult<Option<String>>> + Send {
        // The default ignores its argument; binding it to `_` marks that as intentional.
        let _ = artist_info;
        // The async block captures nothing and immediately yields "no banner".
        async { Ok(None) }
    }
 }
 /// Fetches an artist biography from an external source.
 pub trait ArtistBioFetcher: Send + Sync {
    /// Resolve to the biography text for `artist_info`, or `None` when the
    /// source has nothing for this artist.
    fn get_artist_bio(
        &self,
        artist_info: &ArtistInfo,
    ) -> impl Future<Output = DataResult<Option<String>>> + Send;
 }
 /// Fetches song lyrics from an external source.
 pub trait LyricsFetcher: Send + Sync {
    /// Look up lyrics by artist name and track title, resolving to a `LyricsResult`.
    fn get_lyrics(
        &self,
        artist: &str,
        title: &str,
    ) -> impl Future<Output = DataResult<LyricsResult>> + Send;
 }
 /// Fetches cover art URLs for releases.
 pub trait CoverArtFetcher: Send + Sync {
    /// Return a cover art URL for `release_id`, or `None` when none is available.
    /// Unlike the other fetcher traits in this file, this method is synchronous
    /// and has no error channel.
    fn get_cover_art_url(&self, release_id: &str) -> Option<String>;
 }
@@ -0,0 +1,120 @@
 use serde::{Deserialize, Serialize};
 /// A reference to a release (album) that a recording appears on.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ReleaseRef {
    /// MBID of the release.
    pub mbid: String,
    /// Title of the release.
    pub title: String,
    /// Release date as a string, if known (format is not fixed by this type).
    pub date: Option<String>,
    /// Track number, if known; presumably the recording's position on this release — confirm.
    pub track_number: Option<i32>,
 }
 /// A recording match from a search query.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct RecordingMatch {
    /// MBID of the recording.
    pub mbid: String,
    /// Title of the recording.
    pub title: String,
    /// Artist name as a single display string.
    pub artist: String,
    /// MBID of the artist, if known.
    pub artist_mbid: Option<String>,
    /// Releases this recording appears on.
    pub releases: Vec<ReleaseRef>,
    /// MusicBrainz API score (0-100).
    pub score: u8,
 }
 /// A release (album) match from a search query.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ReleaseMatch {
    /// MBID of the release.
    pub mbid: String,
    /// Title of the release.
    pub title: String,
    /// Artist name as a single display string.
    pub artist: String,
    /// MBID of the artist, if known.
    pub artist_mbid: Option<String>,
    /// Release date as a string, if known (format is not fixed by this type).
    pub date: Option<String>,
    /// Number of tracks on the release, if known.
    pub track_count: Option<i32>,
    /// Match score; presumably 0-100 like `RecordingMatch::score` — confirm.
    pub score: u8,
 }
 /// Full details for a recording, retrieved by MBID.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct RecordingDetails {
    /// MBID of the recording.
    pub mbid: String,
    /// Title of the recording.
    pub title: String,
    /// Artist name as a single display string.
    pub artist: String,
    /// MBID of the artist, if known.
    pub artist_mbid: Option<String>,
    /// Releases this recording appears on.
    pub releases: Vec<ReleaseRef>,
    /// Duration in milliseconds, if known.
    pub duration_ms: Option<u64>,
    /// Genre names attached to the recording.
    pub genres: Vec<String>,
    /// Non-featuring collaborators beyond the primary artist.
    /// Deserializes to an empty list when the key is missing.
    /// NOTE(review): the meaning of the two tuple slots (presumably name and MBID)
    /// is not fixed here — confirm against the code that fills it.
    #[serde(default)]
    pub secondary_artists: Vec<(String, String)>,
 }
 /// Detailed artist info from a direct MBID lookup.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ArtistInfo {
    /// Display name of the artist.
    pub name: String,
    /// MBID of the artist; `None` when the key is missing from the input.
    #[serde(default)]
    pub mbid: Option<String>,
    /// Disambiguation text, if any.
    pub disambiguation: Option<String>,
    /// Country, if known (presumably a country code — confirm).
    pub country: Option<String>,
    /// Artist type, if known (presumably values such as "Person" or "Group" — confirm).
    pub artist_type: Option<String>,
    /// Begin year as a string, if known.
    pub begin_year: Option<String>,
    /// External links for the artist; no serde default, so the key is required
    /// when deserializing.
    pub urls: Vec<ArtistUrl>,
 }
 /// An external URL linked to an artist on MusicBrainz.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ArtistUrl {
    /// The linked URL.
    pub url: String,
    /// Kind of link; `WikipediaFetcher` matches on the values "wikipedia" and "wikidata".
    pub link_type: String,
 }
 /// An artist match from a search query.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ArtistSearchResult {
    /// MBID of the artist.
    pub mbid: String,
    /// Display name of the artist.
    pub name: String,
    /// Disambiguation text, if any.
    pub disambiguation: Option<String>,
    /// Country, if known (presumably a country code — confirm).
    pub country: Option<String>,
    /// Artist type, if known.
    pub artist_type: Option<String>,
    /// Match score; presumably 0-100 like `RecordingMatch::score` — confirm.
    pub score: u8,
 }
 /// A release entry in an artist's discography.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct DiscographyEntry {
    /// MBID of the release.
    pub mbid: String,
    /// Title of the release.
    pub title: String,
    /// Release date as a string, if known (format is not fixed by this type).
    pub date: Option<String>,
    /// Release type, if known (presumably values such as "Album" or "Single" — confirm).
    pub release_type: Option<String>,
    /// Number of tracks on the release, if known.
    pub track_count: Option<i32>,
 }
 /// A release group (deduplicated album/EP/single concept).
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ReleaseGroupEntry {
    /// MBID of the release group.
    pub mbid: String,
    /// Title of the release group.
    pub title: String,
    /// Primary type, if known.
    pub primary_type: Option<String>,
    /// Secondary types; required when deserializing (no serde default).
    pub secondary_types: Vec<String>,
    /// First release date as a string, if known.
    pub first_release_date: Option<String>,
    /// MBID of the first release in this group (for fetching tracks).
    pub first_release_mbid: Option<String>,
 }
 /// A track within a release.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ReleaseTrack {
    /// MBID of the recording behind this track.
    pub recording_mbid: String,
    /// Title of the track.
    pub title: String,
    /// Track number, if known.
    pub track_number: Option<i32>,
    /// Disc number, if known.
    pub disc_number: Option<i32>,
    /// Duration in milliseconds, if known.
    pub duration_ms: Option<u64>,
 }
 /// Result from a lyrics lookup.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct LyricsResult {
    /// Whether the lookup found lyrics.
    pub found: bool,
    /// Lyrics text, if any.
    pub lyrics: Option<String>,
    /// Synced lyrics, if any (presumably time-stamped, e.g. LRC format — confirm).
    pub synced_lyrics: Option<String>,
 }
@@ -0,0 +1,122 @@
 use crate::error::DataResult;
 use crate::http::build_client;
 use crate::traits::{ArtistBioFetcher, ArtistImageFetcher};
 use crate::types::ArtistInfo;
 /// Client identification string handed to `build_client` when a `WikipediaFetcher`
 /// is constructed (presumably sent as the HTTP `User-Agent` header — confirm in `crate::http`).
 const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
 /// Fetches artist images and bios from Wikipedia/Wikidata.
 pub struct WikipediaFetcher {
    /// HTTP client used for every Wikidata and Wikipedia request made by this fetcher.
    client: reqwest::Client,
 }
 impl WikipediaFetcher {
    /// Create a fetcher backed by a client from `build_client(USER_AGENT, 30)`.
    ///
    /// # Errors
    /// Returns the error from `build_client` if the client cannot be built.
    pub fn new() -> DataResult<Self> {
        // NOTE(review): `30` is presumably a timeout in seconds — confirm in `crate::http`.
        let client = build_client(USER_AGENT, 30)?;
        Ok(Self { client })
    }

    /// Percent-encode `raw` for use as a single URL path segment.
    ///
    /// Only the RFC 3986 unreserved characters (ASCII letters, digits, `-`, `_`,
    /// `.`, `~`) are copied through; every other byte becomes `%XX`. In particular
    /// `/`, `?`, `#` and `%` are escaped, so a page title such as `AC/DC` stays one
    /// segment instead of turning into two.
    fn encode_path_segment(raw: &str) -> String {
        const HEX: &[u8; 16] = b"0123456789ABCDEF";
        let mut encoded = String::with_capacity(raw.len());
        for byte in raw.bytes() {
            if byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~') {
                encoded.push(char::from(byte));
            } else {
                encoded.push('%');
                encoded.push(char::from(HEX[usize::from(byte >> 4)]));
                encoded.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
        encoded
    }

    /// Find the Wikipedia URL for an artist from their MusicBrainz URLs.
    /// Tries a direct Wikipedia link first, then resolves via Wikidata.
    ///
    /// Returns `None` when the artist has neither kind of link or the Wikidata
    /// lookup yields no English Wikipedia page.
    async fn resolve_wiki_url(&self, artist_info: &ArtistInfo) -> Option<String> {
        // Direct Wikipedia link
        if let Some(u) = artist_info.urls.iter().find(|u| u.link_type == "wikipedia") {
            return Some(u.url.clone());
        }
        // Resolve via Wikidata
        if let Some(wd) = artist_info.urls.iter().find(|u| u.link_type == "wikidata") {
            // The entity id is the last path segment; ignore a trailing slash so
            // ".../wiki/Q123/" still yields "Q123" rather than an empty id.
            let entity_id = wd
                .url
                .trim_end_matches('/')
                .rsplit('/')
                .next()
                .unwrap_or("");
            return self.resolve_wikidata_to_wikipedia(entity_id).await;
        }
        None
    }

    /// Resolve a Wikidata entity ID to an English Wikipedia URL.
    ///
    /// Returns `None` when the id is empty or not purely ASCII alphanumeric, when
    /// the request fails or is answered with a non-success status, or when the
    /// entity has no `enwiki` sitelink. The page title in the returned URL is
    /// percent-encoded as a single path segment.
    async fn resolve_wikidata_to_wikipedia(&self, entity_id: &str) -> Option<String> {
        // The id is interpolated into the query string below, so refuse anything
        // that could smuggle in extra parameters (`&`, `=`, `#`, ...).
        if entity_id.is_empty() || !entity_id.bytes().all(|b| b.is_ascii_alphanumeric()) {
            return None;
        }
        let url = format!(
            "https://www.wikidata.org/w/api.php?action=wbgetentities&ids={entity_id}&props=sitelinks&sitefilter=enwiki&format=json"
        );
        let resp = self.client.get(&url).send().await.ok()?;
        if !resp.status().is_success() {
            return None;
        }
        let body: serde_json::Value = resp.json().await.ok()?;
        let title = body
            .get("entities")
            .and_then(|e| e.get(entity_id))
            .and_then(|e| e.get("sitelinks"))
            .and_then(|s| s.get("enwiki"))
            .and_then(|w| w.get("title"))
            .and_then(|t| t.as_str())?;
        // Wikipedia uses underscores for spaces; everything else that is not URL-safe
        // (including `/` and `?`) is escaped so the title survives as one path segment.
        Some(format!(
            "https://en.wikipedia.org/wiki/{}",
            Self::encode_path_segment(&title.replace(' ', "_"))
        ))
    }

    /// Fetch the Wikipedia summary for a given URL, returning (photo_url, bio).
    ///
    /// `wiki_url` is expected to look like `https://<lang>.wikipedia.org/wiki/<title>`.
    /// Both tuple slots are `None` when no title can be extracted, the request
    /// fails, the status is not a success, or the body is not valid JSON.
    async fn fetch_summary(&self, wiki_url: &str) -> (Option<String>, Option<String>) {
        // A query string or fragment is not part of the page title.
        let page_url = wiki_url.split(['?', '#']).next().unwrap_or(wiki_url);
        // Everything after the first "/wiki/" is the title. The summary endpoint takes
        // the title as ONE path segment, so a literal `/` (e.g. "AC/DC") must be sent
        // as `%2F`. Existing percent-escapes are left untouched to avoid double-encoding.
        let title = match page_url.split_once("/wiki/") {
            Some((_, raw_title)) if !raw_title.is_empty() => raw_title.replace('/', "%2F"),
            _ => return (None, None),
        };
        // The language is the first host label. Anything that is not a plain label
        // (for example a dot-less host, which would drag `/path` into it) falls back
        // to "en" so the request can only ever go to a *.wikipedia.org host.
        let lang = wiki_url
            .split("://")
            .nth(1)
            .and_then(|rest| rest.split('.').next())
            .filter(|label| {
                !label.is_empty()
                    && label
                        .bytes()
                        .all(|b| b.is_ascii_alphanumeric() || b == b'-')
            })
            .unwrap_or("en");
        let api_url = format!("https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title}");
        let resp = match self.client.get(&api_url).send().await {
            Ok(r) if r.status().is_success() => r,
            _ => return (None, None),
        };
        let body: serde_json::Value = match resp.json().await {
            Ok(v) => v,
            Err(_) => return (None, None),
        };
        let photo_url = body
            .get("thumbnail")
            .and_then(|t| t.get("source"))
            .and_then(|s| s.as_str())
            .map(String::from);
        let bio = body
            .get("extract")
            .and_then(|e| e.as_str())
            .map(String::from);
        (photo_url, bio)
    }
 }
 impl ArtistImageFetcher for WikipediaFetcher {
    /// Look up the artist's Wikipedia page and return the thumbnail URL from its
    /// summary. Resolves to `Ok(None)` when no page can be found or the summary
    /// carries no thumbnail; this implementation never returns `Err`.
    async fn get_artist_image(&self, artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        let Some(page_url) = self.resolve_wiki_url(artist_info).await else {
            return Ok(None);
        };
        // Slot 0 of the summary tuple is the photo URL.
        let summary = self.fetch_summary(&page_url).await;
        Ok(summary.0)
    }
 }
 impl ArtistBioFetcher for WikipediaFetcher {
    /// Look up the artist's Wikipedia page and return the extract text from its
    /// summary. Resolves to `Ok(None)` when no page can be found or the summary
    /// carries no extract; this implementation never returns `Err`.
    async fn get_artist_bio(&self, artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        let Some(page_url) = self.resolve_wiki_url(artist_info).await else {
            return Ok(None);
        };
        // Slot 1 of the summary tuple is the biography text.
        let summary = self.fetch_summary(&page_url).await;
        Ok(summary.1)
    }
 }
@@ -6,9 +6,10 @@ use tracing_actix_web::TracingLogger;
 use tracing_subscriber::EnvFilter;
 use shanty_config::AppConfig;
 use shanty_data::MusicBrainzFetcher;
 use shanty_data::WikipediaFetcher;
 use shanty_db::Database;
 use shanty_search::MusicBrainzSearch;
 use shanty_tag::MusicBrainzClient;
 use shanty_web::routes;
 use shanty_web::state::AppState;
@@ -32,6 +33,9 @@ struct Cli {
 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
    // Load .env file if present (before anything reads env vars)
    dotenvy::dotenv().ok();
    let cli = Cli::parse();
    let filter = match cli.verbose {
@@ -53,8 +57,9 @@ async fn main() -> anyhow::Result<()> {
    tracing::info!(url = %config.database_url, "connecting to database");
    let db = Database::new(&config.database_url).await?;
-    let mb_client = MusicBrainzClient::new()?;
+    let mb_client = MusicBrainzFetcher::new()?;
    let search = MusicBrainzSearch::new()?;
    let wiki_fetcher = WikipediaFetcher::new()?;
    let bind = format!("{}:{}", config.web.bind, config.web.port);
    tracing::info!(bind = %bind, "starting server");
@@ -64,6 +69,7 @@ async fn main() -> anyhow::Result<()> {
        db,
        mb_client,
        search,
        wiki_fetcher,
        config: std::sync::Arc::new(tokio::sync::RwLock::new(config)),
        config_path,
        tasks: TaskManager::new(),