Main/shanty-data/src/musicbrainz.rs

use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;

use serde::Deserialize;
use tokio::sync::Mutex;
use tokio::time::Duration;

use crate::error::{DataError, DataResult};
use crate::http::{RateLimiter, escape_lucene, urlencoded};
use crate::traits::MetadataFetcher;
use crate::types::{
    ArtistInfo, ArtistSearchResult, ArtistUrl, DiscographyEntry, RecordingDetails, RecordingMatch,
    ReleaseGroupEntry, ReleaseMatch, ReleaseRef, ReleaseTrack,
};

/// Root of the MusicBrainz web service; every request URL in this file is built from it.
const BASE_URL: &str = "https://musicbrainz.org/ws/2";
/// Value installed as the HTTP client's user agent in `MusicBrainzFetcher::with_limiter`.
const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
/// Duration handed to `RateLimiter::new` when `MusicBrainzFetcher::new` creates its own limiter.
const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe
/// How long cached responses stay valid (1 hour).
const RESPONSE_CACHE_TTL: Duration = Duration::from_secs(3600);

/// A cached HTTP response with a timestamp for expiry.
struct CachedResponse {
    /// Raw response text; it is stored unparsed and deserialized again on every cache hit.
    body: String,
    /// Moment the entry was stored; its age is compared against `RESPONSE_CACHE_TTL`.
    fetched_at: Instant,
}

/// MusicBrainz API client with rate limiting and in-memory response caching.
pub struct MusicBrainzFetcher {
    /// HTTP client configured in `with_limiter` (user agent `USER_AGENT`, 30-second timeout).
    client: reqwest::Client,
    /// Awaited before each request that is not answered from the cache; `limiter()` hands out
    /// clones of it.
    limiter: RateLimiter,
    /// In-memory cache: URL -> raw JSON response body. Entries expire after RESPONSE_CACHE_TTL.
    response_cache: Arc<Mutex<HashMap<String, CachedResponse>>>,
}

impl MusicBrainzFetcher {
    /// Build a fetcher that owns a fresh rate limiter configured with `RATE_LIMIT`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`Self::with_limiter`].
    pub fn new() -> DataResult<Self> {
        let limiter = RateLimiter::new(RATE_LIMIT);
        Self::with_limiter(limiter)
    }

    /// Build a fetcher around an existing rate limiter so several MB clients can share it.
    ///
    /// The HTTP client sends `USER_AGENT` and uses a 30-second timeout; the response cache
    /// starts out empty.
    ///
    /// # Errors
    ///
    /// Returns an error if the underlying HTTP client cannot be built.
    pub fn with_limiter(limiter: RateLimiter) -> DataResult<Self> {
        let request_timeout = Duration::from_secs(30);
        let client = reqwest::Client::builder()
            .user_agent(USER_AGENT)
            .timeout(request_timeout)
            .build()?;
        let response_cache = Arc::new(Mutex::new(HashMap::new()));

        Ok(Self {
            client,
            limiter,
            response_cache,
        })
    }

    /// Return a clone of this fetcher's rate limiter, for handing to other MB clients.
    pub fn limiter(&self) -> RateLimiter {
        Clone::clone(&self.limiter)
    }

    /// Fetch `url` and deserialize its JSON body into `T`, going through the in-memory cache.
    ///
    /// A cached body younger than `RESPONSE_CACHE_TTL` is deserialized and returned without
    /// waiting on the rate limiter or touching the network. Otherwise the limiter is awaited,
    /// the request is sent, and the body is stored in the cache only after it has been
    /// deserialized successfully.
    ///
    /// # Errors
    ///
    /// Returns an error if the request fails, if the server answers with a non-success status
    /// (`DataError::Other` carrying the status and the response body), or if the body cannot be
    /// deserialized into `T`.
    async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> DataResult<T> {
        // Check in-memory cache first
        {
            let mut cache = self.response_cache.lock().await;
            if let Some(entry) = cache.get(url) {
                if entry.fetched_at.elapsed() < RESPONSE_CACHE_TTL {
                    tracing::debug!(url = url, "MusicBrainz cache hit");
                    return Ok(serde_json::from_str(&entry.body)?);
                }
                // Expired — remove it
                cache.remove(url);
            }
            // Also purge any other expired entries opportunistically (at most 50 per call so
            // the lock is not held for too long)
            let expired: Vec<String> = cache
                .iter()
                .filter(|(_, v)| v.fetched_at.elapsed() >= RESPONSE_CACHE_TTL)
                .map(|(k, _)| k.clone())
                .take(50)
                .collect();
            for k in expired {
                cache.remove(&k);
            }
        }

        self.limiter.wait().await;
        tracing::debug!(url = url, "MusicBrainz request");
        let resp = self.client.get(url).send().await?;
        let status = resp.status();
        if !status.is_success() {
            // Best effort: an unreadable error body is reported as an empty string.
            let body = resp.text().await.unwrap_or_default();
            return Err(DataError::Other(format!(
                "MusicBrainz API error {status}: {body}"
            )));
        }
        let body = resp.text().await?;

        // Deserialize BEFORE caching: a body that does not parse must not be stored, otherwise
        // every call for this URL would keep failing from the cache until the TTL runs out
        // instead of retrying the request.
        let parsed: T = serde_json::from_str(&body)?;

        // Store in cache (the body is moved in; no copy is needed any more)
        self.response_cache.lock().await.insert(
            url.to_string(),
            CachedResponse {
                body,
                fetched_at: Instant::now(),
            },
        );

        Ok(parsed)
    }

    /// Look up an artist directly by MBID. Returns (name, disambiguation).
    pub async fn get_artist_by_mbid(&self, mbid: &str) -> DataResult<(String, Option<String>)> {
        let url = format!("{BASE_URL}/artist/{mbid}?fmt=json");
        let resp: MbArtistLookup = self.get_json(&url).await?;
        Ok((resp.name, resp.disambiguation.filter(|s| !s.is_empty())))
    }

    /// Look up detailed artist info by MBID, including URLs and metadata.
    pub async fn get_artist_info(&self, mbid: &str) -> DataResult<ArtistInfo> {
        let url = format!("{BASE_URL}/artist/{mbid}?inc=url-rels&fmt=json");
        let resp: MbArtistFull = self.get_json(&url).await?;

        let begin_year = resp
            .life_span
            .and_then(|ls| ls.begin)
            .and_then(|d| d.split('-').next().map(String::from));

        let urls = resp
            .relations
            .unwrap_or_default()
            .into_iter()
            .filter_map(|rel| {
                rel.url.map(|u| ArtistUrl {
                    url: u.resource,
                    link_type: rel.relation_type,
                })
            })
            .collect();

        Ok(ArtistInfo {
            name: resp.name,
            mbid: Some(mbid.to_string()),
            disambiguation: resp.disambiguation.filter(|s| !s.is_empty()),
            country: resp.country.filter(|s| !s.is_empty()),
            artist_type: resp.artist_type,
            begin_year,
            urls,
        })
    }

    /// Map a release-group MBID to the MBID of the first release returned for that group.
    ///
    /// Only one release is requested (`limit=1`).
    ///
    /// # Errors
    ///
    /// Returns `DataError::Other` when the response contains no release with a string `id`,
    /// and propagates any error from the underlying request or from JSON deserialization.
    pub async fn resolve_release_from_group(&self, release_group_mbid: &str) -> DataResult<String> {
        let endpoint =
            format!("{BASE_URL}/release?release-group={release_group_mbid}&fmt=json&limit=1");
        let payload = self.get_json::<serde_json::Value>(&endpoint).await?;

        // Walk releases[0].id, tolerating any missing or mistyped level.
        let first_id = payload
            .get("releases")
            .and_then(|releases| releases.as_array())
            .and_then(|list| list.first())
            .and_then(|release| release.get("id"))
            .and_then(|id| id.as_str());

        match first_id {
            Some(id) => Ok(id.to_owned()),
            None => Err(DataError::Other(format!(
                "no releases for release-group {release_group_mbid}"
            ))),
        }
    }
}

impl MetadataFetcher for MusicBrainzFetcher {
    async fn search_recording(&self, artist: &str, title: &str) -> DataResult<Vec<RecordingMatch>> {
        let query = if artist.is_empty() {
            format!("recording:{}", escape_lucene(title))
        } else {
            format!(
                "artist:{} AND recording:{}",
                escape_lucene(artist),
                escape_lucene(title)
            )
        };
        let url = format!(
            "{BASE_URL}/recording/?query={}&fmt=json&limit=5",
            urlencoded(&query)
        );
        let resp: MbRecordingSearchResponse = self.get_json(&url).await?;

        Ok(resp
            .recordings
            .into_iter()
            .map(|r| {
                let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
                RecordingMatch {
                    mbid: r.id,
                    title: r.title,
                    artist: artist_name,
                    artist_mbid,
                    releases: r
                        .releases
                        .unwrap_or_default()
                        .into_iter()
                        .map(|rel| ReleaseRef {
                            mbid: rel.id,
                            title: rel.title,
                            date: rel.date,
                            track_number: None,
                        })
                        .collect(),
                    score: r.score.unwrap_or(0),
                }
            })
            .collect())
    }

    async fn search_release(&self, artist: &str, album: &str) -> DataResult<Vec<ReleaseMatch>> {
        let query = if artist.is_empty() {
            format!("release:{}", escape_lucene(album))
        } else {
            format!(
                "artist:{} AND release:{}",
                escape_lucene(artist),
                escape_lucene(album)
            )
        };
        let url = format!(
            "{BASE_URL}/release/?query={}&fmt=json&limit=5",
            urlencoded(&query)
        );
        let resp: MbReleaseSearchResponse = self.get_json(&url).await?;

        Ok(resp
            .releases
            .into_iter()
            .map(|r| {
                let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
                ReleaseMatch {
                    mbid: r.id,
                    title: r.title,
                    artist: artist_name,
                    artist_mbid,
                    date: r.date,
                    track_count: r.track_count,
                    score: r.score.unwrap_or(0),
                }
            })
            .collect())
    }

    async fn get_recording(&self, mbid: &str) -> DataResult<RecordingDetails> {
        let url = format!("{BASE_URL}/recording/{mbid}?inc=artists+releases+genres&fmt=json");
        let r: MbRecordingDetail = self.get_json(&url).await?;

        let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
        Ok(RecordingDetails {
            mbid: r.id,
            title: r.title,
            artist: artist_name,
            artist_mbid,
            releases: r
                .releases
                .unwrap_or_default()
                .into_iter()
                .map(|rel| ReleaseRef {
                    mbid: rel.id,
                    title: rel.title,
                    date: rel.date,
                    track_number: None,
                })
                .collect(),
            duration_ms: r.length,
            genres: r
                .genres
                .unwrap_or_default()
                .into_iter()
                .map(|g| g.name)
                .collect(),
        })
    }

    async fn search_artist(&self, query: &str, limit: u32) -> DataResult<Vec<ArtistSearchResult>> {
        let url = format!(
            "{BASE_URL}/artist/?query={}&fmt=json&limit={limit}",
            urlencoded(&escape_lucene(query))
        );
        let resp: MbArtistSearchResponse = self.get_json(&url).await?;

        Ok(resp
            .artists
            .into_iter()
            .map(|a| ArtistSearchResult {
                mbid: a.id,
                name: a.name,
                disambiguation: a.disambiguation.filter(|s| !s.is_empty()),
                country: a.country,
                artist_type: a.artist_type,
                score: a.score.unwrap_or(0),
            })
            .collect())
    }

    /// List releases for an artist MBID, requesting at most `limit` entries.
    ///
    /// `release_type` is always `None` because this response does not carry the
    /// release-group type.
    async fn get_artist_releases(
        &self,
        artist_mbid: &str,
        limit: u32,
    ) -> DataResult<Vec<DiscographyEntry>> {
        let endpoint = format!("{BASE_URL}/release/?artist={artist_mbid}&fmt=json&limit={limit}");
        let releases = self
            .get_json::<MbReleaseSearchResponse>(&endpoint)
            .await?
            .releases;

        let mut entries = Vec::with_capacity(releases.len());
        for release in releases {
            entries.push(DiscographyEntry {
                mbid: release.id,
                title: release.title,
                date: release.date,
                release_type: None, // release-group type not in this response
                track_count: release.track_count,
            });
        }
        Ok(entries)
    }

    async fn get_release_tracks(&self, release_mbid: &str) -> DataResult<Vec<ReleaseTrack>> {
        let url = format!("{BASE_URL}/release/{release_mbid}?inc=recordings&fmt=json");
        let resp: MbReleaseDetail = self.get_json(&url).await?;

        let mut tracks = Vec::new();
        for (disc_idx, medium) in resp.media.unwrap_or_default().into_iter().enumerate() {
            for track in medium.tracks.unwrap_or_default() {
                tracks.push(ReleaseTrack {
                    recording_mbid: track.recording.map(|r| r.id).unwrap_or_default(),
                    title: track.title,
                    track_number: track.position,
                    disc_number: Some(disc_idx as i32 + 1),
                    duration_ms: track.length,
                });
            }
        }

        Ok(tracks)
    }

    /// List an artist's album, single and EP release groups.
    ///
    /// A single request with `limit=100` is made, so nothing beyond that first page is
    /// fetched. An entry is marked `featured` when the first credited artist is missing or
    /// is not `artist_mbid`. `first_release_mbid` is the id of the first release listed for
    /// the group, if any.
    async fn get_artist_release_groups(
        &self,
        artist_mbid: &str,
    ) -> DataResult<Vec<ReleaseGroupEntry>> {
        // Fetch album, single, and EP release groups
        let endpoint = format!(
            "{BASE_URL}/release-group?artist={artist_mbid}&type=album|single|ep&inc=artist-credits&fmt=json&limit=100"
        );
        let groups = self
            .get_json::<MbReleaseGroupResponse>(&endpoint)
            .await?
            .release_groups
            .unwrap_or_default();

        let mut entries = Vec::with_capacity(groups.len());
        for group in groups {
            let (_, primary_mbid) = extract_artist_credit(&group.artist_credit);
            let featured = primary_mbid
                .as_deref()
                .map_or(true, |id| id != artist_mbid);
            let first_release_mbid = group
                .releases
                .and_then(|list| list.into_iter().next())
                .map(|release| release.id);

            entries.push(ReleaseGroupEntry {
                mbid: group.id,
                title: group.title,
                primary_type: group.primary_type,
                secondary_types: group.secondary_types.unwrap_or_default(),
                first_release_date: group.first_release_date,
                first_release_mbid,
                featured,
            });
        }
        Ok(entries)
    }

    /// Trait entry point for resolving a release-group MBID to a release MBID.
    ///
    /// Forwards to the inherent `MusicBrainzFetcher::resolve_release_from_group` and returns
    /// its result unchanged.
    async fn resolve_release_from_group(&self, release_group_mbid: &str) -> DataResult<String> {
        // Delegate to the inherent method
        // NOTE(review): this relies on the type-qualified path resolving to the inherent method
        // rather than back to this trait method — keep the explicit path when refactoring.
        MusicBrainzFetcher::resolve_release_from_group(self, release_group_mbid).await
    }
}

/// Pull the primary artist's `(name, mbid)` out of a MusicBrainz artist-credit list.
///
/// Only the first credit is ever used; collaborators and featured artists are never merged
/// into a compound name. When the list is absent or empty the result is
/// `("Unknown Artist", None)`.
fn extract_artist_credit(credits: &Option<Vec<MbArtistCredit>>) -> (String, Option<String>) {
    credits
        .as_deref()
        .and_then(|list| list.first())
        .map_or_else(
            || (String::from("Unknown Artist"), None),
            |primary| {
                let artist = &primary.artist;
                (artist.name.clone(), Some(artist.id.clone()))
            },
        )
}

// --- MusicBrainz API response types ---

/// Top-level body of an artist search response, as deserialized by `search_artist`.
#[derive(Deserialize)]
struct MbArtistSearchResponse {
    /// Matching artists; the key is required, so deserialization fails when it is absent.
    artists: Vec<MbArtistResult>,
}

/// One artist entry inside an artist search response.
#[derive(Deserialize)]
struct MbArtistResult {
    /// Artist MBID.
    id: String,
    /// Artist name.
    name: String,
    /// Search score; `search_artist` substitutes `0` when it is missing.
    score: Option<u8>,
    /// Disambiguation text; `search_artist` reports an empty string as `None`.
    disambiguation: Option<String>,
    /// Country value, forwarded unchanged.
    country: Option<String>,
    /// Read from the JSON key `type` (renamed because `type` is a Rust keyword).
    #[serde(rename = "type")]
    artist_type: Option<String>,
}

/// Minimal artist lookup body used by `get_artist_by_mbid`; other keys are not declared here.
#[derive(Deserialize)]
struct MbArtistLookup {
    /// Artist name.
    name: String,
    /// Disambiguation text; the caller reports an empty string as `None`.
    disambiguation: Option<String>,
}

/// Artist lookup body used by `get_artist_info`, which requests `inc=url-rels`.
#[derive(Deserialize)]
struct MbArtistFull {
    /// Artist name.
    name: String,
    /// Disambiguation text; the caller reports an empty string as `None`.
    disambiguation: Option<String>,
    /// Country value; the caller reports an empty string as `None`.
    country: Option<String>,
    /// Read from the JSON key `type`.
    #[serde(rename = "type")]
    artist_type: Option<String>,
    /// Read from the JSON key `life-span`; only its `begin` value is used.
    #[serde(rename = "life-span")]
    life_span: Option<MbLifeSpan>,
    /// Relations attached to the artist; the caller keeps those that carry a URL.
    relations: Option<Vec<MbRelation>>,
}

/// The `life-span` object of an artist; only `begin` is declared.
#[derive(Deserialize)]
struct MbLifeSpan {
    /// Begin value as text; `get_artist_info` keeps the part before the first `-`.
    begin: Option<String>,
}

/// One entry of an artist's `relations` array.
#[derive(Deserialize)]
struct MbRelation {
    /// Read from the JSON key `type`; becomes `ArtistUrl::link_type`.
    #[serde(rename = "type")]
    relation_type: String,
    /// URL target of the relation; relations without one are skipped by `get_artist_info`.
    url: Option<MbRelationUrl>,
}

/// The `url` object of a relation.
#[derive(Deserialize)]
struct MbRelationUrl {
    /// The URL text; becomes `ArtistUrl::url`.
    resource: String,
}

/// Top-level body of a recording search response, as deserialized by `search_recording`.
#[derive(Deserialize)]
struct MbRecordingSearchResponse {
    /// Matching recordings; the key is required, so deserialization fails when it is absent.
    recordings: Vec<MbRecordingResult>,
}

/// One recording entry inside a recording search response.
#[derive(Deserialize)]
struct MbRecordingResult {
    /// Recording MBID.
    id: String,
    /// Recording title.
    title: String,
    /// Search score; `search_recording` substitutes `0` when it is missing.
    score: Option<u8>,
    /// Read from the JSON key `artist-credit`; only the first credit is used.
    #[serde(rename = "artist-credit")]
    artist_credit: Option<Vec<MbArtistCredit>>,
    /// Releases listed for the recording; treated as empty when missing.
    releases: Option<Vec<MbReleaseResult>>,
}

/// Top-level body with a `releases` array; used by `search_release` and `get_artist_releases`.
#[derive(Deserialize)]
struct MbReleaseSearchResponse {
    /// Releases in the response; the key is required, so deserialization fails when absent.
    releases: Vec<MbReleaseResult>,
}

/// One release entry, used in release responses and nested inside recording responses.
#[derive(Deserialize)]
struct MbReleaseResult {
    /// Release MBID.
    id: String,
    /// Release title.
    title: String,
    /// Search score; `search_release` substitutes `0` when it is missing.
    score: Option<u8>,
    /// Read from the JSON key `artist-credit`; only the first credit is used.
    #[serde(rename = "artist-credit")]
    artist_credit: Option<Vec<MbArtistCredit>>,
    /// Release date as text, forwarded unchanged.
    date: Option<String>,
    /// Read from the JSON key `track-count`.
    #[serde(rename = "track-count")]
    track_count: Option<i32>,
}

/// Recording lookup body used by `get_recording` (`inc=artists+releases+genres`).
#[derive(Deserialize)]
struct MbRecordingDetail {
    /// Recording MBID.
    id: String,
    /// Recording title.
    title: String,
    /// Read from the JSON key `artist-credit`; only the first credit is used.
    #[serde(rename = "artist-credit")]
    artist_credit: Option<Vec<MbArtistCredit>>,
    /// Releases containing the recording; treated as empty when missing.
    releases: Option<Vec<MbReleaseResult>>,
    /// Forwarded unchanged as `RecordingDetails::duration_ms`.
    length: Option<u64>,
    /// Genres attached to the recording; only their names are kept.
    genres: Option<Vec<MbGenre>>,
}

/// One entry of an `artist-credit` array; only the nested artist is declared.
#[derive(Deserialize)]
struct MbArtistCredit {
    /// The credited artist.
    artist: MbArtist,
}

/// The artist object nested inside an artist credit.
#[derive(Deserialize)]
struct MbArtist {
    /// Artist MBID.
    id: String,
    /// Artist name.
    name: String,
}

/// One entry of a recording's `genres` array; only the name is declared.
#[derive(Deserialize)]
struct MbGenre {
    /// Genre name.
    name: String,
}

/// Release lookup body used by `get_release_tracks` (`inc=recordings`).
#[derive(Deserialize)]
struct MbReleaseDetail {
    /// Media of the release; treated as empty when missing.
    media: Option<Vec<MbMedia>>,
}

/// One entry of a release's `media` array; its array index determines the disc number.
#[derive(Deserialize)]
struct MbMedia {
    /// Tracks on this medium; treated as empty when missing.
    tracks: Option<Vec<MbTrackEntry>>,
}

/// One track of a medium.
#[derive(Deserialize)]
struct MbTrackEntry {
    /// Track title.
    title: String,
    /// Forwarded unchanged as `ReleaseTrack::track_number`.
    position: Option<i32>,
    /// Forwarded unchanged as `ReleaseTrack::duration_ms`.
    length: Option<u64>,
    /// Recording behind the track; when missing, the recording MBID becomes an empty string.
    recording: Option<MbTrackRecording>,
}

/// The recording object nested inside a track; only its id is declared.
#[derive(Deserialize)]
struct MbTrackRecording {
    /// Recording MBID.
    id: String,
}

/// Top-level body of a release-group response, as deserialized by `get_artist_release_groups`.
#[derive(Deserialize)]
struct MbReleaseGroupResponse {
    /// Read from the JSON key `release-groups`; treated as empty when missing.
    #[serde(rename = "release-groups")]
    release_groups: Option<Vec<MbReleaseGroup>>,
}

/// One release group inside a release-group response.
#[derive(Deserialize)]
struct MbReleaseGroup {
    /// Release-group MBID.
    id: String,
    /// Release-group title.
    title: String,
    /// Read from the JSON key `primary-type`.
    #[serde(rename = "primary-type")]
    primary_type: Option<String>,
    /// Read from the JSON key `secondary-types`; the caller substitutes the default value
    /// when it is missing.
    #[serde(rename = "secondary-types", default)]
    secondary_types: Option<Vec<String>>,
    /// Read from the JSON key `first-release-date`, forwarded unchanged.
    #[serde(rename = "first-release-date")]
    first_release_date: Option<String>,
    /// Releases listed for the group; the first one supplies `first_release_mbid`.
    releases: Option<Vec<MbReleaseGroupRelease>>,
    /// Read from the JSON key `artist-credit`; its first entry decides the `featured` flag.
    #[serde(rename = "artist-credit")]
    artist_credit: Option<Vec<MbArtistCredit>>,
}

/// A release nested inside a release group; only its id is declared.
#[derive(Deserialize)]
struct MbReleaseGroupRelease {
    /// Release MBID.
    id: String,
}