diff --git a/src/library.rs b/src/library.rs index bb94b17..d4d8a8e 100644 --- a/src/library.rs +++ b/src/library.rs @@ -156,103 +156,63 @@ pub async fn add_artist( } }; - tracing::info!(name = %resolved_name, mbid = %artist_mbid, "fetching discography (release groups)"); + tracing::info!(name = %resolved_name, mbid = %artist_mbid, "loading discography"); - let all_release_groups = provider - .get_artist_release_groups(&artist_mbid) - .await - .map_err(|e| WatchError::Other(format!("failed to fetch release groups: {e}")))?; + // Use the unified discography source — same MBIDs the detail page displays. + // This reads from artist_rg_tracks caches (populated by enrich_artist), + // ensuring wanted_items always have MBIDs that match the detail page. + let disc = load_or_build_discography(conn, &artist_mbid, provider).await; - // Only include release groups where this artist is the primary credit, - // filtered by allowed secondary types (same as the artist detail page) - let release_groups: Vec<_> = all_release_groups - .into_iter() - .filter(|rg| !rg.featured) - .filter(|rg| { - if rg.secondary_types.is_empty() { + if disc.is_empty() { + tracing::warn!( + name = %resolved_name, + mbid = %artist_mbid, + "no discography data available — visit the artist page first to populate caches" + ); + return Ok(AddSummary::default()); + } + + // Deduplicate by MBID and expand into wanted items + let mut summary = AddSummary::default(); + let mut seen_mbids: std::collections::HashSet = std::collections::HashSet::new(); + + // Filter by allowed secondary types + let filtered: Vec<_> = disc + .iter() + .filter(|r| { + if r.rg_type.is_empty() || r.rg_type == "Album" { true } else { - rg.secondary_types - .iter() - .all(|st| allowed_secondary_types.contains(st)) + allowed_secondary_types.iter().any(|st| st == &r.rg_type) } }) .collect(); - tracing::info!(count = release_groups.len(), "found release groups"); + tracing::info!(total = disc.len(), filtered = filtered.len(), "discography loaded"); - let mut summary = AddSummary::default(); - let mut seen_mbids: std::collections::HashSet = std::collections::HashSet::new(); - let mut disc_recordings: Vec = Vec::new(); - - for rg in &release_groups { - // Resolve a concrete release MBID from the release group - let release_mbid = if let Some(ref rid) = rg.first_release_mbid { - rid.clone() - } else { - // Browse releases for this release group to find a concrete release - match provider.resolve_release_from_group(&rg.mbid).await { - Ok(rid) => rid, - Err(e) => { - tracing::warn!(rg = %rg.title, rg_id = %rg.mbid, error = %e, "failed to resolve release from group"); - summary.errors += 1; - continue; - } - } - }; - - tracing::info!(title = %rg.title, release_mbid = %release_mbid, "fetching tracks for release group"); - - let rg_type = rg.primary_type.clone().unwrap_or_default(); - let rg_date = rg.first_release_date.clone(); - - let tracks = match provider.get_release_tracks(&release_mbid).await { - Ok(t) => t, + for rec in &filtered { + if !seen_mbids.insert(rec.mbid.clone()) { + continue; // Already processed this recording + } + match add_track_inner( + conn, + &resolved_name, + &rec.title, + Some(&rec.mbid), + Some(&artist_mbid), + user_id, + ) + .await + { + Ok(true) => summary.tracks_added += 1, + Ok(false) => summary.tracks_already_owned += 1, Err(e) => { - tracing::warn!(rg = %rg.title, error = %e, "failed to fetch tracks"); + tracing::warn!(track = %rec.title, error = %e, "failed to add track"); summary.errors += 1; - continue; - } - }; - - for track in &tracks { - // Build discography cache entry for every recording - disc_recordings.push(DiscRecording { - mbid: track.recording_mbid.clone(), - title: track.title.clone(), - rg_type: rg_type.clone(), - rg_date: rg_date.clone(), - }); - - if !seen_mbids.insert(track.recording_mbid.clone()) { - continue; // Already processed this recording in another release group - } - match add_track_inner( - conn, - &resolved_name, - &track.title, - Some(&track.recording_mbid), - Some(&artist_mbid), - user_id, - ) - .await - { - Ok(true) => summary.tracks_added += 1, - Ok(false) => summary.tracks_already_owned += 1, - Err(e) => { - tracing::warn!(track = %track.title, error = %e, "failed to add track"); - summary.errors += 1; - } } } } - // Populate the discography cache so subsequent top-song watches resolve instantly - if let Ok(json) = serde_json::to_string(&disc_recordings) { - let cache_key = format!("artist_known_recordings:{artist_mbid}"); - let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await; - } - tracing::info!(%summary, "artist watch complete"); Ok(summary) } @@ -449,7 +409,7 @@ async fn finish_add_track( }); } - let is_owned = matching::track_is_owned(conn, artist_name, title).await?; + let is_owned = matching::track_is_owned(conn, recording_mbid.as_deref()).await?; let item = queries::wanted::add( conn, @@ -496,8 +456,8 @@ async fn load_and_resolve_discography( } /// Load the discography cache, or build it from the detail page's cached release group -/// tracks (`artist_rg_tracks:*`). This ensures the MBIDs match what the detail page displays. -/// Falls back to MB API only if no detail page caches exist. +/// tracks (`artist_rg_tracks:*`). Only uses caches populated by enrich_artist() — never +/// fetches from MB API independently, to ensure MBIDs always match the detail page. async fn load_or_build_discography( conn: &DatabaseConnection, artist_mbid: &str, @@ -505,17 +465,17 @@ async fn load_or_build_discography( ) -> Vec { let cache_key = format!("artist_known_recordings:{artist_mbid}"); - // Try loading from cache (handle both new Vec format and old HashMap format) + // Try the pre-built known_recordings cache first if let Ok(Some(json)) = queries::cache::get(conn, &cache_key).await { if let Ok(recordings) = serde_json::from_str::>(&json) { return recordings; } - // Old format — fall through to rebuild tracing::debug!(artist_mbid, "rebuilding discography cache (old format)"); } - // Build from the detail page's cached release group tracks. - // These are the source of truth for which recording MBIDs the detail page will display. + // Build from the detail page's cached release group tracks (artist_rg_tracks:*). + // These are populated by enrich_artist() and are the source of truth. + // We only use cached data — never fetch from MB API here to avoid MBID divergence. let mut recordings = Vec::new(); if let Ok(release_groups) = provider.get_artist_release_groups(artist_mbid).await { for rg in &release_groups { @@ -525,7 +485,6 @@ async fn load_or_build_discography( let rg_type = rg.primary_type.clone().unwrap_or_default(); let rg_date = rg.first_release_date.clone(); - // Prefer the detail page's cached tracks for this release group let rg_cache_key = format!("artist_rg_tracks:{}", rg.mbid); if let Ok(Some(json)) = queries::cache::get(conn, &rg_cache_key).await && let Ok(cached) = serde_json::from_str::(&json) @@ -544,27 +503,16 @@ async fn load_or_build_discography( }); } } - continue; // Got tracks from detail page cache, skip API - } - - // No detail page cache — fetch from API - let release_id = rg.first_release_mbid.as_deref().unwrap_or(&rg.mbid); - if let Ok(tracks) = provider.get_release_tracks(release_id).await { - for t in &tracks { - recordings.push(DiscRecording { - mbid: t.recording_mbid.clone(), - title: t.title.clone(), - rg_type: rg_type.clone(), - rg_date: rg_date.clone(), - }); - } } + // No cache for this RG = skip it (enrich_artist hasn't fetched it yet) } } - // Cache for 7 days - if let Ok(json) = serde_json::to_string(&recordings) { - let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await; + // Cache the result if we found anything + if !recordings.is_empty() { + if let Ok(json) = serde_json::to_string(&recordings) { + let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await; + } } recordings @@ -601,7 +549,7 @@ async fn add_track_inner( return Ok(false); } - let is_owned = matching::track_is_owned(conn, artist_name, title).await?; + let is_owned = matching::track_is_owned(conn, recording_mbid).await?; let item = queries::wanted::add( conn, diff --git a/src/matching.rs b/src/matching.rs index 317c2f6..1a4c2ba 100644 --- a/src/matching.rs +++ b/src/matching.rs @@ -87,34 +87,19 @@ pub async fn album_is_owned( Ok(false) } -/// Check if a specific track is "owned" — a track with matching artist + title exists. +/// Check if a specific track is "owned" — a track with the same recording MBID exists. +/// Uses MBID-only matching to avoid false positives from fuzzy name matching +/// (e.g., "Kill Me" incorrectly matching "How I Get Myself Killed"). pub async fn track_is_owned( conn: &DatabaseConnection, - artist_name: &str, - title: &str, + recording_mbid: Option<&str>, ) -> WatchResult { - let norm_artist = normalize(artist_name); - let norm_title = normalize(title); - - // Fuzzy: search tracks matching the title - let tracks = queries::tracks::search(conn, title).await?; - for track in &tracks { - let title_match = track - .title - .as_deref() - .map(|t| strsim::jaro_winkler(&norm_title, &normalize(t)) > 0.85) - .unwrap_or(false); - let artist_match = track - .artist - .as_deref() - .or(track.album_artist.as_deref()) - .map(|a| strsim::jaro_winkler(&norm_artist, &normalize(a)) > 0.85) - .unwrap_or(false); - if title_match && artist_match { + if let Some(mbid) = recording_mbid { + let all_tracks = queries::tracks::get_by_mbid(conn, mbid).await?; + if !all_tracks.is_empty() { return Ok(true); } } - Ok(false) }