From 17fd738774bab8a2d31c803d95976bba768bec7c Mon Sep 17 00:00:00 2001 From: Connor Johnstone Date: Wed, 25 Mar 2026 21:50:09 -0400 Subject: [PATCH] I **think** I've at least 99% fixed the top songs mismatch --- src/lib.rs | 4 +- src/library.rs | 384 +++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 361 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bdcabf4..63c58ce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,6 @@ pub mod matching; pub use error::{WatchError, WatchResult}; pub use library::{ - AddSummary, LibrarySummary, WatchListEntry, add_album, add_artist, add_track, library_summary, - list_items, remove_item, + AddSummary, DiscRecording, LibrarySummary, WatchListEntry, add_album, add_artist, add_track, + library_summary, list_items, remove_item, resolve_from_discography, }; diff --git a/src/library.rs b/src/library.rs index 1f309c3..bb94b17 100644 --- a/src/library.rs +++ b/src/library.rs @@ -3,6 +3,7 @@ use std::fmt; use sea_orm::DatabaseConnection; use serde::{Deserialize, Serialize}; +use shanty_db::entities::artist; use shanty_db::entities::wanted_item::{ItemType, WantedStatus}; use shanty_db::queries; use shanty_tag::provider::MetadataProvider; @@ -10,6 +11,68 @@ use shanty_tag::provider::MetadataProvider; use crate::error::{WatchError, WatchResult}; use crate::matching; +/// A recording from an artist's discography, with release group context. +/// Used in the `artist_known_recordings` cache to resolve top song titles +/// to the correct recording MBID (preferring album over EP/single, older over newer). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DiscRecording { + pub mbid: String, + pub title: String, + pub rg_type: String, + pub rg_date: Option, +} + +/// Normalize a title for comparison: lowercase + replace smart apostrophes/quotes. +fn normalize_title(s: &str) -> String { + s.to_lowercase() + .replace(['\u{2019}', '\u{2018}'], "'") + .replace(['\u{201C}', '\u{201D}'], "\"") +} + +/// Resolve a song title to the best matching recording from an artist's discography. +/// +/// Uses fuzzy title matching (Jaro-Winkler > 0.85). When multiple recordings match, +/// prefers: Album > EP > Single, then older release date. +/// Returns `None` if no match exceeds the threshold. +pub fn resolve_from_discography<'a>( + title: &str, + recordings: &'a [DiscRecording], +) -> Option<&'a DiscRecording> { + let title_norm = normalize_title(title); + let mut matches: Vec<(f64, &DiscRecording)> = recordings + .iter() + .map(|r| { + ( + strsim::jaro_winkler(&title_norm, &normalize_title(&r.title)), + r, + ) + }) + .filter(|(score, _)| *score > 0.85) + .collect(); + + if matches.is_empty() { + return None; + } + + matches.sort_by(|(score_a, a), (score_b, b)| { + score_b + .partial_cmp(score_a) + .unwrap_or(std::cmp::Ordering::Equal) + .then_with(|| rg_type_priority(&a.rg_type).cmp(&rg_type_priority(&b.rg_type))) + .then_with(|| a.rg_date.cmp(&b.rg_date)) + }); + + matches.first().map(|(_, r)| *r) +} + +fn rg_type_priority(rg_type: &str) -> u8 { + match rg_type { + "Album" => 0, + "EP" => 1, + _ => 2, + } +} + /// A display-friendly watchlist entry with resolved names. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct WatchListEntry { @@ -120,6 +183,7 @@ pub async fn add_artist( let mut summary = AddSummary::default(); let mut seen_mbids: std::collections::HashSet = std::collections::HashSet::new(); + let mut disc_recordings: Vec = Vec::new(); for rg in &release_groups { // Resolve a concrete release MBID from the release group @@ -139,6 +203,9 @@ pub async fn add_artist( tracing::info!(title = %rg.title, release_mbid = %release_mbid, "fetching tracks for release group"); + let rg_type = rg.primary_type.clone().unwrap_or_default(); + let rg_date = rg.first_release_date.clone(); + let tracks = match provider.get_release_tracks(&release_mbid).await { Ok(t) => t, Err(e) => { @@ -149,6 +216,14 @@ pub async fn add_artist( }; for track in &tracks { + // Build discography cache entry for every recording + disc_recordings.push(DiscRecording { + mbid: track.recording_mbid.clone(), + title: track.title.clone(), + rg_type: rg_type.clone(), + rg_date: rg_date.clone(), + }); + if !seen_mbids.insert(track.recording_mbid.clone()) { continue; // Already processed this recording in another release group } @@ -172,6 +247,12 @@ pub async fn add_artist( } } + // Populate the discography cache so subsequent top-song watches resolve instantly + if let Ok(json) = serde_json::to_string(&disc_recordings) { + let cache_key = format!("artist_known_recordings:{artist_mbid}"); + let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await; + } + tracing::info!(%summary, "artist watch complete"); Ok(summary) } @@ -253,19 +334,129 @@ pub async fn add_track( provider: &impl MetadataProvider, user_id: Option, ) -> WatchResult { - let (resolved_title, resolved_artist, resolved_mbid, resolved_artist_mbid) = + // Fast path: if we have artist name + title, try to resolve the MBID directly from + // the cached discography (no MB API calls needed). This makes top song watches instant. + if let (Some(a), Some(t)) = ( + artist_name.filter(|s| !s.is_empty()), + title.filter(|s| !s.is_empty()), + ) && let Ok(Some(existing_artist)) = queries::artists::find_by_name(conn, a).await + && let Some(ref artist_mbid) = existing_artist.musicbrainz_id + && let Some(disc) = load_and_resolve_discography(conn, artist_mbid, t).await + { + tracing::info!( + title = t, + mbid = %disc.mbid, + rg_type = %disc.rg_type, + "resolved MBID from cached discography (fast path)" + ); + let artist = queries::artists::upsert(conn, a, Some(artist_mbid)).await?; + return finish_add_track(conn, t, a, Some(disc.mbid.clone()), artist, user_id).await; + } + + // Slow path: resolve via MB API + let (resolved_title, resolved_artist, _resolved_mbid, resolved_artist_mbid) = resolve_track_info(artist_name, title, musicbrainz_id, provider).await?; let artist = queries::artists::upsert(conn, &resolved_artist, resolved_artist_mbid.as_deref()).await?; - let is_owned = matching::track_is_owned(conn, &resolved_artist, &resolved_title).await?; + + // Try to resolve an MBID from the artist's discography using fuzzy title matching. + // This ensures the wanted item's MBID matches a recording on the displayed release group. + let disc_mbid = if let Some(ref artist_mbid) = artist.musicbrainz_id { + let recordings = load_or_build_discography(conn, artist_mbid, provider).await; + resolve_from_discography(&resolved_title, &recordings).map(|d| d.mbid.clone()) + } else { + None + }; + + if disc_mbid.is_none() { + tracing::warn!( + title = %resolved_title, + artist = %resolved_artist, + "no discography match found — track will not appear on artist page" + ); + } + + finish_add_track( + conn, + &resolved_title, + &resolved_artist, + disc_mbid, + artist, + user_id, + ) + .await +} + +/// Shared tail of `add_track`: dedup check, create wanted_item, detect ownership. +async fn finish_add_track( + conn: &DatabaseConnection, + title: &str, + artist_name: &str, + recording_mbid: Option, + artist: artist::Model, + user_id: Option, +) -> WatchResult { + // Dedup: skip if a wanted_item with this MBID already exists + if let Some(ref mbid) = recording_mbid + && let Ok(Some(existing)) = queries::wanted::find_by_mbid(conn, mbid).await + { + let artist_name_resolved = if let Some(aid) = existing.artist_id { + queries::artists::get_by_id(conn, aid) + .await + .map(|a| a.name) + .ok() + } else { + Some(artist.name.clone()) + }; + return Ok(WatchListEntry { + id: existing.id, + item_type: existing.item_type, + name: existing.name, + artist_name: artist_name_resolved, + status: existing.status, + added_at: existing.added_at, + }); + } + + // Dedup: skip if a wanted_item with same name + artist already exists + let all_wanted = queries::wanted::list(conn, None, None).await?; + let title_lower = title.to_lowercase(); + if let Some(existing) = all_wanted + .iter() + .find(|w| w.artist_id == Some(artist.id) && w.name.to_lowercase() == title_lower) + { + // Update stale MBID if the new one differs (e.g., resolved from discography) + if let Some(ref new_mbid) = recording_mbid + && existing.musicbrainz_id.as_deref() != Some(new_mbid) + { + let _ = queries::wanted::update_mbid(conn, existing.id, new_mbid).await; + tracing::info!( + id = existing.id, + old_mbid = ?existing.musicbrainz_id, + new_mbid = %new_mbid, + title = title, + "updated stale MBID on existing wanted item" + ); + } + return Ok(WatchListEntry { + id: existing.id, + item_type: existing.item_type, + name: existing.name.clone(), + artist_name: Some(artist.name), + status: existing.status, + added_at: existing.added_at, + }); + } + + let is_owned = matching::track_is_owned(conn, artist_name, title).await?; let item = queries::wanted::add( conn, queries::wanted::AddWantedItem { item_type: ItemType::Track, - name: &resolved_title, - musicbrainz_id: resolved_mbid.as_deref(), + name: title, + musicbrainz_id: recording_mbid.as_deref(), artist_id: Some(artist.id), album_id: None, track_id: None, @@ -284,13 +475,101 @@ pub async fn add_track( Ok(WatchListEntry { id: item.id, item_type: ItemType::Track, - name: resolved_title, + name: title.to_string(), artist_name: Some(artist.name), status, added_at: item.added_at, }) } +/// Load the discography cache and resolve a title. Returns None if cache doesn't exist +/// or title doesn't match. +async fn load_and_resolve_discography( + conn: &DatabaseConnection, + artist_mbid: &str, + title: &str, +) -> Option { + let cache_key = format!("artist_known_recordings:{artist_mbid}"); + let json = queries::cache::get(conn, &cache_key).await.ok()??; + let recordings: Vec = serde_json::from_str(&json).ok()?; + resolve_from_discography(title, &recordings).cloned() +} + +/// Load the discography cache, or build it from the detail page's cached release group +/// tracks (`artist_rg_tracks:*`). This ensures the MBIDs match what the detail page displays. +/// Falls back to MB API only if no detail page caches exist. +async fn load_or_build_discography( + conn: &DatabaseConnection, + artist_mbid: &str, + provider: &impl MetadataProvider, +) -> Vec { + let cache_key = format!("artist_known_recordings:{artist_mbid}"); + + // Try loading from cache (handle both new Vec format and old HashMap format) + if let Ok(Some(json)) = queries::cache::get(conn, &cache_key).await { + if let Ok(recordings) = serde_json::from_str::>(&json) { + return recordings; + } + // Old format — fall through to rebuild + tracing::debug!(artist_mbid, "rebuilding discography cache (old format)"); + } + + // Build from the detail page's cached release group tracks. + // These are the source of truth for which recording MBIDs the detail page will display. + let mut recordings = Vec::new(); + if let Ok(release_groups) = provider.get_artist_release_groups(artist_mbid).await { + for rg in &release_groups { + if rg.featured || !rg.secondary_types.is_empty() { + continue; + } + let rg_type = rg.primary_type.clone().unwrap_or_default(); + let rg_date = rg.first_release_date.clone(); + + // Prefer the detail page's cached tracks for this release group + let rg_cache_key = format!("artist_rg_tracks:{}", rg.mbid); + if let Ok(Some(json)) = queries::cache::get(conn, &rg_cache_key).await + && let Ok(cached) = serde_json::from_str::(&json) + && let Some(tracks) = cached.get("tracks").and_then(|t| t.as_array()) + { + for t in tracks { + if let (Some(mbid), Some(title)) = ( + t.get("recording_mbid").and_then(|v| v.as_str()), + t.get("title").and_then(|v| v.as_str()), + ) { + recordings.push(DiscRecording { + mbid: mbid.to_string(), + title: title.to_string(), + rg_type: rg_type.clone(), + rg_date: rg_date.clone(), + }); + } + } + continue; // Got tracks from detail page cache, skip API + } + + // No detail page cache — fetch from API + let release_id = rg.first_release_mbid.as_deref().unwrap_or(&rg.mbid); + if let Ok(tracks) = provider.get_release_tracks(release_id).await { + for t in &tracks { + recordings.push(DiscRecording { + mbid: t.recording_mbid.clone(), + title: t.title.clone(), + rg_type: rg_type.clone(), + rg_date: rg_date.clone(), + }); + } + } + } + } + + // Cache for 7 days + if let Ok(json) = serde_json::to_string(&recordings) { + let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await; + } + + recordings +} + /// Internal: add a single track wanted item. Returns Ok(true) if added as Wanted, /// Ok(false) if already owned. async fn add_track_inner( @@ -310,6 +589,18 @@ async fn add_track_inner( } let artist = queries::artists::upsert(conn, artist_name, artist_mbid).await?; + + // Also check by name + artist_id to catch race conditions (rapid double-clicks) + let all_wanted = queries::wanted::list(conn, None, None).await?; + let title_lower = title.to_lowercase(); + if all_wanted + .iter() + .any(|w| w.artist_id == Some(artist.id) && w.name.to_lowercase() == title_lower) + { + tracing::debug!(title = title, "already in watchlist by name, skipping"); + return Ok(false); + } + let is_owned = matching::track_is_owned(conn, artist_name, title).await?; let item = queries::wanted::add( @@ -407,38 +698,81 @@ async fn resolve_album_info( /// Resolve track info from MBID if needed. /// Returns (title, artist_name, recording_mbid, artist_mbid). +/// When an MBID is provided, validates it via MB lookup. If the MBID is stale/invalid, +/// falls back to searching by artist+title to find the correct recording MBID. async fn resolve_track_info( artist_name: Option<&str>, title: Option<&str>, mbid: Option<&str>, provider: &impl MetadataProvider, ) -> WatchResult<(String, String, Option, Option)> { + let has_name = title.filter(|s| !s.is_empty()).is_some() + && artist_name.filter(|s| !s.is_empty()).is_some(); + + // If we have an MBID, validate it via lookup + if let Some(mbid) = mbid.filter(|s| !s.is_empty()) { + match provider.get_recording(mbid).await { + Ok(details) => { + return Ok(( + title + .filter(|s| !s.is_empty()) + .map(String::from) + .unwrap_or(details.title), + artist_name + .filter(|s| !s.is_empty()) + .map(String::from) + .unwrap_or(details.artist), + Some(mbid.to_string()), + details.artist_mbid, + )); + } + Err(e) => { + // MBID is stale/invalid — fall back to search if we have name info + if has_name { + tracing::warn!(mbid = mbid, error = %e, "MBID validation failed, falling back to search"); + } else { + return Err(WatchError::Other(format!( + "MusicBrainz lookup failed for {mbid}: {e}" + ))); + } + } + } + } + + // Search by artist + title if let (Some(t), Some(a)) = ( title.filter(|s| !s.is_empty()), artist_name.filter(|s| !s.is_empty()), ) { - return Ok((t.to_string(), a.to_string(), mbid.map(String::from), None)); + // Try to find the correct recording MBID via search + let results = provider + .search_recording(a, t) + .await + .map_err(|e| WatchError::Other(format!("MusicBrainz search failed: {e}")))?; + + // Prefer a result credited to this artist (by artist name match) + // to avoid picking up compilation/VA versions of the same song + let a_lower = a.to_lowercase(); + let best = results + .iter() + .find(|r| r.artist.to_lowercase() == a_lower) + .or(results.first()); + + if let Some(best) = best { + return Ok(( + t.to_string(), + a.to_string(), + Some(best.mbid.clone()), + best.artist_mbid.clone(), + )); + } + + // No search results — return without MBID + return Ok((t.to_string(), a.to_string(), None, None)); } - let mbid = - mbid.ok_or_else(|| WatchError::Other("either artist+title or --mbid is required".into()))?; - - let details = provider - .get_recording(mbid) - .await - .map_err(|e| WatchError::Other(format!("MusicBrainz lookup failed: {e}")))?; - - Ok(( - title - .filter(|s| !s.is_empty()) - .map(String::from) - .unwrap_or(details.title), - artist_name - .filter(|s| !s.is_empty()) - .map(String::from) - .unwrap_or(details.artist), - Some(mbid.to_string()), - details.artist_mbid, + Err(WatchError::Other( + "either artist+title or a valid MBID is required".into(), )) }