I **think** I've at least 99% fixed the top songs mismatch

Connor Johnstone
2026-03-25 21:50:09 -04:00
parent 1a890b0c11
commit 159cdda386
3 changed files with 146 additions and 58 deletions
+64 -27
@@ -118,11 +118,18 @@ async fn list_artists(
             .iter()
             .filter(|w| w.artist_id == Some(a.id))
             .collect();
-        let total_watched = artist_wanted.len();
+        // Deduplicate by MBID to match the detail page's counting logic
+        let total_watched = artist_wanted
+            .iter()
+            .filter_map(|w| w.musicbrainz_id.as_deref())
+            .collect::<std::collections::HashSet<_>>()
+            .len();
         let total_owned = artist_wanted
             .iter()
             .filter(|w| w.status == WantedStatus::Owned)
-            .count();
+            .filter_map(|w| w.musicbrainz_id.as_deref())
+            .collect::<std::collections::HashSet<_>>()
+            .len();
         items.push(ArtistListItem {
             id: a.id,
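
A note on this first hunk: the old count took every wanted row for the artist, so two rows that resolve to the same recording inflated the total. A minimal standalone sketch of the dedup-by-MBID idea (the `WantedItem` struct here is a simplified stand-in for `shanty_db::entities::wanted_item::Model`, not the real entity):

```rust
use std::collections::HashSet;

// Simplified stand-in for the wanted_item model (assumption, not the real entity).
struct WantedItem {
    musicbrainz_id: Option<String>,
}

fn main() {
    // Two rows resolving to the same recording MBID, e.g. added once by
    // the watcher and once by an import.
    let artist_wanted = vec![
        WantedItem { musicbrainz_id: Some("abc-123".into()) },
        WantedItem { musicbrainz_id: Some("abc-123".into()) },
        WantedItem { musicbrainz_id: Some("def-456".into()) },
    ];

    // Old counting: every row counts, so duplicates inflate the total.
    assert_eq!(artist_wanted.len(), 3);

    // New counting: collect MBIDs into a HashSet so each recording counts once.
    let total_watched = artist_wanted
        .iter()
        .filter_map(|w| w.musicbrainz_id.as_deref())
        .collect::<HashSet<_>>()
        .len();
    assert_eq!(total_watched, 2);
}
```

One consequence worth noting: rows whose `musicbrainz_id` is `None` drop out of the new count entirely, which is consistent with the detail page's `seen_*` sets below, since those are keyed on recording MBIDs.
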
@@ -435,39 +442,46 @@ pub async fn enrich_artist(
         }
     };
-    // Cross-reference with wanted items to add status (by MBID or by name for this artist)
+    // Cross-reference with wanted items to add status.
+    // Resolve each top song title → discography recording MBID → wanted_item.
+    // This uses the same fuzzy-match + album-preference logic as add_track,
+    // so the MBID is guaranteed to match a recording on the discography page.
     let all_wanted = queries::wanted::list(state.db.conn(), None, None).await?;
-    let artist_wanted: Vec<_> = all_wanted
+    let wanted_by_mbid: std::collections::HashMap<
+        &str,
+        &shanty_db::entities::wanted_item::Model,
+    > = all_wanted
         .iter()
-        .filter(|w| id.is_some() && w.artist_id == id)
+        .filter_map(|w| w.musicbrainz_id.as_deref().map(|m| (m, w)))
         .collect();
+
+    // Load the discography cache for fuzzy title → MBID resolution
+    let disc_recordings: Vec<shanty_watch::DiscRecording> =
+        if let Some(ref artist_mbid) = artist.musicbrainz_id {
+            let cache_key = format!("artist_known_recordings:{artist_mbid}");
+            if let Ok(Some(json)) = queries::cache::get(state.db.conn(), &cache_key).await {
+                serde_json::from_str(&json).unwrap_or_default()
+            } else {
+                vec![]
+            }
+        } else {
+            vec![]
+        };
+
     tracks
         .iter()
         .map(|t| {
-            // Try matching by MBID first
-            let status = t
-                .mbid
-                .as_deref()
-                .and_then(|track_mbid| {
-                    all_wanted
-                        .iter()
-                        .find(|w| w.musicbrainz_id.as_deref() == Some(track_mbid))
-                })
-                // Fall back to matching by title (case-insensitive) within this artist's wanted items
-                .or_else(|| {
-                    let name_lower = t.name.to_lowercase();
-                    artist_wanted
-                        .iter()
-                        .find(|w| w.name.to_lowercase() == name_lower)
-                        .copied()
-                })
-                .map(|w| match w.status {
-                    WantedStatus::Owned => "owned",
-                    WantedStatus::Downloaded => "downloaded",
-                    WantedStatus::Wanted => "wanted",
-                    WantedStatus::Available => "available",
-                });
+            // Resolve the top song title to a discography MBID, then look up the wanted item
+            let matched = shanty_watch::resolve_from_discography(&t.name, &disc_recordings)
+                .and_then(|disc| wanted_by_mbid.get(disc.mbid.as_str()).copied());
+            let status = matched.map(|w| match w.status {
+                WantedStatus::Owned => "owned",
+                WantedStatus::Downloaded => "downloaded",
+                WantedStatus::Wanted => "wanted",
+                WantedStatus::Available => "available",
+            });
             serde_json::json!({
                 "name": t.name,
                 "playcount": t.playcount,
@@ -561,6 +575,7 @@ pub async fn enrich_artist(
     let mut seen_watched: std::collections::HashSet<String> = std::collections::HashSet::new();
     let mut seen_owned: std::collections::HashSet<String> = std::collections::HashSet::new();
     let mut albums: Vec<FullAlbumInfo> = Vec::new();
+    let mut disc_recordings: Vec<shanty_watch::DiscRecording> = Vec::new();
     for rg in &release_groups {
         if skip_track_fetch {
@@ -627,9 +642,20 @@ pub async fn enrich_artist(
         let mut owned: u32 = 0;
         let mut downloaded: u32 = 0;
         let rg_type = rg.primary_type.clone().unwrap_or_default();
+        let rg_date = rg.first_release_date.clone();
         for track in &cached.tracks {
             let rec_id = &track.recording_mbid;
+            // Collect for known_recordings cache rebuild
+            disc_recordings.push(shanty_watch::DiscRecording {
+                mbid: rec_id.clone(),
+                title: track.title.clone(),
+                rg_type: rg_type.clone(),
+                rg_date: rg_date.clone(),
+            });
             // Add to artist-level unique available set
             seen_available.insert(rec_id.clone());
@@ -689,6 +715,17 @@ pub async fn enrich_artist(
         });
     }
+
+    // Rebuild the known_recordings cache from the detail page's actual track data.
+    // This ensures add_track's fast path uses MBIDs that match the displayed release groups.
+    if !skip_track_fetch
+        && !disc_recordings.is_empty()
+        && let Ok(json) = serde_json::to_string(&disc_recordings)
+    {
+        let cache_key = format!("artist_known_recordings:{mbid}");
+        let _ =
+            queries::cache::set(state.db.conn(), &cache_key, "computed", &json, 7 * 86400).await;
+    }
+
     // Sort: owned first, then partial, then wanted, then unwatched; within each by date
     albums.sort_by(|a, b| {
         let order = |s: &str| match s {
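
Both detail-page hunks above revolve around `shanty_watch::resolve_from_discography`, whose implementation is not part of this diff. Purely as a sketch of the assumed contract: the comments describe it as fuzzy-match + album-preference, the signature is inferred from the call site, `DiscRecording`'s fields come from the push site above, and the ranking logic below is a guess, not the real code.

```rust
// Assumed shape of the cached discography entry; field names are taken from
// the diff's push site, and Option on rg_date is a guess.
#[derive(Clone, serde::Serialize, serde::Deserialize)]
pub struct DiscRecording {
    pub mbid: String,
    pub title: String,
    pub rg_type: String,         // release-group primary type, e.g. "Album" or "Single"
    pub rg_date: Option<String>, // first release date, "YYYY-MM-DD"
}

/// Hypothetical resolver: match a top-song title against the cached
/// discography, preferring album release groups, then earlier releases.
/// The real shanty_watch version is described as fuzzy, so it presumably
/// also normalizes unicode punctuation, like normalize_for_match below.
pub fn resolve_from_discography<'a>(
    name: &str,
    recordings: &'a [DiscRecording],
) -> Option<&'a DiscRecording> {
    let needle = name.to_lowercase();
    recordings
        .iter()
        .filter(|r| r.title.to_lowercase() == needle)
        .min_by_key(|r| {
            // Albums rank before singles/compilations so the returned MBID
            // points at the canonical release group shown on the detail page.
            let type_rank = if r.rg_type.eq_ignore_ascii_case("album") { 0 } else { 1 };
            (type_rank, r.rg_date.clone().unwrap_or_else(|| "9999".into()))
        })
}
```

Whatever the real ranking is, the key property the commit relies on is that the resolver only ever returns MBIDs drawn from the same `disc_recordings` list the detail page just rendered and re-cached, so the top-songs lookup and the discography view can no longer disagree.
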
+32 -5
@@ -417,18 +417,32 @@ async fn process_tag(
         .map_err(|e| e.to_string())?;
 
     // Ensure a wanted_item exists for this track (marks imported files as Owned)
-    if let Some(ref mbid) = track.musicbrainz_id
-        && queries::wanted::find_by_mbid(conn, mbid)
+    // Check by MBID first, then by name+artist to avoid duplicates from MBID mismatches
+    let has_wanted = if let Some(ref mbid) = track.musicbrainz_id {
+        queries::wanted::find_by_mbid(conn, mbid)
             .await
             .map_err(|e| e.to_string())?
-            .is_none()
-    {
+            .is_some()
+    } else {
+        false
+    };
+    let has_wanted = has_wanted || {
+        // Also check by name + artist_id (normalize unicode dashes and case)
+        let all_wanted = queries::wanted::list(conn, None, None)
+            .await
+            .unwrap_or_default();
+        let title_norm = normalize_for_match(track.title.as_deref().unwrap_or(""));
+        all_wanted
+            .iter()
+            .any(|w| w.artist_id == track.artist_id && normalize_for_match(&w.name) == title_norm)
+    };
+    if !has_wanted {
         let item = queries::wanted::add(
             conn,
             queries::wanted::AddWantedItem {
                 item_type: shanty_db::entities::wanted_item::ItemType::Track,
                 name: track.title.as_deref().unwrap_or("Unknown"),
-                musicbrainz_id: Some(mbid),
+                musicbrainz_id: track.musicbrainz_id.as_deref(),
                 artist_id: track.artist_id,
                 album_id: track.album_id,
                 track_id: Some(track.id),
@@ -574,3 +588,16 @@ async fn process_enrich(
     Ok(vec![])
 }
+
+/// Normalize a string for fuzzy matching: lowercase, replace unicode dashes/quotes with ASCII.
+fn normalize_for_match(s: &str) -> String {
+    s.to_lowercase()
+        .replace(
+            [
+                '\u{2010}', '\u{2011}', '\u{2012}', '\u{2013}', '\u{2014}', '\u{2015}',
+            ],
+            "-",
+        )
+        .replace(['\u{2018}', '\u{2019}'], "'")
+        .replace(['\u{201C}', '\u{201D}'], "\"")
+}