Unified the track logic. Seems to work much better

2026-03-26 17:38:16 -04:00
parent 17fd738774
commit de2847d41c
2 changed files with 63 additions and 130 deletions
@@ -156,103 +156,63 @@ pub async fn add_artist(
        }
    };

-    tracing::info!(name = %resolved_name, mbid = %artist_mbid, "fetching discography (release groups)");
+    tracing::info!(name = %resolved_name, mbid = %artist_mbid, "loading discography");

-    let all_release_groups = provider
-        .get_artist_release_groups(&artist_mbid)
-        .await
-        .map_err(|e| WatchError::Other(format!("failed to fetch release groups: {e}")))?;
+    // Use the unified discography source — same MBIDs the detail page displays.
+    // This reads from artist_rg_tracks caches (populated by enrich_artist),
+    // ensuring wanted_items always have MBIDs that match the detail page.
+    let disc = load_or_build_discography(conn, &artist_mbid, provider).await;

-    // Only include release groups where this artist is the primary credit,
-    // filtered by allowed secondary types (same as the artist detail page)
-    let release_groups: Vec<_> = all_release_groups
-        .into_iter()
-        .filter(|rg| !rg.featured)
-        .filter(|rg| {
-            if rg.secondary_types.is_empty() {
+    if disc.is_empty() {
+        tracing::warn!(
+            name = %resolved_name,
+            mbid = %artist_mbid,
+            "no discography data available — visit the artist page first to populate caches"
+        );
+        return Ok(AddSummary::default());
+    }
+
+    // Deduplicate by MBID and expand into wanted items
+    let mut summary = AddSummary::default();
+    let mut seen_mbids: std::collections::HashSet<String> = std::collections::HashSet::new();
+
+    // Filter by allowed secondary types
+    let filtered: Vec<_> = disc
+        .iter()
+        .filter(|r| {
+            if r.rg_type.is_empty() || r.rg_type == "Album" {
                true
            } else {
-                rg.secondary_types
-                    .iter()
-                    .all(|st| allowed_secondary_types.contains(st))
+                allowed_secondary_types.iter().any(|st| st == &r.rg_type)
            }
        })
        .collect();

-    tracing::info!(count = release_groups.len(), "found release groups");
+    tracing::info!(total = disc.len(), filtered = filtered.len(), "discography loaded");

-    let mut summary = AddSummary::default();
-    let mut seen_mbids: std::collections::HashSet<String> = std::collections::HashSet::new();
-    let mut disc_recordings: Vec<DiscRecording> = Vec::new();
-
-    for rg in &release_groups {
-        // Resolve a concrete release MBID from the release group
-        let release_mbid = if let Some(ref rid) = rg.first_release_mbid {
-            rid.clone()
-        } else {
-            // Browse releases for this release group to find a concrete release
-            match provider.resolve_release_from_group(&rg.mbid).await {
-                Ok(rid) => rid,
-                Err(e) => {
-                    tracing::warn!(rg = %rg.title, rg_id = %rg.mbid, error = %e, "failed to resolve release from group");
-                    summary.errors += 1;
-                    continue;
-                }
-            }
-        };
-
-        tracing::info!(title = %rg.title, release_mbid = %release_mbid, "fetching tracks for release group");
-
-        let rg_type = rg.primary_type.clone().unwrap_or_default();
-        let rg_date = rg.first_release_date.clone();
-
-        let tracks = match provider.get_release_tracks(&release_mbid).await {
-            Ok(t) => t,
+    for rec in &filtered {
+        if !seen_mbids.insert(rec.mbid.clone()) {
+            continue; // Already processed this recording
+        }
+        match add_track_inner(
+            conn,
+            &resolved_name,
+            &rec.title,
+            Some(&rec.mbid),
+            Some(&artist_mbid),
+            user_id,
+        )
+        .await
+        {
+            Ok(true) => summary.tracks_added += 1,
+            Ok(false) => summary.tracks_already_owned += 1,
            Err(e) => {
-                tracing::warn!(rg = %rg.title, error = %e, "failed to fetch tracks");
+                tracing::warn!(track = %rec.title, error = %e, "failed to add track");
                summary.errors += 1;
-                continue;
-            }
-        };
-
-        for track in &tracks {
-            // Build discography cache entry for every recording
-            disc_recordings.push(DiscRecording {
-                mbid: track.recording_mbid.clone(),
-                title: track.title.clone(),
-                rg_type: rg_type.clone(),
-                rg_date: rg_date.clone(),
-            });
-
-            if !seen_mbids.insert(track.recording_mbid.clone()) {
-                continue; // Already processed this recording in another release group
-            }
-            match add_track_inner(
-                conn,
-                &resolved_name,
-                &track.title,
-                Some(&track.recording_mbid),
-                Some(&artist_mbid),
-                user_id,
-            )
-            .await
-            {
-                Ok(true) => summary.tracks_added += 1,
-                Ok(false) => summary.tracks_already_owned += 1,
-                Err(e) => {
-                    tracing::warn!(track = %track.title, error = %e, "failed to add track");
-                    summary.errors += 1;
-                }
            }
        }
    }

-    // Populate the discography cache so subsequent top-song watches resolve instantly
-    if let Ok(json) = serde_json::to_string(&disc_recordings) {
-        let cache_key = format!("artist_known_recordings:{artist_mbid}");
-        let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await;
-    }
-
    tracing::info!(%summary, "artist watch complete");
    Ok(summary)
 }
@@ -449,7 +409,7 @@ async fn finish_add_track(
        });
    }

-    let is_owned = matching::track_is_owned(conn, artist_name, title).await?;
+    let is_owned = matching::track_is_owned(conn, recording_mbid.as_deref()).await?;

    let item = queries::wanted::add(
        conn,
@@ -496,8 +456,8 @@ async fn load_and_resolve_discography(
 }

 /// Load the discography cache, or build it from the detail page's cached release group
-/// tracks (`artist_rg_tracks:*`). This ensures the MBIDs match what the detail page displays.
-/// Falls back to MB API only if no detail page caches exist.
+/// tracks (`artist_rg_tracks:*`). Only uses caches populated by enrich_artist() — never
+/// fetches from MB API independently, to ensure MBIDs always match the detail page.
 async fn load_or_build_discography(
    conn: &DatabaseConnection,
    artist_mbid: &str,
@@ -505,17 +465,17 @@ async fn load_or_build_discography(
 ) -> Vec<DiscRecording> {
    let cache_key = format!("artist_known_recordings:{artist_mbid}");

-    // Try loading from cache (handle both new Vec format and old HashMap format)
+    // Try the pre-built known_recordings cache first
    if let Ok(Some(json)) = queries::cache::get(conn, &cache_key).await {
        if let Ok(recordings) = serde_json::from_str::<Vec<DiscRecording>>(&json) {
            return recordings;
        }
-        // Old format — fall through to rebuild
        tracing::debug!(artist_mbid, "rebuilding discography cache (old format)");
    }

-    // Build from the detail page's cached release group tracks.
-    // These are the source of truth for which recording MBIDs the detail page will display.
+    // Build from the detail page's cached release group tracks (artist_rg_tracks:*).
+    // These are populated by enrich_artist() and are the source of truth.
+    // We only use cached data — never fetch from MB API here to avoid MBID divergence.
    let mut recordings = Vec::new();
    if let Ok(release_groups) = provider.get_artist_release_groups(artist_mbid).await {
        for rg in &release_groups {
@@ -525,7 +485,6 @@ async fn load_or_build_discography(
            let rg_type = rg.primary_type.clone().unwrap_or_default();
            let rg_date = rg.first_release_date.clone();

-            // Prefer the detail page's cached tracks for this release group
            let rg_cache_key = format!("artist_rg_tracks:{}", rg.mbid);
            if let Ok(Some(json)) = queries::cache::get(conn, &rg_cache_key).await
                && let Ok(cached) = serde_json::from_str::<serde_json::Value>(&json)
@@ -544,27 +503,16 @@ async fn load_or_build_discography(
                        });
                    }
                }
-                continue; // Got tracks from detail page cache, skip API
-            }
-
-            // No detail page cache — fetch from API
-            let release_id = rg.first_release_mbid.as_deref().unwrap_or(&rg.mbid);
-            if let Ok(tracks) = provider.get_release_tracks(release_id).await {
-                for t in &tracks {
-                    recordings.push(DiscRecording {
-                        mbid: t.recording_mbid.clone(),
-                        title: t.title.clone(),
-                        rg_type: rg_type.clone(),
-                        rg_date: rg_date.clone(),
-                    });
-                }
            }
+            // No cache for this RG = skip it (enrich_artist hasn't fetched it yet)
        }
    }

-    // Cache for 7 days
-    if let Ok(json) = serde_json::to_string(&recordings) {
-        let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await;
+    // Cache the result if we found anything
+    if !recordings.is_empty() {
+        if let Ok(json) = serde_json::to_string(&recordings) {
+            let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await;
+        }
    }

    recordings
@@ -601,7 +549,7 @@ async fn add_track_inner(
        return Ok(false);
    }

-    let is_owned = matching::track_is_owned(conn, artist_name, title).await?;
+    let is_owned = matching::track_is_owned(conn, recording_mbid).await?;

    let item = queries::wanted::add(
        conn,
@@ -87,34 +87,19 @@ pub async fn album_is_owned(
    Ok(false)
 }

-/// Check if a specific track is "owned" — a track with matching artist + title exists.
+/// Check if a specific track is "owned" — a track with the same recording MBID exists.
+/// Uses MBID-only matching to avoid false positives from fuzzy name matching
+/// (e.g., "Kill Me" incorrectly matching "How I Get Myself Killed").
 pub async fn track_is_owned(
    conn: &DatabaseConnection,
-    artist_name: &str,
-    title: &str,
+    recording_mbid: Option<&str>,
 ) -> WatchResult<bool> {
-    let norm_artist = normalize(artist_name);
-    let norm_title = normalize(title);
-
-    // Fuzzy: search tracks matching the title
-    let tracks = queries::tracks::search(conn, title).await?;
-    for track in &tracks {
-        let title_match = track
-            .title
-            .as_deref()
-            .map(|t| strsim::jaro_winkler(&norm_title, &normalize(t)) > 0.85)
-            .unwrap_or(false);
-        let artist_match = track
-            .artist
-            .as_deref()
-            .or(track.album_artist.as_deref())
-            .map(|a| strsim::jaro_winkler(&norm_artist, &normalize(a)) > 0.85)
-            .unwrap_or(false);
-        if title_match && artist_match {
+    if let Some(mbid) = recording_mbid {
+        let all_tracks = queries::tracks::get_by_mbid(conn, mbid).await?;
+        if !all_tracks.is_empty() {
            return Ok(true);
        }
    }
-
    Ok(false)
 }