Unified the track logic. Seems to work much better

This commit is contained in:
Connor Johnstone
2026-03-26 17:38:16 -04:00
parent 17fd738774
commit de2847d41c
2 changed files with 63 additions and 130 deletions
+56 -108
View File
@@ -156,103 +156,63 @@ pub async fn add_artist(
}
};
tracing::info!(name = %resolved_name, mbid = %artist_mbid, "fetching discography (release groups)");
tracing::info!(name = %resolved_name, mbid = %artist_mbid, "loading discography");
let all_release_groups = provider
.get_artist_release_groups(&artist_mbid)
.await
.map_err(|e| WatchError::Other(format!("failed to fetch release groups: {e}")))?;
// Use the unified discography source — same MBIDs the detail page displays.
// This reads from artist_rg_tracks caches (populated by enrich_artist),
// ensuring wanted_items always have MBIDs that match the detail page.
let disc = load_or_build_discography(conn, &artist_mbid, provider).await;
// Only include release groups where this artist is the primary credit,
// filtered by allowed secondary types (same as the artist detail page)
let release_groups: Vec<_> = all_release_groups
.into_iter()
.filter(|rg| !rg.featured)
.filter(|rg| {
if rg.secondary_types.is_empty() {
if disc.is_empty() {
tracing::warn!(
name = %resolved_name,
mbid = %artist_mbid,
"no discography data available — visit the artist page first to populate caches"
);
return Ok(AddSummary::default());
}
// Deduplicate by MBID and expand into wanted items
let mut summary = AddSummary::default();
let mut seen_mbids: std::collections::HashSet<String> = std::collections::HashSet::new();
// Filter by allowed secondary types
let filtered: Vec<_> = disc
.iter()
.filter(|r| {
if r.rg_type.is_empty() || r.rg_type == "Album" {
true
} else {
rg.secondary_types
.iter()
.all(|st| allowed_secondary_types.contains(st))
allowed_secondary_types.iter().any(|st| st == &r.rg_type)
}
})
.collect();
tracing::info!(count = release_groups.len(), "found release groups");
tracing::info!(total = disc.len(), filtered = filtered.len(), "discography loaded");
let mut summary = AddSummary::default();
let mut seen_mbids: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut disc_recordings: Vec<DiscRecording> = Vec::new();
for rg in &release_groups {
// Resolve a concrete release MBID from the release group
let release_mbid = if let Some(ref rid) = rg.first_release_mbid {
rid.clone()
} else {
// Browse releases for this release group to find a concrete release
match provider.resolve_release_from_group(&rg.mbid).await {
Ok(rid) => rid,
Err(e) => {
tracing::warn!(rg = %rg.title, rg_id = %rg.mbid, error = %e, "failed to resolve release from group");
summary.errors += 1;
continue;
}
}
};
tracing::info!(title = %rg.title, release_mbid = %release_mbid, "fetching tracks for release group");
let rg_type = rg.primary_type.clone().unwrap_or_default();
let rg_date = rg.first_release_date.clone();
let tracks = match provider.get_release_tracks(&release_mbid).await {
Ok(t) => t,
for rec in &filtered {
if !seen_mbids.insert(rec.mbid.clone()) {
continue; // Already processed this recording
}
match add_track_inner(
conn,
&resolved_name,
&rec.title,
Some(&rec.mbid),
Some(&artist_mbid),
user_id,
)
.await
{
Ok(true) => summary.tracks_added += 1,
Ok(false) => summary.tracks_already_owned += 1,
Err(e) => {
tracing::warn!(rg = %rg.title, error = %e, "failed to fetch tracks");
tracing::warn!(track = %rec.title, error = %e, "failed to add track");
summary.errors += 1;
continue;
}
};
for track in &tracks {
// Build discography cache entry for every recording
disc_recordings.push(DiscRecording {
mbid: track.recording_mbid.clone(),
title: track.title.clone(),
rg_type: rg_type.clone(),
rg_date: rg_date.clone(),
});
if !seen_mbids.insert(track.recording_mbid.clone()) {
continue; // Already processed this recording in another release group
}
match add_track_inner(
conn,
&resolved_name,
&track.title,
Some(&track.recording_mbid),
Some(&artist_mbid),
user_id,
)
.await
{
Ok(true) => summary.tracks_added += 1,
Ok(false) => summary.tracks_already_owned += 1,
Err(e) => {
tracing::warn!(track = %track.title, error = %e, "failed to add track");
summary.errors += 1;
}
}
}
}
// Populate the discography cache so subsequent top-song watches resolve instantly
if let Ok(json) = serde_json::to_string(&disc_recordings) {
let cache_key = format!("artist_known_recordings:{artist_mbid}");
let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await;
}
tracing::info!(%summary, "artist watch complete");
Ok(summary)
}
@@ -449,7 +409,7 @@ async fn finish_add_track(
});
}
let is_owned = matching::track_is_owned(conn, artist_name, title).await?;
let is_owned = matching::track_is_owned(conn, recording_mbid.as_deref()).await?;
let item = queries::wanted::add(
conn,
@@ -496,8 +456,8 @@ async fn load_and_resolve_discography(
}
/// Load the discography cache, or build it from the detail page's cached release group
/// tracks (`artist_rg_tracks:*`). This ensures the MBIDs match what the detail page displays.
/// Falls back to MB API only if no detail page caches exist.
/// tracks (`artist_rg_tracks:*`). Only uses caches populated by enrich_artist() — never
/// fetches from MB API independently, to ensure MBIDs always match the detail page.
async fn load_or_build_discography(
conn: &DatabaseConnection,
artist_mbid: &str,
@@ -505,17 +465,17 @@ async fn load_or_build_discography(
) -> Vec<DiscRecording> {
let cache_key = format!("artist_known_recordings:{artist_mbid}");
// Try loading from cache (handle both new Vec format and old HashMap format)
// Try the pre-built known_recordings cache first
if let Ok(Some(json)) = queries::cache::get(conn, &cache_key).await {
if let Ok(recordings) = serde_json::from_str::<Vec<DiscRecording>>(&json) {
return recordings;
}
// Old format — fall through to rebuild
tracing::debug!(artist_mbid, "rebuilding discography cache (old format)");
}
// Build from the detail page's cached release group tracks.
// These are the source of truth for which recording MBIDs the detail page will display.
// Build from the detail page's cached release group tracks (artist_rg_tracks:*).
// These are populated by enrich_artist() and are the source of truth.
// We only use cached data — never fetch from MB API here to avoid MBID divergence.
let mut recordings = Vec::new();
if let Ok(release_groups) = provider.get_artist_release_groups(artist_mbid).await {
for rg in &release_groups {
@@ -525,7 +485,6 @@ async fn load_or_build_discography(
let rg_type = rg.primary_type.clone().unwrap_or_default();
let rg_date = rg.first_release_date.clone();
// Prefer the detail page's cached tracks for this release group
let rg_cache_key = format!("artist_rg_tracks:{}", rg.mbid);
if let Ok(Some(json)) = queries::cache::get(conn, &rg_cache_key).await
&& let Ok(cached) = serde_json::from_str::<serde_json::Value>(&json)
@@ -544,27 +503,16 @@ async fn load_or_build_discography(
});
}
}
continue; // Got tracks from detail page cache, skip API
}
// No detail page cache — fetch from API
let release_id = rg.first_release_mbid.as_deref().unwrap_or(&rg.mbid);
if let Ok(tracks) = provider.get_release_tracks(release_id).await {
for t in &tracks {
recordings.push(DiscRecording {
mbid: t.recording_mbid.clone(),
title: t.title.clone(),
rg_type: rg_type.clone(),
rg_date: rg_date.clone(),
});
}
}
// No cache for this RG = skip it (enrich_artist hasn't fetched it yet)
}
}
// Cache for 7 days
if let Ok(json) = serde_json::to_string(&recordings) {
let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await;
// Cache the result if we found anything
if !recordings.is_empty() {
if let Ok(json) = serde_json::to_string(&recordings) {
let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await;
}
}
recordings
@@ -601,7 +549,7 @@ async fn add_track_inner(
return Ok(false);
}
let is_owned = matching::track_is_owned(conn, artist_name, title).await?;
let is_owned = matching::track_is_owned(conn, recording_mbid).await?;
let item = queries::wanted::add(
conn,
+7 -22
View File
@@ -87,34 +87,19 @@ pub async fn album_is_owned(
Ok(false)
}
/// Check if a specific track is "owned" — a track with matching artist + title exists.
/// Check if a specific track is "owned" — a track with the same recording MBID exists.
/// Uses MBID-only matching to avoid false positives from fuzzy name matching
/// (e.g., "Kill Me" incorrectly matching "How I Get Myself Killed").
pub async fn track_is_owned(
conn: &DatabaseConnection,
artist_name: &str,
title: &str,
recording_mbid: Option<&str>,
) -> WatchResult<bool> {
let norm_artist = normalize(artist_name);
let norm_title = normalize(title);
// Fuzzy: search tracks matching the title
let tracks = queries::tracks::search(conn, title).await?;
for track in &tracks {
let title_match = track
.title
.as_deref()
.map(|t| strsim::jaro_winkler(&norm_title, &normalize(t)) > 0.85)
.unwrap_or(false);
let artist_match = track
.artist
.as_deref()
.or(track.album_artist.as_deref())
.map(|a| strsim::jaro_winkler(&norm_artist, &normalize(a)) > 0.85)
.unwrap_or(false);
if title_match && artist_match {
if let Some(mbid) = recording_mbid {
let all_tracks = queries::tracks::get_by_mbid(conn, mbid).await?;
if !all_tracks.is_empty() {
return Ok(true);
}
}
Ok(false)
}