I **think** I've at least 99% fixed the top songs mismatch

Connor Johnstone
2026-03-25 21:50:09 -04:00
parent 1a890b0c11
commit 159cdda386
3 changed files with 146 additions and 58 deletions
+64 -27
@@ -118,11 +118,18 @@ async fn list_artists(
             .iter()
             .filter(|w| w.artist_id == Some(a.id))
             .collect();
-        let total_watched = artist_wanted.len();
+        // Deduplicate by MBID to match the detail page's counting logic
+        let total_watched = artist_wanted
+            .iter()
+            .filter_map(|w| w.musicbrainz_id.as_deref())
+            .collect::<std::collections::HashSet<_>>()
+            .len();
         let total_owned = artist_wanted
             .iter()
             .filter(|w| w.status == WantedStatus::Owned)
-            .count();
+            .filter_map(|w| w.musicbrainz_id.as_deref())
+            .collect::<std::collections::HashSet<_>>()
+            .len();
         items.push(ArtistListItem {
             id: a.id,
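
A note on this first hunk: the old count took every wanted row for the artist, so two rows that resolve to the same recording inflated the total. A minimal standalone sketch of the dedup-by-MBID idea (the `WantedItem` struct here is a simplified stand-in for `shanty_db::entities::wanted_item::Model`, not the real entity):

```rust
use std::collections::HashSet;

// Simplified stand-in for the wanted_item model (assumption, not the real entity).
struct WantedItem {
    musicbrainz_id: Option<String>,
}

fn main() {
    // Two rows resolving to the same recording MBID, e.g. added once by
    // the watcher and once by an import.
    let artist_wanted = vec![
        WantedItem { musicbrainz_id: Some("abc-123".into()) },
        WantedItem { musicbrainz_id: Some("abc-123".into()) },
        WantedItem { musicbrainz_id: Some("def-456".into()) },
    ];

    // Old counting: every row counts, so duplicates inflate the total.
    assert_eq!(artist_wanted.len(), 3);

    // New counting: collect MBIDs into a HashSet so each recording counts once.
    let total_watched = artist_wanted
        .iter()
        .filter_map(|w| w.musicbrainz_id.as_deref())
        .collect::<HashSet<_>>()
        .len();
    assert_eq!(total_watched, 2);
}
```

One consequence worth noting: rows whose `musicbrainz_id` is `None` drop out of the new count entirely, which is consistent with the detail page's `seen_*` sets below, since those are keyed on recording MBIDs.
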
@@ -435,39 +442,46 @@ pub async fn enrich_artist(
         }
     };
-    // Cross-reference with wanted items to add status (by MBID or by name for this artist)
+    // Cross-reference with wanted items to add status.
+    // Resolve each top song title → discography recording MBID → wanted_item.
+    // This uses the same fuzzy-match + album-preference logic as add_track,
+    // so the MBID is guaranteed to match a recording on the discography page.
     let all_wanted = queries::wanted::list(state.db.conn(), None, None).await?;
-    let artist_wanted: Vec<_> = all_wanted
+    let wanted_by_mbid: std::collections::HashMap<
+        &str,
+        &shanty_db::entities::wanted_item::Model,
+    > = all_wanted
         .iter()
-        .filter(|w| id.is_some() && w.artist_id == id)
+        .filter_map(|w| w.musicbrainz_id.as_deref().map(|m| (m, w)))
         .collect();
+
+    // Load the discography cache for fuzzy title → MBID resolution
+    let disc_recordings: Vec<shanty_watch::DiscRecording> =
+        if let Some(ref artist_mbid) = artist.musicbrainz_id {
+            let cache_key = format!("artist_known_recordings:{artist_mbid}");
+            if let Ok(Some(json)) = queries::cache::get(state.db.conn(), &cache_key).await {
+                serde_json::from_str(&json).unwrap_or_default()
+            } else {
+                vec![]
+            }
+        } else {
+            vec![]
+        };
+
     tracks
         .iter()
         .map(|t| {
-            // Try matching by MBID first
-            let status = t
-                .mbid
-                .as_deref()
-                .and_then(|track_mbid| {
-                    all_wanted
-                        .iter()
-                        .find(|w| w.musicbrainz_id.as_deref() == Some(track_mbid))
-                })
-                // Fall back to matching by title (case-insensitive) within this artist's wanted items
-                .or_else(|| {
-                    let name_lower = t.name.to_lowercase();
-                    artist_wanted
-                        .iter()
-                        .find(|w| w.name.to_lowercase() == name_lower)
-                        .copied()
-                })
-                .map(|w| match w.status {
-                    WantedStatus::Owned => "owned",
-                    WantedStatus::Downloaded => "downloaded",
-                    WantedStatus::Wanted => "wanted",
-                    WantedStatus::Available => "available",
-                });
+            // Resolve the top song title to a discography MBID, then look up the wanted item
+            let matched = shanty_watch::resolve_from_discography(&t.name, &disc_recordings)
+                .and_then(|disc| wanted_by_mbid.get(disc.mbid.as_str()).copied());
+            let status = matched.map(|w| match w.status {
+                WantedStatus::Owned => "owned",
+                WantedStatus::Downloaded => "downloaded",
+                WantedStatus::Wanted => "wanted",
+                WantedStatus::Available => "available",
+            });
             serde_json::json!({
                 "name": t.name,
                 "playcount": t.playcount,
@@ -561,6 +575,7 @@ pub async fn enrich_artist(
     let mut seen_watched: std::collections::HashSet<String> = std::collections::HashSet::new();
     let mut seen_owned: std::collections::HashSet<String> = std::collections::HashSet::new();
     let mut albums: Vec<FullAlbumInfo> = Vec::new();
+    let mut disc_recordings: Vec<shanty_watch::DiscRecording> = Vec::new();
     for rg in &release_groups {
         if skip_track_fetch {
@@ -627,9 +642,20 @@ pub async fn enrich_artist(
         let mut owned: u32 = 0;
         let mut downloaded: u32 = 0;
         let rg_type = rg.primary_type.clone().unwrap_or_default();
+        let rg_date = rg.first_release_date.clone();
         for track in &cached.tracks {
             let rec_id = &track.recording_mbid;
+            // Collect for known_recordings cache rebuild
+            disc_recordings.push(shanty_watch::DiscRecording {
+                mbid: rec_id.clone(),
+                title: track.title.clone(),
+                rg_type: rg_type.clone(),
+                rg_date: rg_date.clone(),
+            });
             // Add to artist-level unique available set
             seen_available.insert(rec_id.clone());
@@ -689,6 +715,17 @@ pub async fn enrich_artist(
         });
     }
+
+    // Rebuild the known_recordings cache from the detail page's actual track data.
+    // This ensures add_track's fast path uses MBIDs that match the displayed release groups.
+    if !skip_track_fetch
+        && !disc_recordings.is_empty()
+        && let Ok(json) = serde_json::to_string(&disc_recordings)
+    {
+        let cache_key = format!("artist_known_recordings:{mbid}");
+        let _ =
+            queries::cache::set(state.db.conn(), &cache_key, "computed", &json, 7 * 86400).await;
+    }
+
     // Sort: owned first, then partial, then wanted, then unwatched; within each by date
     albums.sort_by(|a, b| {
         let order = |s: &str| match s {
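
Both detail-page hunks above revolve around `shanty_watch::resolve_from_discography`, whose implementation is not part of this diff. Purely as a sketch of the assumed contract: the comments describe it as fuzzy-match + album-preference, the signature is inferred from the call site, `DiscRecording`'s fields come from the push site above, and the ranking logic below is a guess, not the real code.

```rust
// Assumed shape of the cached discography entry; field names are taken from
// the diff's push site, and Option on rg_date is a guess.
#[derive(Clone, serde::Serialize, serde::Deserialize)]
pub struct DiscRecording {
    pub mbid: String,
    pub title: String,
    pub rg_type: String,         // release-group primary type, e.g. "Album" or "Single"
    pub rg_date: Option<String>, // first release date, "YYYY-MM-DD"
}

/// Hypothetical resolver: match a top-song title against the cached
/// discography, preferring album release groups, then earlier releases.
/// The real shanty_watch version is described as fuzzy, so it presumably
/// also normalizes unicode punctuation, like normalize_for_match below.
pub fn resolve_from_discography<'a>(
    name: &str,
    recordings: &'a [DiscRecording],
) -> Option<&'a DiscRecording> {
    let needle = name.to_lowercase();
    recordings
        .iter()
        .filter(|r| r.title.to_lowercase() == needle)
        .min_by_key(|r| {
            // Albums rank before singles/compilations so the returned MBID
            // points at the canonical release group shown on the detail page.
            let type_rank = if r.rg_type.eq_ignore_ascii_case("album") { 0 } else { 1 };
            (type_rank, r.rg_date.clone().unwrap_or_else(|| "9999".into()))
        })
}
```

Whatever the real ranking is, the key property the commit relies on is that the resolver only ever returns MBIDs drawn from the same `disc_recordings` list the detail page just rendered and re-cached, so the top-songs lookup and the discography view can no longer disagree.
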
+32 -5
@@ -417,18 +417,32 @@ async fn process_tag(
         .map_err(|e| e.to_string())?;
 
     // Ensure a wanted_item exists for this track (marks imported files as Owned)
-    if let Some(ref mbid) = track.musicbrainz_id
-        && queries::wanted::find_by_mbid(conn, mbid)
+    // Check by MBID first, then by name+artist to avoid duplicates from MBID mismatches
+    let has_wanted = if let Some(ref mbid) = track.musicbrainz_id {
+        queries::wanted::find_by_mbid(conn, mbid)
             .await
             .map_err(|e| e.to_string())?
-            .is_none()
-    {
+            .is_some()
+    } else {
+        false
+    };
+    let has_wanted = has_wanted || {
+        // Also check by name + artist_id (normalize unicode dashes and case)
+        let all_wanted = queries::wanted::list(conn, None, None)
+            .await
+            .unwrap_or_default();
+        let title_norm = normalize_for_match(track.title.as_deref().unwrap_or(""));
+        all_wanted
+            .iter()
+            .any(|w| w.artist_id == track.artist_id && normalize_for_match(&w.name) == title_norm)
+    };
+    if !has_wanted {
         let item = queries::wanted::add(
             conn,
             queries::wanted::AddWantedItem {
                 item_type: shanty_db::entities::wanted_item::ItemType::Track,
                 name: track.title.as_deref().unwrap_or("Unknown"),
-                musicbrainz_id: Some(mbid),
+                musicbrainz_id: track.musicbrainz_id.as_deref(),
                 artist_id: track.artist_id,
                 album_id: track.album_id,
                 track_id: Some(track.id),
@@ -574,3 +588,16 @@ async fn process_enrich(
     Ok(vec![])
 }
+
+/// Normalize a string for fuzzy matching: lowercase, replace unicode dashes/quotes with ASCII.
+fn normalize_for_match(s: &str) -> String {
+    s.to_lowercase()
+        .replace(
+            [
+                '\u{2010}', '\u{2011}', '\u{2012}', '\u{2013}', '\u{2014}', '\u{2015}',
+            ],
+            "-",
+        )
+        .replace(['\u{2018}', '\u{2019}'], "'")
+        .replace(['\u{201C}', '\u{201D}'], "\"")
+}