I **think** I've at least 99% fixed the top songs mismatch

This commit is contained in:
Connor Johnstone
2026-03-25 21:50:09 -04:00
parent 827944170a
commit 17fd738774
2 changed files with 361 additions and 27 deletions
+359 -25
View File
@@ -3,6 +3,7 @@ use std::fmt;
use sea_orm::DatabaseConnection;
use serde::{Deserialize, Serialize};
use shanty_db::entities::artist;
use shanty_db::entities::wanted_item::{ItemType, WantedStatus};
use shanty_db::queries;
use shanty_tag::provider::MetadataProvider;
@@ -10,6 +11,68 @@ use shanty_tag::provider::MetadataProvider;
use crate::error::{WatchError, WatchResult};
use crate::matching;
/// A recording from an artist's discography, with release group context.
/// Used in the `artist_known_recordings` cache to resolve top song titles
/// to the correct recording MBID (preferring album over EP/single, older over newer).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscRecording {
pub mbid: String,
pub title: String,
pub rg_type: String,
pub rg_date: Option<String>,
}
/// Normalize a title for comparison: lowercase + replace smart apostrophes/quotes.
fn normalize_title(s: &str) -> String {
s.to_lowercase()
.replace(['\u{2019}', '\u{2018}'], "'")
.replace(['\u{201C}', '\u{201D}'], "\"")
}
/// Resolve a song title to the best matching recording from an artist's discography.
///
/// Uses fuzzy title matching (Jaro-Winkler > 0.85). When multiple recordings match,
/// prefers: Album > EP > Single, then older release date.
/// Returns `None` if no match exceeds the threshold.
pub fn resolve_from_discography<'a>(
title: &str,
recordings: &'a [DiscRecording],
) -> Option<&'a DiscRecording> {
let title_norm = normalize_title(title);
let mut matches: Vec<(f64, &DiscRecording)> = recordings
.iter()
.map(|r| {
(
strsim::jaro_winkler(&title_norm, &normalize_title(&r.title)),
r,
)
})
.filter(|(score, _)| *score > 0.85)
.collect();
if matches.is_empty() {
return None;
}
matches.sort_by(|(score_a, a), (score_b, b)| {
score_b
.partial_cmp(score_a)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| rg_type_priority(&a.rg_type).cmp(&rg_type_priority(&b.rg_type)))
.then_with(|| a.rg_date.cmp(&b.rg_date))
});
matches.first().map(|(_, r)| *r)
}
fn rg_type_priority(rg_type: &str) -> u8 {
match rg_type {
"Album" => 0,
"EP" => 1,
_ => 2,
}
}
/// A display-friendly watchlist entry with resolved names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WatchListEntry {
@@ -120,6 +183,7 @@ pub async fn add_artist(
let mut summary = AddSummary::default();
let mut seen_mbids: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut disc_recordings: Vec<DiscRecording> = Vec::new();
for rg in &release_groups {
// Resolve a concrete release MBID from the release group
@@ -139,6 +203,9 @@ pub async fn add_artist(
tracing::info!(title = %rg.title, release_mbid = %release_mbid, "fetching tracks for release group");
let rg_type = rg.primary_type.clone().unwrap_or_default();
let rg_date = rg.first_release_date.clone();
let tracks = match provider.get_release_tracks(&release_mbid).await {
Ok(t) => t,
Err(e) => {
@@ -149,6 +216,14 @@ pub async fn add_artist(
};
for track in &tracks {
// Build discography cache entry for every recording
disc_recordings.push(DiscRecording {
mbid: track.recording_mbid.clone(),
title: track.title.clone(),
rg_type: rg_type.clone(),
rg_date: rg_date.clone(),
});
if !seen_mbids.insert(track.recording_mbid.clone()) {
continue; // Already processed this recording in another release group
}
@@ -172,6 +247,12 @@ pub async fn add_artist(
}
}
// Populate the discography cache so subsequent top-song watches resolve instantly
if let Ok(json) = serde_json::to_string(&disc_recordings) {
let cache_key = format!("artist_known_recordings:{artist_mbid}");
let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await;
}
tracing::info!(%summary, "artist watch complete");
Ok(summary)
}
@@ -253,19 +334,129 @@ pub async fn add_track(
provider: &impl MetadataProvider,
user_id: Option<i32>,
) -> WatchResult<WatchListEntry> {
let (resolved_title, resolved_artist, resolved_mbid, resolved_artist_mbid) =
// Fast path: if we have artist name + title, try to resolve the MBID directly from
// the cached discography (no MB API calls needed). This makes top song watches instant.
if let (Some(a), Some(t)) = (
artist_name.filter(|s| !s.is_empty()),
title.filter(|s| !s.is_empty()),
) && let Ok(Some(existing_artist)) = queries::artists::find_by_name(conn, a).await
&& let Some(ref artist_mbid) = existing_artist.musicbrainz_id
&& let Some(disc) = load_and_resolve_discography(conn, artist_mbid, t).await
{
tracing::info!(
title = t,
mbid = %disc.mbid,
rg_type = %disc.rg_type,
"resolved MBID from cached discography (fast path)"
);
let artist = queries::artists::upsert(conn, a, Some(artist_mbid)).await?;
return finish_add_track(conn, t, a, Some(disc.mbid.clone()), artist, user_id).await;
}
// Slow path: resolve via MB API
let (resolved_title, resolved_artist, _resolved_mbid, resolved_artist_mbid) =
resolve_track_info(artist_name, title, musicbrainz_id, provider).await?;
let artist =
queries::artists::upsert(conn, &resolved_artist, resolved_artist_mbid.as_deref()).await?;
let is_owned = matching::track_is_owned(conn, &resolved_artist, &resolved_title).await?;
// Try to resolve an MBID from the artist's discography using fuzzy title matching.
// This ensures the wanted item's MBID matches a recording on the displayed release group.
let disc_mbid = if let Some(ref artist_mbid) = artist.musicbrainz_id {
let recordings = load_or_build_discography(conn, artist_mbid, provider).await;
resolve_from_discography(&resolved_title, &recordings).map(|d| d.mbid.clone())
} else {
None
};
if disc_mbid.is_none() {
tracing::warn!(
title = %resolved_title,
artist = %resolved_artist,
"no discography match found — track will not appear on artist page"
);
}
finish_add_track(
conn,
&resolved_title,
&resolved_artist,
disc_mbid,
artist,
user_id,
)
.await
}
/// Shared tail of `add_track`: dedup check, create wanted_item, detect ownership.
async fn finish_add_track(
conn: &DatabaseConnection,
title: &str,
artist_name: &str,
recording_mbid: Option<String>,
artist: artist::Model,
user_id: Option<i32>,
) -> WatchResult<WatchListEntry> {
// Dedup: skip if a wanted_item with this MBID already exists
if let Some(ref mbid) = recording_mbid
&& let Ok(Some(existing)) = queries::wanted::find_by_mbid(conn, mbid).await
{
let artist_name_resolved = if let Some(aid) = existing.artist_id {
queries::artists::get_by_id(conn, aid)
.await
.map(|a| a.name)
.ok()
} else {
Some(artist.name.clone())
};
return Ok(WatchListEntry {
id: existing.id,
item_type: existing.item_type,
name: existing.name,
artist_name: artist_name_resolved,
status: existing.status,
added_at: existing.added_at,
});
}
// Dedup: skip if a wanted_item with same name + artist already exists
let all_wanted = queries::wanted::list(conn, None, None).await?;
let title_lower = title.to_lowercase();
if let Some(existing) = all_wanted
.iter()
.find(|w| w.artist_id == Some(artist.id) && w.name.to_lowercase() == title_lower)
{
// Update stale MBID if the new one differs (e.g., resolved from discography)
if let Some(ref new_mbid) = recording_mbid
&& existing.musicbrainz_id.as_deref() != Some(new_mbid)
{
let _ = queries::wanted::update_mbid(conn, existing.id, new_mbid).await;
tracing::info!(
id = existing.id,
old_mbid = ?existing.musicbrainz_id,
new_mbid = %new_mbid,
title = title,
"updated stale MBID on existing wanted item"
);
}
return Ok(WatchListEntry {
id: existing.id,
item_type: existing.item_type,
name: existing.name.clone(),
artist_name: Some(artist.name),
status: existing.status,
added_at: existing.added_at,
});
}
let is_owned = matching::track_is_owned(conn, artist_name, title).await?;
let item = queries::wanted::add(
conn,
queries::wanted::AddWantedItem {
item_type: ItemType::Track,
name: &resolved_title,
musicbrainz_id: resolved_mbid.as_deref(),
name: title,
musicbrainz_id: recording_mbid.as_deref(),
artist_id: Some(artist.id),
album_id: None,
track_id: None,
@@ -284,13 +475,101 @@ pub async fn add_track(
Ok(WatchListEntry {
id: item.id,
item_type: ItemType::Track,
name: resolved_title,
name: title.to_string(),
artist_name: Some(artist.name),
status,
added_at: item.added_at,
})
}
/// Load the discography cache and resolve a title. Returns None if cache doesn't exist
/// or title doesn't match.
async fn load_and_resolve_discography(
conn: &DatabaseConnection,
artist_mbid: &str,
title: &str,
) -> Option<DiscRecording> {
let cache_key = format!("artist_known_recordings:{artist_mbid}");
let json = queries::cache::get(conn, &cache_key).await.ok()??;
let recordings: Vec<DiscRecording> = serde_json::from_str(&json).ok()?;
resolve_from_discography(title, &recordings).cloned()
}
/// Load the discography cache, or build it from the detail page's cached release group
/// tracks (`artist_rg_tracks:*`). This ensures the MBIDs match what the detail page displays.
/// Falls back to MB API only if no detail page caches exist.
async fn load_or_build_discography(
conn: &DatabaseConnection,
artist_mbid: &str,
provider: &impl MetadataProvider,
) -> Vec<DiscRecording> {
let cache_key = format!("artist_known_recordings:{artist_mbid}");
// Try loading from cache (handle both new Vec format and old HashMap format)
if let Ok(Some(json)) = queries::cache::get(conn, &cache_key).await {
if let Ok(recordings) = serde_json::from_str::<Vec<DiscRecording>>(&json) {
return recordings;
}
// Old format — fall through to rebuild
tracing::debug!(artist_mbid, "rebuilding discography cache (old format)");
}
// Build from the detail page's cached release group tracks.
// These are the source of truth for which recording MBIDs the detail page will display.
let mut recordings = Vec::new();
if let Ok(release_groups) = provider.get_artist_release_groups(artist_mbid).await {
for rg in &release_groups {
if rg.featured || !rg.secondary_types.is_empty() {
continue;
}
let rg_type = rg.primary_type.clone().unwrap_or_default();
let rg_date = rg.first_release_date.clone();
// Prefer the detail page's cached tracks for this release group
let rg_cache_key = format!("artist_rg_tracks:{}", rg.mbid);
if let Ok(Some(json)) = queries::cache::get(conn, &rg_cache_key).await
&& let Ok(cached) = serde_json::from_str::<serde_json::Value>(&json)
&& let Some(tracks) = cached.get("tracks").and_then(|t| t.as_array())
{
for t in tracks {
if let (Some(mbid), Some(title)) = (
t.get("recording_mbid").and_then(|v| v.as_str()),
t.get("title").and_then(|v| v.as_str()),
) {
recordings.push(DiscRecording {
mbid: mbid.to_string(),
title: title.to_string(),
rg_type: rg_type.clone(),
rg_date: rg_date.clone(),
});
}
}
continue; // Got tracks from detail page cache, skip API
}
// No detail page cache — fetch from API
let release_id = rg.first_release_mbid.as_deref().unwrap_or(&rg.mbid);
if let Ok(tracks) = provider.get_release_tracks(release_id).await {
for t in &tracks {
recordings.push(DiscRecording {
mbid: t.recording_mbid.clone(),
title: t.title.clone(),
rg_type: rg_type.clone(),
rg_date: rg_date.clone(),
});
}
}
}
}
// Cache for 7 days
if let Ok(json) = serde_json::to_string(&recordings) {
let _ = queries::cache::set(conn, &cache_key, "computed", &json, 7 * 86400).await;
}
recordings
}
/// Internal: add a single track wanted item. Returns Ok(true) if added as Wanted,
/// Ok(false) if already owned.
async fn add_track_inner(
@@ -310,6 +589,18 @@ async fn add_track_inner(
}
let artist = queries::artists::upsert(conn, artist_name, artist_mbid).await?;
// Also check by name + artist_id to catch race conditions (rapid double-clicks)
let all_wanted = queries::wanted::list(conn, None, None).await?;
let title_lower = title.to_lowercase();
if all_wanted
.iter()
.any(|w| w.artist_id == Some(artist.id) && w.name.to_lowercase() == title_lower)
{
tracing::debug!(title = title, "already in watchlist by name, skipping");
return Ok(false);
}
let is_owned = matching::track_is_owned(conn, artist_name, title).await?;
let item = queries::wanted::add(
@@ -407,38 +698,81 @@ async fn resolve_album_info(
/// Resolve track info from MBID if needed.
/// Returns (title, artist_name, recording_mbid, artist_mbid).
/// When an MBID is provided, validates it via MB lookup. If the MBID is stale/invalid,
/// falls back to searching by artist+title to find the correct recording MBID.
async fn resolve_track_info(
artist_name: Option<&str>,
title: Option<&str>,
mbid: Option<&str>,
provider: &impl MetadataProvider,
) -> WatchResult<(String, String, Option<String>, Option<String>)> {
let has_name = title.filter(|s| !s.is_empty()).is_some()
&& artist_name.filter(|s| !s.is_empty()).is_some();
// If we have an MBID, validate it via lookup
if let Some(mbid) = mbid.filter(|s| !s.is_empty()) {
match provider.get_recording(mbid).await {
Ok(details) => {
return Ok((
title
.filter(|s| !s.is_empty())
.map(String::from)
.unwrap_or(details.title),
artist_name
.filter(|s| !s.is_empty())
.map(String::from)
.unwrap_or(details.artist),
Some(mbid.to_string()),
details.artist_mbid,
));
}
Err(e) => {
// MBID is stale/invalid — fall back to search if we have name info
if has_name {
tracing::warn!(mbid = mbid, error = %e, "MBID validation failed, falling back to search");
} else {
return Err(WatchError::Other(format!(
"MusicBrainz lookup failed for {mbid}: {e}"
)));
}
}
}
}
// Search by artist + title
if let (Some(t), Some(a)) = (
title.filter(|s| !s.is_empty()),
artist_name.filter(|s| !s.is_empty()),
) {
return Ok((t.to_string(), a.to_string(), mbid.map(String::from), None));
// Try to find the correct recording MBID via search
let results = provider
.search_recording(a, t)
.await
.map_err(|e| WatchError::Other(format!("MusicBrainz search failed: {e}")))?;
// Prefer a result credited to this artist (by artist name match)
// to avoid picking up compilation/VA versions of the same song
let a_lower = a.to_lowercase();
let best = results
.iter()
.find(|r| r.artist.to_lowercase() == a_lower)
.or(results.first());
if let Some(best) = best {
return Ok((
t.to_string(),
a.to_string(),
Some(best.mbid.clone()),
best.artist_mbid.clone(),
));
}
// No search results — return without MBID
return Ok((t.to_string(), a.to_string(), None, None));
}
let mbid =
mbid.ok_or_else(|| WatchError::Other("either artist+title or --mbid is required".into()))?;
let details = provider
.get_recording(mbid)
.await
.map_err(|e| WatchError::Other(format!("MusicBrainz lookup failed: {e}")))?;
Ok((
title
.filter(|s| !s.is_empty())
.map(String::from)
.unwrap_or(details.title),
artist_name
.filter(|s| !s.is_empty())
.map(String::from)
.unwrap_or(details.artist),
Some(mbid.to_string()),
details.artist_mbid,
Err(WatchError::Other(
"either artist+title or a valid MBID is required".into(),
))
}