use shanty_db::entities::track;

use crate::cleaning::normalize;
use shanty_data::{RecordingMatch, ReleaseRef};

/// A scored recording match with the best matching release.
#[derive(Debug, Clone)]
pub struct ScoredMatch {
    /// The candidate recording that was scored.
    pub recording: RecordingMatch,
    /// Confidence computed for `recording` by [`score_match`].
    pub confidence: f64,
    /// Release selected by [`pick_best_release`]; `None` when the recording lists no releases.
    pub best_release: Option<ReleaseRef>,
}

/// Build a search query `(artist, title)` from a track's metadata.
///
/// The album artist is preferred over the track artist. Empty tags are treated as missing,
/// so an empty album artist falls back to the track artist instead of producing a blank
/// artist. When the track has a title but no usable artist, the artist part of the query is
/// an empty string.
///
/// Falls back to [`parse_filename`] on the track's file path when there is no usable title.
pub fn build_query(track: &track::Model) -> Option<(String, String)> {
    // Filter each source *before* falling back: filtering after `or` would let an empty
    // album-artist tag hide a perfectly good track artist.
    let artist = track
        .album_artist
        .as_deref()
        .filter(|s| !s.is_empty())
        .or_else(|| track.artist.as_deref().filter(|s| !s.is_empty()));
    let title = track.title.as_deref().filter(|s| !s.is_empty());

    match title {
        Some(title) => Some((artist.unwrap_or("").to_string(), title.to_string())),
        // Without a title the tags are not enough, whatever the artist says.
        None => parse_filename(&track.file_path),
    }
}

/// Parse "Artist - Title" from a filename, stripping extension and path.
///
/// Returns `(artist, title)` when the file stem contains `" - "` with non-empty text on both
/// sides of the first occurrence. Otherwise the whole trimmed stem is returned as the title
/// with an empty artist. Returns `None` when the path has no file stem, the stem is not valid
/// UTF-8, or the stem is blank.
pub fn parse_filename(file_path: &str) -> Option<(String, String)> {
    let stem = std::path::Path::new(file_path).file_stem()?.to_str()?;

    // Try the common "Artist - Title" pattern, splitting on the first delimiter only.
    if let Some((artist, title)) = stem.split_once(" - ") {
        let (artist, title) = (artist.trim(), title.trim());
        if !artist.is_empty() && !title.is_empty() {
            return Some((artist.to_string(), title.to_string()));
        }
    }

    // No usable delimiter: treat the entire stem as the title.
    let name = stem.trim();
    (!name.is_empty()).then(|| (String::new(), name.to_string()))
}

/// Score a candidate recording against the track's known metadata.
/// Returns a confidence value from 0.0 to 1.0.
pub fn score_match(track: &track::Model, candidate: &RecordingMatch) -> f64 { let track_title = track.title.as_deref().map(normalize).unwrap_or_default(); let candidate_title = normalize(&candidate.title); let track_artist = track .artist .as_deref() .or(track.album_artist.as_deref()) .map(normalize) .unwrap_or_default(); let candidate_artist = normalize(&candidate.artist); // Title similarity (weighted 0.6) let title_sim = if track_title.is_empty() || candidate_title.is_empty() { 0.0 } else { strsim::jaro_winkler(&track_title, &candidate_title) }; // Artist similarity (weighted 0.4) let artist_sim = if track_artist.is_empty() || candidate_artist.is_empty() { 0.3 // neutral-ish when we have no artist to compare } else { strsim::jaro_winkler(&track_artist, &candidate_artist) }; let mut score = 0.6 * title_sim + 0.4 * artist_sim; // Album match bonus: strongly prefer recordings that appear on the track's album. // This is critical for imported files that already have correct album tags. if let Some(ref album) = track.album { let track_album = normalize(album); if !track_album.is_empty() { let mut best_album_sim = 0.0f64; for release in &candidate.releases { let release_title = normalize(&release.title); let sim = strsim::jaro_winkler(&track_album, &release_title); best_album_sim = best_album_sim.max(sim); } if best_album_sim > 0.85 { score += 0.15; // Strong bonus for matching album } else if best_album_sim < 0.5 { score -= 0.10; // Penalty for clearly wrong album } } } // Bonus: duration within 3 seconds if let Some(track_dur) = track.duration { // MusicBrainz search results don't always include duration, // but the score from the API itself is a signal if track_dur > 0.0 && candidate.score > 90 { score += 0.03; } } score.min(1.0) } /// Select the best match from candidates that exceeds the confidence threshold. 
pub fn select_best_match( track: &track::Model, candidates: Vec, threshold: f64, ) -> Option { let mut best: Option = None; for candidate in candidates { let confidence = score_match(track, &candidate); tracing::debug!( title = %candidate.title, artist = %candidate.artist, confidence = confidence, "candidate" ); if confidence >= threshold { // Pick the release that best matches the track's album name let best_release = pick_best_release(track, &candidate.releases); let scored = ScoredMatch { recording: candidate, confidence, best_release, }; match &best { Some(current) if scored.confidence <= current.confidence => {} _ => best = Some(scored), } } } best } /// Pick the best release from candidates based on the track's album metadata. /// If the track has an album name, prefer the release with the closest title match. /// Otherwise, fall back to the first release. pub fn pick_best_release(track: &track::Model, releases: &[ReleaseRef]) -> Option { if releases.is_empty() { return None; } let track_album = track.album.as_deref().map(normalize).unwrap_or_default(); if track_album.is_empty() { return releases.first().cloned(); } let mut best: Option<(f64, &ReleaseRef)> = None; for release in releases { let sim = strsim::jaro_winkler(&track_album, &normalize(&release.title)); match best { Some((best_sim, _)) if sim <= best_sim => {} _ => best = Some((sim, release)), } } best.map(|(_, r)| r.clone()) } #[cfg(test)] mod tests { use super::*; #[test] fn test_parse_filename_artist_title() { let result = parse_filename("/music/Pink Floyd - Time.mp3"); assert_eq!(result, Some(("Pink Floyd".into(), "Time".into()))); } #[test] fn test_parse_filename_title_only() { let result = parse_filename("/music/some_song.mp3"); assert_eq!(result, Some(("".into(), "some_song".into()))); } #[test] fn test_parse_filename_nested_path() { let result = parse_filename("/music/Artist/Album/03 - Track Name.flac"); // The "03" gets treated as artist since it splits on " - " assert_eq!(result, 
Some(("03".into(), "Track Name".into()))); } #[test] fn test_build_query_with_metadata() { let track = track::Model { id: 1, file_path: "/music/test.mp3".into(), title: Some("Time".into()), artist: Some("Pink Floyd".into()), album: None, album_artist: None, track_number: None, disc_number: None, duration: None, genre: None, year: None, codec: None, bitrate: None, file_size: 1000, fingerprint: None, musicbrainz_id: None, artist_id: None, album_id: None, file_mtime: None, added_at: chrono::Utc::now().naive_utc(), updated_at: chrono::Utc::now().naive_utc(), }; let result = build_query(&track); assert_eq!(result, Some(("Pink Floyd".into(), "Time".into()))); } #[test] fn test_build_query_falls_back_to_filename() { let track = track::Model { id: 1, file_path: "/music/Radiohead - Creep.mp3".into(), title: None, artist: None, album: None, album_artist: None, track_number: None, disc_number: None, duration: None, genre: None, year: None, codec: None, bitrate: None, file_size: 1000, fingerprint: None, musicbrainz_id: None, artist_id: None, album_id: None, file_mtime: None, added_at: chrono::Utc::now().naive_utc(), updated_at: chrono::Utc::now().naive_utc(), }; let result = build_query(&track); assert_eq!(result, Some(("Radiohead".into(), "Creep".into()))); } #[test] fn test_score_match_exact() { let track = track::Model { id: 1, file_path: "/test.mp3".into(), title: Some("Time".into()), artist: Some("Pink Floyd".into()), album: None, album_artist: None, track_number: None, disc_number: None, duration: None, genre: None, year: None, codec: None, bitrate: None, file_size: 1000, fingerprint: None, musicbrainz_id: None, artist_id: None, album_id: None, file_mtime: None, added_at: chrono::Utc::now().naive_utc(), updated_at: chrono::Utc::now().naive_utc(), }; let candidate = RecordingMatch { mbid: "123".into(), title: "Time".into(), artist: "Pink Floyd".into(), artist_mbid: None, releases: vec![], score: 100, }; let score = score_match(&track, &candidate); assert!(score > 0.95, 
"exact match should score > 0.95, got {score}"); } #[test] fn test_score_match_fuzzy() { let track = track::Model { id: 1, file_path: "/test.mp3".into(), title: Some("Comfortably Numb".into()), artist: Some("Pink Floyd".into()), album: None, album_artist: None, track_number: None, disc_number: None, duration: None, genre: None, year: None, codec: None, bitrate: None, file_size: 1000, fingerprint: None, musicbrainz_id: None, artist_id: None, album_id: None, file_mtime: None, added_at: chrono::Utc::now().naive_utc(), updated_at: chrono::Utc::now().naive_utc(), }; // Slight misspelling let candidate = RecordingMatch { mbid: "123".into(), title: "Comfortably Numb".into(), artist: "Pink Flloyd".into(), // typo artist_mbid: None, releases: vec![], score: 95, }; let score = score_match(&track, &candidate); assert!(score > 0.85, "fuzzy match should score > 0.85, got {score}"); } }