300 lines
9.3 KiB
Rust
300 lines
9.3 KiB
Rust
use shanty_db::entities::track;
|
|
|
|
use crate::cleaning::normalize;
|
|
use shanty_data::{RecordingMatch, ReleaseRef};
|
|
|
|
/// A scored recording match with the best matching release.
|
|
#[derive(Debug, Clone)]
|
|
pub struct ScoredMatch {
|
|
pub recording: RecordingMatch,
|
|
pub confidence: f64,
|
|
pub best_release: Option<ReleaseRef>,
|
|
}
|
|
|
|
/// Build a search query (artist, title) from a track's metadata.
|
|
/// Falls back to filename parsing if metadata is insufficient.
|
|
pub fn build_query(track: &track::Model) -> Option<(String, String)> {
|
|
let artist = track
|
|
.album_artist
|
|
.as_deref()
|
|
.or(track.artist.as_deref())
|
|
.filter(|s| !s.is_empty());
|
|
let title = track.title.as_deref().filter(|s| !s.is_empty());
|
|
|
|
match (artist, title) {
|
|
(Some(a), Some(t)) => Some((a.to_string(), t.to_string())),
|
|
(None, Some(t)) => Some((String::new(), t.to_string())),
|
|
_ => parse_filename(&track.file_path),
|
|
}
|
|
}
|
|
|
|
/// Parse "Artist - Title" from a filename, stripping extension and path.
///
/// The file stem is split on the first `" - "`:
///
/// * both sides non-blank: `Some((artist, title))`, each side trimmed;
/// * exactly one side blank (e.g. `" - Time"` or `"Artist - "`): the
///   non-blank side is returned as the title with an empty artist, so the
///   dangling delimiter never ends up in the title;
/// * no delimiter (or both sides blank): the trimmed stem is the title and
///   the artist is empty.
///
/// Returns `None` when the path has no file stem, the stem is not valid
/// UTF-8, or the stem is blank.
pub fn parse_filename(file_path: &str) -> Option<(String, String)> {
    let stem = std::path::Path::new(file_path).file_stem()?.to_str()?;

    // Try the common "Artist - Title" pattern first.
    if let Some((artist, title)) = stem.split_once(" - ") {
        let (artist, title) = (artist.trim(), title.trim());
        match (artist.is_empty(), title.is_empty()) {
            (false, false) => return Some((artist.to_string(), title.to_string())),
            // Only one side carries text: use it as the title instead of
            // falling back to the whole stem with its stray " - ".
            (true, false) => return Some((String::new(), title.to_string())),
            (false, true) => return Some((String::new(), artist.to_string())),
            (true, true) => {}
        }
    }

    // No usable delimiter: treat the entire stem as the title.
    let name = stem.trim();
    (!name.is_empty()).then(|| (String::new(), name.to_string()))
}
|
|
|
|
/// Score a candidate recording against the track's known metadata.
|
|
/// Returns a confidence value from 0.0 to 1.0.
|
|
pub fn score_match(track: &track::Model, candidate: &RecordingMatch) -> f64 {
|
|
let track_title = track.title.as_deref().map(normalize).unwrap_or_default();
|
|
let candidate_title = normalize(&candidate.title);
|
|
|
|
let track_artist = track
|
|
.artist
|
|
.as_deref()
|
|
.or(track.album_artist.as_deref())
|
|
.map(normalize)
|
|
.unwrap_or_default();
|
|
let candidate_artist = normalize(&candidate.artist);
|
|
|
|
// Title similarity (weighted 0.6)
|
|
let title_sim = if track_title.is_empty() || candidate_title.is_empty() {
|
|
0.0
|
|
} else {
|
|
strsim::jaro_winkler(&track_title, &candidate_title)
|
|
};
|
|
|
|
// Artist similarity (weighted 0.4)
|
|
let artist_sim = if track_artist.is_empty() || candidate_artist.is_empty() {
|
|
0.3 // neutral-ish when we have no artist to compare
|
|
} else {
|
|
strsim::jaro_winkler(&track_artist, &candidate_artist)
|
|
};
|
|
|
|
let mut score = 0.6 * title_sim + 0.4 * artist_sim;
|
|
|
|
// Bonus: album name matches a release
|
|
if let Some(ref album) = track.album {
|
|
let track_album = normalize(album);
|
|
if !track_album.is_empty() {
|
|
for release in &candidate.releases {
|
|
let release_title = normalize(&release.title);
|
|
let album_sim = strsim::jaro_winkler(&track_album, &release_title);
|
|
if album_sim > 0.85 {
|
|
score += 0.05;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Bonus: duration within 3 seconds
|
|
if let Some(track_dur) = track.duration {
|
|
// MusicBrainz search results don't always include duration,
|
|
// but the score from the API itself is a signal
|
|
if track_dur > 0.0 && candidate.score > 90 {
|
|
score += 0.03;
|
|
}
|
|
}
|
|
|
|
score.min(1.0)
|
|
}
|
|
|
|
/// Select the best match from candidates that exceeds the confidence threshold.
|
|
pub fn select_best_match(
|
|
track: &track::Model,
|
|
candidates: Vec<RecordingMatch>,
|
|
threshold: f64,
|
|
) -> Option<ScoredMatch> {
|
|
let mut best: Option<ScoredMatch> = None;
|
|
|
|
for candidate in candidates {
|
|
let confidence = score_match(track, &candidate);
|
|
tracing::debug!(
|
|
title = %candidate.title,
|
|
artist = %candidate.artist,
|
|
confidence = confidence,
|
|
"candidate"
|
|
);
|
|
|
|
if confidence >= threshold {
|
|
let best_release = candidate.releases.first().cloned();
|
|
let scored = ScoredMatch {
|
|
recording: candidate,
|
|
confidence,
|
|
best_release,
|
|
};
|
|
match &best {
|
|
Some(current) if scored.confidence <= current.confidence => {}
|
|
_ => best = Some(scored),
|
|
}
|
|
}
|
|
}
|
|
|
|
best
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a track with only the path, title and artist populated; every
    /// other optional field is `None`.
    fn track_with(file_path: &str, title: Option<&str>, artist: Option<&str>) -> track::Model {
        track::Model {
            id: 1,
            file_path: file_path.into(),
            title: title.map(Into::into),
            artist: artist.map(Into::into),
            album: None,
            album_artist: None,
            track_number: None,
            disc_number: None,
            duration: None,
            genre: None,
            year: None,
            codec: None,
            bitrate: None,
            file_size: 1000,
            fingerprint: None,
            musicbrainz_id: None,
            artist_id: None,
            album_id: None,
            file_mtime: None,
            added_at: chrono::Utc::now().naive_utc(),
            updated_at: chrono::Utc::now().naive_utc(),
        }
    }

    #[test]
    fn test_parse_filename_artist_title() {
        let result = parse_filename("/music/Pink Floyd - Time.mp3");
        assert_eq!(result, Some(("Pink Floyd".into(), "Time".into())));
    }

    #[test]
    fn test_parse_filename_title_only() {
        let result = parse_filename("/music/some_song.mp3");
        assert_eq!(result, Some(("".into(), "some_song".into())));
    }

    #[test]
    fn test_parse_filename_nested_path() {
        let result = parse_filename("/music/Artist/Album/03 - Track Name.flac");
        // The "03" gets treated as artist since it splits on " - "
        assert_eq!(result, Some(("03".into(), "Track Name".into())));
    }

    #[test]
    fn test_build_query_with_metadata() {
        let track = track_with("/music/test.mp3", Some("Time"), Some("Pink Floyd"));
        let result = build_query(&track);
        assert_eq!(result, Some(("Pink Floyd".into(), "Time".into())));
    }

    #[test]
    fn test_build_query_falls_back_to_filename() {
        let track = track_with("/music/Radiohead - Creep.mp3", None, None);
        let result = build_query(&track);
        assert_eq!(result, Some(("Radiohead".into(), "Creep".into())));
    }

    #[test]
    fn test_score_match_exact() {
        let track = track_with("/test.mp3", Some("Time"), Some("Pink Floyd"));
        let candidate = RecordingMatch {
            mbid: "123".into(),
            title: "Time".into(),
            artist: "Pink Floyd".into(),
            artist_mbid: None,
            releases: vec![],
            score: 100,
        };
        let score = score_match(&track, &candidate);
        assert!(score > 0.95, "exact match should score > 0.95, got {score}");
    }

    #[test]
    fn test_score_match_fuzzy() {
        let track = track_with("/test.mp3", Some("Comfortably Numb"), Some("Pink Floyd"));
        // Slight misspelling
        let candidate = RecordingMatch {
            mbid: "123".into(),
            title: "Comfortably Numb".into(),
            artist: "Pink Flloyd".into(), // typo
            artist_mbid: None,
            releases: vec![],
            score: 95,
        };
        let score = score_match(&track, &candidate);
        assert!(score > 0.85, "fuzzy match should score > 0.85, got {score}");
    }
}
|