Initial commit
This commit is contained in:
+305
@@ -0,0 +1,305 @@
|
||||
use shanty_db::entities::track;
|
||||
|
||||
use crate::cleaning::normalize;
|
||||
use crate::provider::{RecordingMatch, ReleaseRef};
|
||||
|
||||
/// A scored recording match with the best matching release.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ScoredMatch {
|
||||
pub recording: RecordingMatch,
|
||||
pub confidence: f64,
|
||||
pub best_release: Option<ReleaseRef>,
|
||||
}
|
||||
|
||||
/// Build a search query (artist, title) from a track's metadata.
|
||||
/// Falls back to filename parsing if metadata is insufficient.
|
||||
pub fn build_query(track: &track::Model) -> Option<(String, String)> {
|
||||
let artist = track
|
||||
.album_artist
|
||||
.as_deref()
|
||||
.or(track.artist.as_deref())
|
||||
.filter(|s| !s.is_empty());
|
||||
let title = track.title.as_deref().filter(|s| !s.is_empty());
|
||||
|
||||
match (artist, title) {
|
||||
(Some(a), Some(t)) => Some((a.to_string(), t.to_string())),
|
||||
(None, Some(t)) => Some((String::new(), t.to_string())),
|
||||
_ => parse_filename(&track.file_path),
|
||||
}
|
||||
}
|
||||
|
||||
/// Derive an `(artist, title)` pair from a file path.
///
/// Only the file stem is considered (directories and the extension are
/// dropped). A stem of the form `"Artist - Title"` is split at the first
/// `" - "`; both halves are trimmed and must be non-empty. Otherwise the whole
/// trimmed stem becomes the title and the artist is the empty string.
///
/// Returns `None` when the path has no file stem, the stem is not valid
/// UTF-8, or the stem is blank.
pub fn parse_filename(file_path: &str) -> Option<(String, String)> {
    let stem = std::path::Path::new(file_path).file_stem()?.to_str()?;

    // "Artist - Title": accepted only when both sides survive trimming.
    let split = stem
        .split_once(" - ")
        .map(|(lhs, rhs)| (lhs.trim(), rhs.trim()))
        .filter(|(lhs, rhs)| !lhs.is_empty() && !rhs.is_empty());

    match split {
        Some((artist, title)) => Some((artist.to_owned(), title.to_owned())),
        None => {
            // No usable delimiter: the entire stem is the title.
            let whole = stem.trim();
            if whole.is_empty() {
                None
            } else {
                Some((String::new(), whole.to_owned()))
            }
        }
    }
}
|
||||
|
||||
/// Score a candidate recording against the track's known metadata.
|
||||
/// Returns a confidence value from 0.0 to 1.0.
|
||||
pub fn score_match(track: &track::Model, candidate: &RecordingMatch) -> f64 {
|
||||
let track_title = track
|
||||
.title
|
||||
.as_deref()
|
||||
.map(normalize)
|
||||
.unwrap_or_default();
|
||||
let candidate_title = normalize(&candidate.title);
|
||||
|
||||
let track_artist = track
|
||||
.artist
|
||||
.as_deref()
|
||||
.or(track.album_artist.as_deref())
|
||||
.map(normalize)
|
||||
.unwrap_or_default();
|
||||
let candidate_artist = normalize(&candidate.artist);
|
||||
|
||||
// Title similarity (weighted 0.6)
|
||||
let title_sim = if track_title.is_empty() || candidate_title.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
strsim::jaro_winkler(&track_title, &candidate_title)
|
||||
};
|
||||
|
||||
// Artist similarity (weighted 0.4)
|
||||
let artist_sim = if track_artist.is_empty() || candidate_artist.is_empty() {
|
||||
0.3 // neutral-ish when we have no artist to compare
|
||||
} else {
|
||||
strsim::jaro_winkler(&track_artist, &candidate_artist)
|
||||
};
|
||||
|
||||
let mut score = 0.6 * title_sim + 0.4 * artist_sim;
|
||||
|
||||
// Bonus: album name matches a release
|
||||
if let Some(ref album) = track.album {
|
||||
let track_album = normalize(album);
|
||||
if !track_album.is_empty() {
|
||||
for release in &candidate.releases {
|
||||
let release_title = normalize(&release.title);
|
||||
let album_sim = strsim::jaro_winkler(&track_album, &release_title);
|
||||
if album_sim > 0.85 {
|
||||
score += 0.05;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bonus: duration within 3 seconds
|
||||
if let Some(track_dur) = track.duration {
|
||||
// MusicBrainz search results don't always include duration,
|
||||
// but the score from the API itself is a signal
|
||||
if track_dur > 0.0 && candidate.score > 90 {
|
||||
score += 0.03;
|
||||
}
|
||||
}
|
||||
|
||||
score.min(1.0)
|
||||
}
|
||||
|
||||
/// Select the best match from candidates that exceeds the confidence threshold.
|
||||
pub fn select_best_match(
|
||||
track: &track::Model,
|
||||
candidates: Vec<RecordingMatch>,
|
||||
threshold: f64,
|
||||
) -> Option<ScoredMatch> {
|
||||
let mut best: Option<ScoredMatch> = None;
|
||||
|
||||
for candidate in candidates {
|
||||
let confidence = score_match(track, &candidate);
|
||||
tracing::debug!(
|
||||
title = %candidate.title,
|
||||
artist = %candidate.artist,
|
||||
confidence = confidence,
|
||||
"candidate"
|
||||
);
|
||||
|
||||
if confidence >= threshold {
|
||||
let best_release = candidate.releases.first().cloned();
|
||||
let scored = ScoredMatch {
|
||||
recording: candidate,
|
||||
confidence,
|
||||
best_release,
|
||||
};
|
||||
match &best {
|
||||
Some(current) if scored.confidence <= current.confidence => {}
|
||||
_ => best = Some(scored),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
best
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a track fixture with only the fields these tests care about set;
    /// every other optional field is `None`.
    fn track_with(file_path: &str, title: Option<&str>, artist: Option<&str>) -> track::Model {
        let now = chrono::Utc::now().naive_utc();
        track::Model {
            id: 1,
            file_path: file_path.into(),
            title: title.map(Into::into),
            artist: artist.map(Into::into),
            album: None,
            album_artist: None,
            track_number: None,
            disc_number: None,
            duration: None,
            genre: None,
            year: None,
            codec: None,
            bitrate: None,
            file_size: 1000,
            fingerprint: None,
            musicbrainz_id: None,
            artist_id: None,
            album_id: None,
            file_mtime: None,
            added_at: now,
            updated_at: now,
        }
    }

    #[test]
    fn test_parse_filename_artist_title() {
        let result = parse_filename("/music/Pink Floyd - Time.mp3");
        assert_eq!(result, Some(("Pink Floyd".into(), "Time".into())));
    }

    #[test]
    fn test_parse_filename_title_only() {
        let result = parse_filename("/music/some_song.mp3");
        assert_eq!(result, Some(("".into(), "some_song".into())));
    }

    #[test]
    fn test_parse_filename_nested_path() {
        let result = parse_filename("/music/Artist/Album/03 - Track Name.flac");
        // The "03" gets treated as artist since it splits on " - "
        assert_eq!(result, Some(("03".into(), "Track Name".into())));
    }

    #[test]
    fn test_build_query_with_metadata() {
        let track = track_with("/music/test.mp3", Some("Time"), Some("Pink Floyd"));
        let result = build_query(&track);
        assert_eq!(result, Some(("Pink Floyd".into(), "Time".into())));
    }

    #[test]
    fn test_build_query_falls_back_to_filename() {
        let track = track_with("/music/Radiohead - Creep.mp3", None, None);
        let result = build_query(&track);
        assert_eq!(result, Some(("Radiohead".into(), "Creep".into())));
    }

    #[test]
    fn test_score_match_exact() {
        let track = track_with("/test.mp3", Some("Time"), Some("Pink Floyd"));
        let candidate = RecordingMatch {
            mbid: "123".into(),
            title: "Time".into(),
            artist: "Pink Floyd".into(),
            artist_mbid: None,
            releases: vec![],
            score: 100,
        };
        let score = score_match(&track, &candidate);
        assert!(score > 0.95, "exact match should score > 0.95, got {score}");
    }

    #[test]
    fn test_score_match_fuzzy() {
        let track = track_with("/test.mp3", Some("Comfortably Numb"), Some("Pink Floyd"));
        // Slight misspelling
        let candidate = RecordingMatch {
            mbid: "123".into(),
            title: "Comfortably Numb".into(),
            artist: "Pink Flloyd".into(), // typo
            artist_mbid: None,
            releases: vec![],
            score: 95,
        };
        let score = score_match(&track, &candidate);
        assert!(score > 0.85, "fuzzy match should score > 0.85, got {score}");
    }

    #[test]
    fn test_select_best_match_picks_highest_confidence() {
        let track = track_with("/test.mp3", Some("Time"), Some("Pink Floyd"));
        let candidates = vec![
            RecordingMatch {
                mbid: "wrong".into(),
                title: "Money".into(),
                artist: "Pink Floyd".into(),
                artist_mbid: None,
                releases: vec![],
                score: 100,
            },
            RecordingMatch {
                mbid: "right".into(),
                title: "Time".into(),
                artist: "Pink Floyd".into(),
                artist_mbid: None,
                releases: vec![],
                score: 100,
            },
        ];
        let picked = select_best_match(&track, candidates, 0.8).expect("exact match passes 0.8");
        assert_eq!(picked.recording.mbid, "right");
        assert!(picked.confidence > 0.95);
        assert!(picked.best_release.is_none());
    }

    #[test]
    fn test_select_best_match_respects_threshold() {
        let track = track_with("/test.mp3", Some("Time"), Some("Pink Floyd"));
        let candidates = vec![RecordingMatch {
            mbid: "123".into(),
            title: "Zzz".into(),
            artist: "Qqq".into(),
            artist_mbid: None,
            releases: vec![],
            score: 10,
        }];
        assert!(select_best_match(&track, candidates, 0.8).is_none());
    }
}
|
||||
Reference in New Issue
Block a user