Initial commit
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
target/
|
||||
.env
|
||||
*.db
|
||||
*.db-journal
|
||||
30
Cargo.toml
Normal file
30
Cargo.toml
Normal file
@@ -0,0 +1,30 @@
|
||||
[package]
|
||||
name = "shanty-tag"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
description = "Metadata tagging via online databases for Shanty"
|
||||
repository = "ssh://connor@git.rcjohnstone.com:2222/Shanty/tag.git"
|
||||
|
||||
[dependencies]
|
||||
shanty-db = { path = "../shanty-db" }
|
||||
sea-orm = { version = "1", features = ["sqlx-sqlite", "runtime-tokio-native-tls"] }
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
thiserror = "2"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
anyhow = "1"
|
||||
reqwest = { version = "0.12", features = ["json"] }
|
||||
strsim = "0.11"
|
||||
unicode-normalization = "0.1"
|
||||
lofty = "0.22"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
dirs = "6"
|
||||
regex = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1", features = ["full", "test-util"] }
|
||||
tempfile = "3"
|
||||
22
readme.md
Normal file
22
readme.md
Normal file
@@ -0,0 +1,22 @@
|
||||
# shanty-tag
|
||||
|
||||
Metadata tagging via online databases for [Shanty](ssh://connor@git.rcjohnstone.com:2222/Shanty/shanty.git).
|
||||
|
||||
Queries MusicBrainz to fill in missing metadata on indexed music files. Uses fuzzy
|
||||
matching to handle minor spelling differences and a configurable confidence threshold.
|
||||
|
||||
## Usage
|
||||
|
||||
```sh
|
||||
# Tag all untagged tracks (dry run)
|
||||
shanty-tag --all --dry-run -vv
|
||||
|
||||
# Tag all untagged tracks for real
|
||||
shanty-tag --all
|
||||
|
||||
# Tag a specific track and write tags back to the file
|
||||
shanty-tag --track 42 --write-tags
|
||||
|
||||
# Custom confidence threshold
|
||||
shanty-tag --all --confidence 0.9
|
||||
```
|
||||
110
src/cleaning.rs
Normal file
110
src/cleaning.rs
Normal file
@@ -0,0 +1,110 @@
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use regex::Regex;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
static STRIP_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
|
||||
vec![
|
||||
// (Official Video), (Official Audio), (Official Music Video), (Lyric Video), etc.
|
||||
Regex::new(r"\(official\s*(video|audio|music\s*video|lyric\s*video|visualizer)\)").unwrap(),
|
||||
// (Remastered), (Remastered 2011), (Remaster)
|
||||
Regex::new(r"\(remaster(ed)?\s*(\d{4})?\)").unwrap(),
|
||||
// [Live], [Bonus Track], [Deluxe], [Explicit]
|
||||
Regex::new(r"\[(live|bonus(\s*track)?|deluxe|explicit|clean)\]").unwrap(),
|
||||
// (feat. Artist), [feat. Artist], (ft. Artist)
|
||||
Regex::new(r"[\(\[](feat\.?|ft\.?)\s+[^\)\]]+[\)\]]").unwrap(),
|
||||
// (with Artist)
|
||||
Regex::new(r"\(with\s+[^)]+\)").unwrap(),
|
||||
// Trailing " - Single", " - EP"
|
||||
Regex::new(r"\s*-\s*(single|ep)\s*$").unwrap(),
|
||||
]
|
||||
});
|
||||
|
||||
/// Normalize a string for fuzzy comparison.
|
||||
///
|
||||
/// Applies unicode NFC normalization, lowercasing, stripping common suffixes
|
||||
/// (video tags, remaster notes, featuring credits), and trimming.
|
||||
pub fn normalize(s: &str) -> String {
|
||||
// Unicode NFC normalization
|
||||
let s: String = s.nfc().collect();
|
||||
|
||||
// Lowercase
|
||||
let mut s = s.to_lowercase();
|
||||
|
||||
// Strip known patterns
|
||||
for pattern in STRIP_PATTERNS.iter() {
|
||||
s = pattern.replace_all(&s, "").to_string();
|
||||
}
|
||||
|
||||
// Trim whitespace and punctuation
|
||||
s.trim()
|
||||
.trim_matches(|c: char| c.is_ascii_punctuation() || c.is_whitespace())
|
||||
.to_string()
|
||||
}
|
||||
|
||||
/// Backslash-escape every character that has special meaning in the Lucene
/// query syntax used by MusicBrainz search.
///
/// Characters outside the special set (including spaces) are copied through
/// unchanged.
pub fn escape_lucene(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let is_special = matches!(
            ch,
            '+' | '-'
                | '&'
                | '|'
                | '!'
                | '('
                | ')'
                | '{'
                | '}'
                | '['
                | ']'
                | '^'
                | '"'
                | '~'
                | '*'
                | '?'
                | ':'
                | '\\'
                | '/'
        );
        if is_special {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(input, expected)` pair normalizes as expected.
    fn assert_normalizes(cases: &[(&str, &str)]) {
        for (input, expected) in cases {
            assert_eq!(normalize(input), *expected, "input: {input:?}");
        }
    }

    #[test]
    fn test_normalize_basic() {
        assert_normalizes(&[("Hello World", "hello world"), (" spaces ", "spaces")]);
    }

    #[test]
    fn test_normalize_strips_official_video() {
        assert_normalizes(&[
            ("Time (Official Video)", "time"),
            ("Money (Official Music Video)", "money"),
            ("Comfortably Numb (Official Audio)", "comfortably numb"),
        ]);
    }

    #[test]
    fn test_normalize_strips_remastered() {
        assert_normalizes(&[
            ("Time (Remastered 2011)", "time"),
            ("Money (Remastered)", "money"),
            ("Shine On (Remaster)", "shine on"),
        ]);
    }

    #[test]
    fn test_normalize_strips_feat() {
        assert_normalizes(&[("Song (feat. Artist)", "song"), ("Song [ft. Someone]", "song")]);
    }

    #[test]
    fn test_normalize_strips_brackets() {
        assert_normalizes(&[
            ("Song [Live]", "song"),
            ("Song [Bonus Track]", "song"),
            ("Song [Explicit]", "song"),
        ]);
    }

    #[test]
    fn test_normalize_unicode() {
        // Spell both forms with escapes: two visually identical literals can
        // silently end up as the same byte sequence, making the test vacuous.
        let composed = "caf\u{00e9}"; // precomposed é
        let decomposed = "cafe\u{0301}"; // e + combining acute accent
        assert_ne!(composed, decomposed);
        assert_eq!(normalize(composed), normalize(decomposed));
        assert_eq!(normalize(decomposed), composed);
    }

    #[test]
    fn test_escape_lucene() {
        assert_eq!(escape_lucene("AC/DC"), r"AC\/DC");
        assert_eq!(escape_lucene("test (hello)"), r"test \(hello\)");
        assert_eq!(escape_lucene("simple"), "simple");
    }
}
|
||||
30
src/error.rs
Normal file
30
src/error.rs
Normal file
@@ -0,0 +1,30 @@
|
||||
use shanty_db::DbError;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum TagError {
|
||||
#[error("database error: {0}")]
|
||||
Db(#[from] DbError),
|
||||
|
||||
#[error("I/O error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
#[error("HTTP error: {0}")]
|
||||
Http(#[from] reqwest::Error),
|
||||
|
||||
#[error("metadata error: {0}")]
|
||||
Metadata(String),
|
||||
|
||||
#[error("no match found for track {0}")]
|
||||
NoMatch(i32),
|
||||
|
||||
#[error("{0}")]
|
||||
Other(String),
|
||||
}
|
||||
|
||||
impl From<lofty::error::LoftyError> for TagError {
|
||||
fn from(e: lofty::error::LoftyError) -> Self {
|
||||
TagError::Metadata(e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
pub type TagResult<T> = Result<T, TagError>;
|
||||
70
src/file_tags.rs
Normal file
70
src/file_tags.rs
Normal file
@@ -0,0 +1,70 @@
|
||||
use std::path::Path;
|
||||
|
||||
use lofty::config::{ParseOptions, WriteOptions};
|
||||
use lofty::file::{FileType, TaggedFileExt};
|
||||
use lofty::probe::Probe;
|
||||
use lofty::tag::{Accessor, TagExt, TagType};
|
||||
|
||||
use crate::error::TagResult;
|
||||
use crate::provider::{RecordingDetails, ReleaseRef};
|
||||
|
||||
/// Infer the best tag type for a given file type.
|
||||
fn tag_type_for_file(ft: FileType) -> TagType {
|
||||
match ft {
|
||||
FileType::Mpeg => TagType::Id3v2,
|
||||
FileType::Flac | FileType::Vorbis | FileType::Opus | FileType::Speex => {
|
||||
TagType::VorbisComments
|
||||
}
|
||||
FileType::Mp4 => TagType::Mp4Ilst,
|
||||
FileType::Ape => TagType::Ape,
|
||||
_ => TagType::Id3v2,
|
||||
}
|
||||
}
|
||||
|
||||
/// Write updated metadata back to the music file's embedded tags.
|
||||
pub fn write_tags(
|
||||
file_path: &str,
|
||||
details: &RecordingDetails,
|
||||
release: Option<&ReleaseRef>,
|
||||
year: Option<i32>,
|
||||
genre: Option<&str>,
|
||||
) -> TagResult<()> {
|
||||
let path = Path::new(file_path);
|
||||
|
||||
let tagged_file = Probe::open(path)?
|
||||
.options(ParseOptions::default())
|
||||
.read()?;
|
||||
|
||||
// Determine the tag type to use
|
||||
let tag_type = tagged_file
|
||||
.primary_tag()
|
||||
.map(|t| t.tag_type())
|
||||
.unwrap_or_else(|| tag_type_for_file(tagged_file.file_type()));
|
||||
|
||||
let mut tag = tagged_file
|
||||
.primary_tag()
|
||||
.cloned()
|
||||
.unwrap_or_else(|| lofty::tag::Tag::new(tag_type));
|
||||
|
||||
// Set metadata
|
||||
tag.set_title(details.title.clone());
|
||||
tag.set_artist(details.artist.clone());
|
||||
|
||||
if let Some(release) = release {
|
||||
tag.set_album(release.title.clone());
|
||||
}
|
||||
|
||||
if let Some(y) = year {
|
||||
tag.set_year(y as u32);
|
||||
}
|
||||
|
||||
if let Some(g) = genre {
|
||||
tag.set_genre(g.to_string());
|
||||
}
|
||||
|
||||
// Write to file
|
||||
tag.save_to_path(path, WriteOptions::default())?;
|
||||
|
||||
tracing::info!(path = file_path, "wrote tags to file");
|
||||
Ok(())
|
||||
}
|
||||
18
src/lib.rs
Normal file
18
src/lib.rs
Normal file
@@ -0,0 +1,18 @@
|
||||
//! Metadata tagging via online databases for Shanty.
|
||||
//!
|
||||
//! Fills in missing or incorrect metadata on music files by querying online
|
||||
//! databases such as MusicBrainz. Supports fuzzy matching and configurable
|
||||
//! confidence thresholds.
|
||||
|
||||
pub mod cleaning;
|
||||
pub mod error;
|
||||
pub mod file_tags;
|
||||
pub mod matcher;
|
||||
pub mod musicbrainz;
|
||||
pub mod provider;
|
||||
pub mod tagger;
|
||||
|
||||
pub use error::{TagError, TagResult};
|
||||
pub use musicbrainz::MusicBrainzClient;
|
||||
pub use provider::MetadataProvider;
|
||||
pub use tagger::{TagConfig, TagStats, run_tagging};
|
||||
95
src/main.rs
Normal file
95
src/main.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::Parser;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use shanty_db::Database;
|
||||
use shanty_tag::{MusicBrainzClient, TagConfig, run_tagging};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "shanty-tag", about = "Fill in missing metadata on music files via MusicBrainz")]
|
||||
struct Cli {
|
||||
/// Database URL. Defaults to sqlite://<XDG_DATA_HOME>/shanty/shanty.db?mode=rwc
|
||||
#[arg(long, env = "SHANTY_DATABASE_URL")]
|
||||
database: Option<String>,
|
||||
|
||||
/// Tag all untagged tracks in the database.
|
||||
#[arg(long)]
|
||||
all: bool,
|
||||
|
||||
/// Tag a specific track by its database ID.
|
||||
#[arg(long)]
|
||||
track: Option<i32>,
|
||||
|
||||
/// Preview matches without writing to DB or files.
|
||||
#[arg(long)]
|
||||
dry_run: bool,
|
||||
|
||||
/// Write updated tags back to music files.
|
||||
#[arg(long)]
|
||||
write_tags: bool,
|
||||
|
||||
/// Minimum match confidence (0.0 - 1.0).
|
||||
#[arg(long, default_value = "0.8")]
|
||||
confidence: f64,
|
||||
|
||||
/// Increase verbosity (-v info, -vv debug, -vvv trace).
|
||||
#[arg(short, long, action = clap::ArgAction::Count)]
|
||||
verbose: u8,
|
||||
}
|
||||
|
||||
fn default_database_url() -> String {
|
||||
let data_dir = dirs::data_dir()
|
||||
.unwrap_or_else(|| PathBuf::from("."))
|
||||
.join("shanty");
|
||||
std::fs::create_dir_all(&data_dir).ok();
|
||||
let db_path = data_dir.join("shanty.db");
|
||||
format!("sqlite://{}?mode=rwc", db_path.display())
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
// Set up tracing
|
||||
let filter = match cli.verbose {
|
||||
0 => "warn",
|
||||
1 => "info,shanty_tag=info",
|
||||
2 => "info,shanty_tag=debug",
|
||||
_ => "debug,shanty_tag=trace",
|
||||
};
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(filter)),
|
||||
)
|
||||
.init();
|
||||
|
||||
// Validate args
|
||||
if !cli.all && cli.track.is_none() {
|
||||
anyhow::bail!("specify either --all or --track <id>");
|
||||
}
|
||||
|
||||
// Connect to database
|
||||
let database_url = cli.database.unwrap_or_else(default_database_url);
|
||||
tracing::info!(url = %database_url, "connecting to database");
|
||||
let db = Database::new(&database_url).await?;
|
||||
|
||||
// Create MusicBrainz client
|
||||
let provider = MusicBrainzClient::new()?;
|
||||
|
||||
// Run tagging
|
||||
let config = TagConfig {
|
||||
dry_run: cli.dry_run,
|
||||
write_tags: cli.write_tags,
|
||||
confidence: cli.confidence,
|
||||
};
|
||||
|
||||
if config.dry_run {
|
||||
println!("DRY RUN — no changes will be written");
|
||||
}
|
||||
|
||||
let stats = run_tagging(db.conn(), &provider, &config, cli.track).await?;
|
||||
println!("\nTagging complete: {stats}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
305
src/matcher.rs
Normal file
305
src/matcher.rs
Normal file
@@ -0,0 +1,305 @@
|
||||
use shanty_db::entities::track;
|
||||
|
||||
use crate::cleaning::normalize;
|
||||
use crate::provider::{RecordingMatch, ReleaseRef};
|
||||
|
||||
/// A scored recording match with the best matching release.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ScoredMatch {
|
||||
pub recording: RecordingMatch,
|
||||
pub confidence: f64,
|
||||
pub best_release: Option<ReleaseRef>,
|
||||
}
|
||||
|
||||
/// Build a search query (artist, title) from a track's metadata.
|
||||
/// Falls back to filename parsing if metadata is insufficient.
|
||||
pub fn build_query(track: &track::Model) -> Option<(String, String)> {
|
||||
let artist = track
|
||||
.album_artist
|
||||
.as_deref()
|
||||
.or(track.artist.as_deref())
|
||||
.filter(|s| !s.is_empty());
|
||||
let title = track.title.as_deref().filter(|s| !s.is_empty());
|
||||
|
||||
match (artist, title) {
|
||||
(Some(a), Some(t)) => Some((a.to_string(), t.to_string())),
|
||||
(None, Some(t)) => Some((String::new(), t.to_string())),
|
||||
_ => parse_filename(&track.file_path),
|
||||
}
|
||||
}
|
||||
|
||||
/// Derive `(artist, title)` from a file path's stem (directory and extension
/// removed).
///
/// If the stem contains `" - "` and both sides are non-empty after trimming,
/// the part before the first `" - "` is the artist and the rest is the title.
/// Otherwise the whole trimmed stem is returned as the title with an empty
/// artist. Returns `None` when the path has no stem, the stem is not valid
/// UTF-8, or the stem is blank.
pub fn parse_filename(file_path: &str) -> Option<(String, String)> {
    let stem = std::path::Path::new(file_path).file_stem()?.to_str()?;

    // Common "Artist - Title" layout.
    let split = stem
        .split_once(" - ")
        .map(|(lhs, rhs)| (lhs.trim(), rhs.trim()))
        .filter(|(lhs, rhs)| !lhs.is_empty() && !rhs.is_empty());
    if let Some((artist, title)) = split {
        return Some((artist.to_owned(), title.to_owned()));
    }

    // No usable delimiter: the entire stem is the title.
    let title = stem.trim();
    (!title.is_empty()).then(|| (String::new(), title.to_owned()))
}
|
||||
|
||||
/// Score a candidate recording against the track's known metadata.
|
||||
/// Returns a confidence value from 0.0 to 1.0.
|
||||
pub fn score_match(track: &track::Model, candidate: &RecordingMatch) -> f64 {
|
||||
let track_title = track
|
||||
.title
|
||||
.as_deref()
|
||||
.map(normalize)
|
||||
.unwrap_or_default();
|
||||
let candidate_title = normalize(&candidate.title);
|
||||
|
||||
let track_artist = track
|
||||
.artist
|
||||
.as_deref()
|
||||
.or(track.album_artist.as_deref())
|
||||
.map(normalize)
|
||||
.unwrap_or_default();
|
||||
let candidate_artist = normalize(&candidate.artist);
|
||||
|
||||
// Title similarity (weighted 0.6)
|
||||
let title_sim = if track_title.is_empty() || candidate_title.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
strsim::jaro_winkler(&track_title, &candidate_title)
|
||||
};
|
||||
|
||||
// Artist similarity (weighted 0.4)
|
||||
let artist_sim = if track_artist.is_empty() || candidate_artist.is_empty() {
|
||||
0.3 // neutral-ish when we have no artist to compare
|
||||
} else {
|
||||
strsim::jaro_winkler(&track_artist, &candidate_artist)
|
||||
};
|
||||
|
||||
let mut score = 0.6 * title_sim + 0.4 * artist_sim;
|
||||
|
||||
// Bonus: album name matches a release
|
||||
if let Some(ref album) = track.album {
|
||||
let track_album = normalize(album);
|
||||
if !track_album.is_empty() {
|
||||
for release in &candidate.releases {
|
||||
let release_title = normalize(&release.title);
|
||||
let album_sim = strsim::jaro_winkler(&track_album, &release_title);
|
||||
if album_sim > 0.85 {
|
||||
score += 0.05;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Bonus: duration within 3 seconds
|
||||
if let Some(track_dur) = track.duration {
|
||||
// MusicBrainz search results don't always include duration,
|
||||
// but the score from the API itself is a signal
|
||||
if track_dur > 0.0 && candidate.score > 90 {
|
||||
score += 0.03;
|
||||
}
|
||||
}
|
||||
|
||||
score.min(1.0)
|
||||
}
|
||||
|
||||
/// Select the best match from candidates that exceeds the confidence threshold.
|
||||
pub fn select_best_match(
|
||||
track: &track::Model,
|
||||
candidates: Vec<RecordingMatch>,
|
||||
threshold: f64,
|
||||
) -> Option<ScoredMatch> {
|
||||
let mut best: Option<ScoredMatch> = None;
|
||||
|
||||
for candidate in candidates {
|
||||
let confidence = score_match(track, &candidate);
|
||||
tracing::debug!(
|
||||
title = %candidate.title,
|
||||
artist = %candidate.artist,
|
||||
confidence = confidence,
|
||||
"candidate"
|
||||
);
|
||||
|
||||
if confidence >= threshold {
|
||||
let best_release = candidate.releases.first().cloned();
|
||||
let scored = ScoredMatch {
|
||||
recording: candidate,
|
||||
confidence,
|
||||
best_release,
|
||||
};
|
||||
match &best {
|
||||
Some(current) if scored.confidence <= current.confidence => {}
|
||||
_ => best = Some(scored),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
best
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a track fixture with only the path, title and artist populated.
    fn track_with(file_path: &str, title: Option<&str>, artist: Option<&str>) -> track::Model {
        track::Model {
            id: 1,
            file_path: file_path.into(),
            title: title.map(str::to_owned),
            artist: artist.map(str::to_owned),
            album: None,
            album_artist: None,
            track_number: None,
            disc_number: None,
            duration: None,
            genre: None,
            year: None,
            codec: None,
            bitrate: None,
            file_size: 1000,
            fingerprint: None,
            musicbrainz_id: None,
            artist_id: None,
            album_id: None,
            file_mtime: None,
            added_at: chrono::Utc::now().naive_utc(),
            updated_at: chrono::Utc::now().naive_utc(),
        }
    }

    /// Build a candidate fixture with no releases.
    fn candidate_with(title: &str, artist: &str, score: u8) -> RecordingMatch {
        RecordingMatch {
            mbid: "123".into(),
            title: title.into(),
            artist: artist.into(),
            artist_mbid: None,
            releases: vec![],
            score,
        }
    }

    #[test]
    fn test_parse_filename_artist_title() {
        let result = parse_filename("/music/Pink Floyd - Time.mp3");
        assert_eq!(result, Some(("Pink Floyd".into(), "Time".into())));
    }

    #[test]
    fn test_parse_filename_title_only() {
        let result = parse_filename("/music/some_song.mp3");
        assert_eq!(result, Some(("".into(), "some_song".into())));
    }

    #[test]
    fn test_parse_filename_nested_path() {
        let result = parse_filename("/music/Artist/Album/03 - Track Name.flac");
        // The "03" gets treated as artist since it splits on " - "
        assert_eq!(result, Some(("03".into(), "Track Name".into())));
    }

    #[test]
    fn test_build_query_with_metadata() {
        let track = track_with("/music/test.mp3", Some("Time"), Some("Pink Floyd"));
        let result = build_query(&track);
        assert_eq!(result, Some(("Pink Floyd".into(), "Time".into())));
    }

    #[test]
    fn test_build_query_falls_back_to_filename() {
        let track = track_with("/music/Radiohead - Creep.mp3", None, None);
        let result = build_query(&track);
        assert_eq!(result, Some(("Radiohead".into(), "Creep".into())));
    }

    #[test]
    fn test_score_match_exact() {
        let track = track_with("/test.mp3", Some("Time"), Some("Pink Floyd"));
        let candidate = candidate_with("Time", "Pink Floyd", 100);
        let score = score_match(&track, &candidate);
        assert!(score > 0.95, "exact match should score > 0.95, got {score}");
    }

    #[test]
    fn test_score_match_fuzzy() {
        let track = track_with("/test.mp3", Some("Comfortably Numb"), Some("Pink Floyd"));
        // Slight misspelling of the artist
        let candidate = candidate_with("Comfortably Numb", "Pink Flloyd", 95);
        let score = score_match(&track, &candidate);
        assert!(score > 0.85, "fuzzy match should score > 0.85, got {score}");
    }
}
|
||||
253
src/musicbrainz.rs
Normal file
253
src/musicbrainz.rs
Normal file
@@ -0,0 +1,253 @@
|
||||
use serde::Deserialize;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::time::{Duration, Instant};
|
||||
|
||||
use crate::cleaning::escape_lucene;
|
||||
use crate::error::{TagError, TagResult};
|
||||
use crate::provider::{MetadataProvider, RecordingDetails, RecordingMatch, ReleaseMatch, ReleaseRef};
|
||||
|
||||
const BASE_URL: &str = "https://musicbrainz.org/ws/2";
|
||||
const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
|
||||
const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe
|
||||
|
||||
/// MusicBrainz API client with rate limiting.
|
||||
pub struct MusicBrainzClient {
|
||||
client: reqwest::Client,
|
||||
last_request: Mutex<Instant>,
|
||||
}
|
||||
|
||||
impl MusicBrainzClient {
|
||||
pub fn new() -> TagResult<Self> {
|
||||
let client = reqwest::Client::builder()
|
||||
.user_agent(USER_AGENT)
|
||||
.timeout(Duration::from_secs(30))
|
||||
.build()?;
|
||||
Ok(Self {
|
||||
client,
|
||||
last_request: Mutex::new(Instant::now() - RATE_LIMIT),
|
||||
})
|
||||
}
|
||||
|
||||
/// Enforce rate limiting: wait if needed so we don't exceed 1 req/sec.
|
||||
async fn rate_limit(&self) {
|
||||
let mut last = self.last_request.lock().await;
|
||||
let elapsed = last.elapsed();
|
||||
if elapsed < RATE_LIMIT {
|
||||
tokio::time::sleep(RATE_LIMIT - elapsed).await;
|
||||
}
|
||||
*last = Instant::now();
|
||||
}
|
||||
|
||||
async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> TagResult<T> {
|
||||
self.rate_limit().await;
|
||||
tracing::debug!(url = url, "MusicBrainz request");
|
||||
let resp = self.client.get(url).send().await?;
|
||||
let status = resp.status();
|
||||
if !status.is_success() {
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
return Err(TagError::Other(format!(
|
||||
"MusicBrainz API error {status}: {body}"
|
||||
)));
|
||||
}
|
||||
Ok(resp.json().await?)
|
||||
}
|
||||
}
|
||||
|
||||
impl MetadataProvider for MusicBrainzClient {
|
||||
async fn search_recording(
|
||||
&self,
|
||||
artist: &str,
|
||||
title: &str,
|
||||
) -> TagResult<Vec<RecordingMatch>> {
|
||||
let query = if artist.is_empty() {
|
||||
format!("recording:{}", escape_lucene(title))
|
||||
} else {
|
||||
format!(
|
||||
"artist:{} AND recording:{}",
|
||||
escape_lucene(artist),
|
||||
escape_lucene(title)
|
||||
)
|
||||
};
|
||||
let url = format!("{BASE_URL}/recording/?query={}&fmt=json&limit=5", urlencoded(&query));
|
||||
let resp: MbRecordingSearchResponse = self.get_json(&url).await?;
|
||||
|
||||
Ok(resp
|
||||
.recordings
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
|
||||
RecordingMatch {
|
||||
mbid: r.id,
|
||||
title: r.title,
|
||||
artist: artist_name,
|
||||
artist_mbid,
|
||||
releases: r
|
||||
.releases
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|rel| ReleaseRef {
|
||||
mbid: rel.id,
|
||||
title: rel.title,
|
||||
date: rel.date,
|
||||
track_number: None,
|
||||
})
|
||||
.collect(),
|
||||
score: r.score.unwrap_or(0),
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn search_release(
|
||||
&self,
|
||||
artist: &str,
|
||||
album: &str,
|
||||
) -> TagResult<Vec<ReleaseMatch>> {
|
||||
let query = format!(
|
||||
"artist:{} AND release:{}",
|
||||
escape_lucene(artist),
|
||||
escape_lucene(album)
|
||||
);
|
||||
let url = format!("{BASE_URL}/release/?query={}&fmt=json&limit=5", urlencoded(&query));
|
||||
let resp: MbReleaseSearchResponse = self.get_json(&url).await?;
|
||||
|
||||
Ok(resp
|
||||
.releases
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
|
||||
ReleaseMatch {
|
||||
mbid: r.id,
|
||||
title: r.title,
|
||||
artist: artist_name,
|
||||
artist_mbid,
|
||||
date: r.date,
|
||||
track_count: r.track_count,
|
||||
score: r.score.unwrap_or(0),
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn get_recording(&self, mbid: &str) -> TagResult<RecordingDetails> {
|
||||
let url = format!(
|
||||
"{BASE_URL}/recording/{mbid}?inc=artists+releases+genres&fmt=json"
|
||||
);
|
||||
let r: MbRecordingDetail = self.get_json(&url).await?;
|
||||
|
||||
let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
|
||||
Ok(RecordingDetails {
|
||||
mbid: r.id,
|
||||
title: r.title,
|
||||
artist: artist_name,
|
||||
artist_mbid,
|
||||
releases: r
|
||||
.releases
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|rel| ReleaseRef {
|
||||
mbid: rel.id,
|
||||
title: rel.title,
|
||||
date: rel.date,
|
||||
track_number: None,
|
||||
})
|
||||
.collect(),
|
||||
duration_ms: r.length,
|
||||
genres: r
|
||||
.genres
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|g| g.name)
|
||||
.collect(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_artist_credit(credits: &Option<Vec<MbArtistCredit>>) -> (String, Option<String>) {
|
||||
match credits {
|
||||
Some(credits) if !credits.is_empty() => {
|
||||
let name: String = credits
|
||||
.iter()
|
||||
.map(|c| {
|
||||
let mut s = c.artist.name.clone();
|
||||
if let Some(ref join) = c.joinphrase {
|
||||
s.push_str(join);
|
||||
}
|
||||
s
|
||||
})
|
||||
.collect();
|
||||
let mbid = Some(credits[0].artist.id.clone());
|
||||
(name, mbid)
|
||||
}
|
||||
_ => ("Unknown Artist".to_string(), None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Encode `s` for use as a URL query-string value.
///
/// ASCII letters, digits and `-._~` are copied through, a space becomes `+`,
/// and every other byte of the UTF-8 encoding is percent-encoded. This
/// includes a literal `+` (which would otherwise be decoded as a space) and
/// `%` (which would otherwise start a bogus escape).
fn urlencoded(s: &str) -> String {
    use std::fmt::Write as _;

    let mut encoded = String::with_capacity(s.len());
    for byte in s.bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~' => {
                encoded.push(char::from(byte));
            }
            b' ' => encoded.push('+'),
            other => {
                // Writing to a `String` cannot fail.
                let _ = write!(encoded, "%{other:02X}");
            }
        }
    }
    encoded
}
|
||||
|
||||
// --- MusicBrainz API response types ---
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbRecordingSearchResponse {
|
||||
recordings: Vec<MbRecordingResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbRecordingResult {
|
||||
id: String,
|
||||
title: String,
|
||||
score: Option<u8>,
|
||||
#[serde(rename = "artist-credit")]
|
||||
artist_credit: Option<Vec<MbArtistCredit>>,
|
||||
releases: Option<Vec<MbReleaseResult>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbReleaseSearchResponse {
|
||||
releases: Vec<MbReleaseResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbReleaseResult {
|
||||
id: String,
|
||||
title: String,
|
||||
score: Option<u8>,
|
||||
#[serde(rename = "artist-credit")]
|
||||
artist_credit: Option<Vec<MbArtistCredit>>,
|
||||
date: Option<String>,
|
||||
#[serde(rename = "track-count")]
|
||||
track_count: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbRecordingDetail {
|
||||
id: String,
|
||||
title: String,
|
||||
#[serde(rename = "artist-credit")]
|
||||
artist_credit: Option<Vec<MbArtistCredit>>,
|
||||
releases: Option<Vec<MbReleaseResult>>,
|
||||
length: Option<u64>,
|
||||
genres: Option<Vec<MbGenre>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbArtistCredit {
|
||||
artist: MbArtist,
|
||||
joinphrase: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbArtist {
|
||||
id: String,
|
||||
name: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbGenre {
|
||||
name: String,
|
||||
}
|
||||
69
src/provider.rs
Normal file
69
src/provider.rs
Normal file
@@ -0,0 +1,69 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::TagResult;
|
||||
|
||||
/// A reference to a release (album) that a recording appears on.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ReleaseRef {
|
||||
pub mbid: String,
|
||||
pub title: String,
|
||||
pub date: Option<String>,
|
||||
pub track_number: Option<i32>,
|
||||
}
|
||||
|
||||
/// A recording match from a search query.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RecordingMatch {
|
||||
pub mbid: String,
|
||||
pub title: String,
|
||||
pub artist: String,
|
||||
pub artist_mbid: Option<String>,
|
||||
pub releases: Vec<ReleaseRef>,
|
||||
/// MusicBrainz API score (0-100).
|
||||
pub score: u8,
|
||||
}
|
||||
|
||||
/// A release (album) match from a search query.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ReleaseMatch {
|
||||
pub mbid: String,
|
||||
pub title: String,
|
||||
pub artist: String,
|
||||
pub artist_mbid: Option<String>,
|
||||
pub date: Option<String>,
|
||||
pub track_count: Option<i32>,
|
||||
pub score: u8,
|
||||
}
|
||||
|
||||
/// Full details for a recording, retrieved by MBID.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RecordingDetails {
|
||||
pub mbid: String,
|
||||
pub title: String,
|
||||
pub artist: String,
|
||||
pub artist_mbid: Option<String>,
|
||||
pub releases: Vec<ReleaseRef>,
|
||||
pub duration_ms: Option<u64>,
|
||||
pub genres: Vec<String>,
|
||||
}
|
||||
|
||||
/// Trait for metadata lookup backends. MusicBrainz is the default implementation;
|
||||
/// others (Last.fm, Discogs, etc.) can be added later.
|
||||
pub trait MetadataProvider: Send + Sync {
|
||||
fn search_recording(
|
||||
&self,
|
||||
artist: &str,
|
||||
title: &str,
|
||||
) -> impl std::future::Future<Output = TagResult<Vec<RecordingMatch>>> + Send;
|
||||
|
||||
fn search_release(
|
||||
&self,
|
||||
artist: &str,
|
||||
album: &str,
|
||||
) -> impl std::future::Future<Output = TagResult<Vec<ReleaseMatch>>> + Send;
|
||||
|
||||
fn get_recording(
|
||||
&self,
|
||||
mbid: &str,
|
||||
) -> impl std::future::Future<Output = TagResult<RecordingDetails>> + Send;
|
||||
}
|
||||
227
src/tagger.rs
Normal file
227
src/tagger.rs
Normal file
@@ -0,0 +1,227 @@
|
||||
use std::fmt;
|
||||
|
||||
use sea_orm::{ActiveValue::Set, DatabaseConnection, NotSet};
|
||||
|
||||
use shanty_db::entities::track;
|
||||
use shanty_db::queries;
|
||||
|
||||
use crate::error::TagResult;
|
||||
use crate::file_tags;
|
||||
use crate::matcher::{self, ScoredMatch};
|
||||
use crate::provider::MetadataProvider;
|
||||
|
||||
/// Configuration for a tagging operation.
///
/// Derives `Debug`, `Clone` and `PartialEq` so callers can log, duplicate and compare
/// configurations (e.g. in tests).
#[derive(Debug, Clone, PartialEq)]
pub struct TagConfig {
    /// If true, show what would change without writing to DB or files.
    pub dry_run: bool,
    /// If true, write updated tags back to the music files.
    pub write_tags: bool,
    /// Minimum match confidence (0.0 - 1.0).
    pub confidence: f64,
}
|
||||
|
||||
/// Counters accumulated over one tagging run.
#[derive(Clone, Debug, Default)]
pub struct TagStats {
    /// Tracks handed to the tagger, whatever the outcome.
    pub tracks_processed: u64,
    /// Tracks for which an acceptable match was found.
    pub tracks_matched: u64,
    /// Matched tracks that were actually written (stays zero in a dry run).
    pub tracks_updated: u64,
    /// Tracks for which no match was found.
    pub tracks_skipped: u64,
    /// Tracks whose tagging attempt returned an error.
    pub tracks_errored: u64,
}

impl fmt::Display for TagStats {
    /// Render all counters as a single comma-separated summary line.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self {
            tracks_processed: processed,
            tracks_matched: matched,
            tracks_updated: updated,
            tracks_skipped: skipped,
            tracks_errored: errored,
        } = self;
        write!(
            f,
            "processed: {}, matched: {}, updated: {}, skipped: {}, errors: {}",
            processed, matched, updated, skipped, errored,
        )
    }
}
|
||||
|
||||
/// Tag a single track. Returns `Ok(true)` if matched and updated.
|
||||
pub async fn tag_track(
|
||||
conn: &DatabaseConnection,
|
||||
provider: &impl MetadataProvider,
|
||||
track: &track::Model,
|
||||
config: &TagConfig,
|
||||
) -> TagResult<bool> {
|
||||
// Build search query
|
||||
let (artist, title) = match matcher::build_query(track) {
|
||||
Some(q) => q,
|
||||
None => {
|
||||
tracing::debug!(id = track.id, path = %track.file_path, "no query possible, skipping");
|
||||
return Ok(false);
|
||||
}
|
||||
};
|
||||
|
||||
tracing::info!(
|
||||
id = track.id,
|
||||
artist = %artist,
|
||||
title = %title,
|
||||
"searching MusicBrainz"
|
||||
);
|
||||
|
||||
// Search for recordings
|
||||
let candidates = provider.search_recording(&artist, &title).await?;
|
||||
|
||||
if candidates.is_empty() {
|
||||
tracing::debug!(id = track.id, "no results from MusicBrainz");
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// Score and select best match
|
||||
let best = match matcher::select_best_match(track, candidates, config.confidence) {
|
||||
Some(m) => m,
|
||||
None => {
|
||||
tracing::debug!(
|
||||
id = track.id,
|
||||
"no match above confidence threshold {}",
|
||||
config.confidence
|
||||
);
|
||||
return Ok(false);
|
||||
}
|
||||
};
|
||||
|
||||
log_match(track, &best);
|
||||
|
||||
if config.dry_run {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
// Get full details for the best match
|
||||
let details = provider.get_recording(&best.recording.mbid).await?;
|
||||
|
||||
// Upsert artist with MusicBrainz ID
|
||||
let artist_id = match &details.artist_mbid {
|
||||
Some(mbid) => {
|
||||
Some(queries::artists::upsert(conn, &details.artist, Some(mbid)).await?.id)
|
||||
}
|
||||
None => {
|
||||
Some(queries::artists::upsert(conn, &details.artist, None).await?.id)
|
||||
}
|
||||
};
|
||||
|
||||
// Upsert album from best release
|
||||
let (album_id, album_name) = if let Some(ref release) = best.best_release {
|
||||
let album = queries::albums::upsert(
|
||||
conn,
|
||||
&release.title,
|
||||
&details.artist,
|
||||
Some(&release.mbid),
|
||||
artist_id,
|
||||
)
|
||||
.await?;
|
||||
(Some(album.id), Some(release.title.clone()))
|
||||
} else {
|
||||
(None, None)
|
||||
};
|
||||
|
||||
// Parse year from release date
|
||||
let year = best
|
||||
.best_release
|
||||
.as_ref()
|
||||
.and_then(|r| r.date.as_deref())
|
||||
.and_then(|d| d.split('-').next())
|
||||
.and_then(|y| y.parse::<i32>().ok());
|
||||
|
||||
let genre = details.genres.first().cloned();
|
||||
|
||||
// Update track metadata
|
||||
let active = track::ActiveModel {
|
||||
id: Set(track.id),
|
||||
file_path: Set(track.file_path.clone()),
|
||||
title: Set(Some(details.title.clone())),
|
||||
artist: Set(Some(details.artist.clone())),
|
||||
album: Set(album_name),
|
||||
album_artist: Set(Some(details.artist.clone())),
|
||||
musicbrainz_id: Set(Some(details.mbid.clone())),
|
||||
artist_id: Set(artist_id),
|
||||
album_id: Set(album_id),
|
||||
year: Set(year),
|
||||
genre: Set(genre.clone()),
|
||||
// Preserve existing values for fields we don't update
|
||||
track_number: NotSet,
|
||||
disc_number: NotSet,
|
||||
duration: NotSet,
|
||||
codec: NotSet,
|
||||
bitrate: NotSet,
|
||||
file_size: NotSet,
|
||||
fingerprint: NotSet,
|
||||
file_mtime: NotSet,
|
||||
added_at: NotSet,
|
||||
updated_at: NotSet,
|
||||
};
|
||||
queries::tracks::update_metadata(conn, track.id, active).await?;
|
||||
|
||||
// Optionally write tags to file
|
||||
if config.write_tags {
|
||||
if let Err(e) = file_tags::write_tags(
|
||||
&track.file_path,
|
||||
&details,
|
||||
best.best_release.as_ref(),
|
||||
year,
|
||||
genre.as_deref(),
|
||||
) {
|
||||
tracing::warn!(id = track.id, path = %track.file_path, "failed to write file tags: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn log_match(track: &track::Model, best: &ScoredMatch) {
|
||||
tracing::info!(
|
||||
id = track.id,
|
||||
confidence = format!("{:.2}", best.confidence),
|
||||
matched_title = %best.recording.title,
|
||||
matched_artist = %best.recording.artist,
|
||||
release = best.best_release.as_ref().map(|r| r.title.as_str()).unwrap_or("(none)"),
|
||||
"match found"
|
||||
);
|
||||
}
|
||||
|
||||
/// Run tagging on all untagged tracks or a specific track.
|
||||
pub async fn run_tagging(
|
||||
conn: &DatabaseConnection,
|
||||
provider: &impl MetadataProvider,
|
||||
config: &TagConfig,
|
||||
track_id: Option<i32>,
|
||||
) -> TagResult<TagStats> {
|
||||
let tracks: Vec<track::Model> = if let Some(id) = track_id {
|
||||
vec![queries::tracks::get_by_id(conn, id).await?]
|
||||
} else {
|
||||
queries::tracks::get_untagged(conn).await?
|
||||
};
|
||||
|
||||
tracing::info!(count = tracks.len(), "tracks to process");
|
||||
let mut stats = TagStats::default();
|
||||
|
||||
for track in &tracks {
|
||||
stats.tracks_processed += 1;
|
||||
|
||||
match tag_track(conn, provider, track, config).await {
|
||||
Ok(true) => {
|
||||
stats.tracks_matched += 1;
|
||||
if !config.dry_run {
|
||||
stats.tracks_updated += 1;
|
||||
}
|
||||
}
|
||||
Ok(false) => {
|
||||
stats.tracks_skipped += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!(id = track.id, path = %track.file_path, "tagging error: {e}");
|
||||
stats.tracks_errored += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(%stats, "tagging complete");
|
||||
Ok(stats)
|
||||
}
|
||||
176
tests/integration.rs
Normal file
176
tests/integration.rs
Normal file
@@ -0,0 +1,176 @@
|
||||
use chrono::Utc;
|
||||
use sea_orm::ActiveValue::Set;
|
||||
|
||||
use shanty_db::{Database, queries};
|
||||
use shanty_tag::provider::{MetadataProvider, RecordingDetails, RecordingMatch, ReleaseMatch, ReleaseRef};
|
||||
use shanty_tag::error::TagResult;
|
||||
use shanty_tag::{TagConfig, run_tagging};
|
||||
|
||||
/// A mock metadata provider for testing without hitting MusicBrainz.
|
||||
struct MockProvider;
|
||||
|
||||
impl MetadataProvider for MockProvider {
|
||||
async fn search_recording(&self, artist: &str, title: &str) -> TagResult<Vec<RecordingMatch>> {
|
||||
// Return a match for "Pink Floyd - Time"
|
||||
if artist.contains("Pink Floyd") && title.contains("Time") {
|
||||
Ok(vec![RecordingMatch {
|
||||
mbid: "rec-123".into(),
|
||||
title: "Time".into(),
|
||||
artist: "Pink Floyd".into(),
|
||||
artist_mbid: Some("artist-456".into()),
|
||||
releases: vec![ReleaseRef {
|
||||
mbid: "release-789".into(),
|
||||
title: "The Dark Side of the Moon".into(),
|
||||
date: Some("1973-03-01".into()),
|
||||
track_number: Some(4),
|
||||
}],
|
||||
score: 100,
|
||||
}])
|
||||
} else {
|
||||
Ok(vec![])
|
||||
}
|
||||
}
|
||||
|
||||
async fn search_release(&self, _artist: &str, _album: &str) -> TagResult<Vec<ReleaseMatch>> {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn get_recording(&self, mbid: &str) -> TagResult<RecordingDetails> {
|
||||
if mbid == "rec-123" {
|
||||
Ok(RecordingDetails {
|
||||
mbid: "rec-123".into(),
|
||||
title: "Time".into(),
|
||||
artist: "Pink Floyd".into(),
|
||||
artist_mbid: Some("artist-456".into()),
|
||||
releases: vec![ReleaseRef {
|
||||
mbid: "release-789".into(),
|
||||
title: "The Dark Side of the Moon".into(),
|
||||
date: Some("1973-03-01".into()),
|
||||
track_number: Some(4),
|
||||
}],
|
||||
duration_ms: Some(413_000),
|
||||
genres: vec!["Progressive Rock".into()],
|
||||
})
|
||||
} else {
|
||||
Err(shanty_tag::TagError::Other("not found".into()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn test_db() -> Database {
|
||||
Database::new("sqlite::memory:")
|
||||
.await
|
||||
.expect("failed to create test database")
|
||||
}
|
||||
|
||||
async fn insert_untagged_track(db: &Database, file_path: &str, title: Option<&str>, artist: Option<&str>) -> i32 {
|
||||
let now = Utc::now().naive_utc();
|
||||
let active = shanty_db::entities::track::ActiveModel {
|
||||
file_path: Set(file_path.to_string()),
|
||||
title: Set(title.map(String::from)),
|
||||
artist: Set(artist.map(String::from)),
|
||||
file_size: Set(1_000_000),
|
||||
added_at: Set(now),
|
||||
updated_at: Set(now),
|
||||
..Default::default()
|
||||
};
|
||||
let track = queries::tracks::upsert(db.conn(), active).await.unwrap();
|
||||
track.id
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_tag_track_with_match() {
|
||||
let db = test_db().await;
|
||||
let provider = MockProvider;
|
||||
|
||||
let track_id = insert_untagged_track(&db, "/music/time.mp3", Some("Time"), Some("Pink Floyd")).await;
|
||||
|
||||
let config = TagConfig {
|
||||
dry_run: false,
|
||||
write_tags: false,
|
||||
confidence: 0.8,
|
||||
};
|
||||
|
||||
let stats = run_tagging(db.conn(), &provider, &config, Some(track_id)).await.unwrap();
|
||||
assert_eq!(stats.tracks_processed, 1);
|
||||
assert_eq!(stats.tracks_matched, 1);
|
||||
assert_eq!(stats.tracks_updated, 1);
|
||||
|
||||
// Verify the track was updated
|
||||
let track = queries::tracks::get_by_id(db.conn(), track_id).await.unwrap();
|
||||
assert_eq!(track.musicbrainz_id.as_deref(), Some("rec-123"));
|
||||
assert_eq!(track.title.as_deref(), Some("Time"));
|
||||
assert_eq!(track.artist.as_deref(), Some("Pink Floyd"));
|
||||
assert_eq!(track.album.as_deref(), Some("The Dark Side of the Moon"));
|
||||
assert_eq!(track.year, Some(1973));
|
||||
assert_eq!(track.genre.as_deref(), Some("Progressive Rock"));
|
||||
|
||||
// Verify artist was created with MusicBrainz ID
|
||||
let artist = queries::artists::find_by_name(db.conn(), "Pink Floyd").await.unwrap();
|
||||
assert!(artist.is_some());
|
||||
assert_eq!(artist.unwrap().musicbrainz_id.as_deref(), Some("artist-456"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_tag_track_no_match() {
|
||||
let db = test_db().await;
|
||||
let provider = MockProvider;
|
||||
|
||||
let track_id = insert_untagged_track(&db, "/music/unknown.mp3", Some("Unknown Song"), Some("Nobody")).await;
|
||||
|
||||
let config = TagConfig {
|
||||
dry_run: false,
|
||||
write_tags: false,
|
||||
confidence: 0.8,
|
||||
};
|
||||
|
||||
let stats = run_tagging(db.conn(), &provider, &config, Some(track_id)).await.unwrap();
|
||||
assert_eq!(stats.tracks_processed, 1);
|
||||
assert_eq!(stats.tracks_skipped, 1);
|
||||
|
||||
// Track should be unchanged
|
||||
let track = queries::tracks::get_by_id(db.conn(), track_id).await.unwrap();
|
||||
assert!(track.musicbrainz_id.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_dry_run_does_not_update() {
|
||||
let db = test_db().await;
|
||||
let provider = MockProvider;
|
||||
|
||||
let track_id = insert_untagged_track(&db, "/music/time.mp3", Some("Time"), Some("Pink Floyd")).await;
|
||||
|
||||
let config = TagConfig {
|
||||
dry_run: true,
|
||||
write_tags: false,
|
||||
confidence: 0.8,
|
||||
};
|
||||
|
||||
let stats = run_tagging(db.conn(), &provider, &config, Some(track_id)).await.unwrap();
|
||||
assert_eq!(stats.tracks_matched, 1);
|
||||
assert_eq!(stats.tracks_updated, 0); // dry run
|
||||
|
||||
// Track should be unchanged
|
||||
let track = queries::tracks::get_by_id(db.conn(), track_id).await.unwrap();
|
||||
assert!(track.musicbrainz_id.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_tag_all_untagged() {
|
||||
let db = test_db().await;
|
||||
let provider = MockProvider;
|
||||
|
||||
insert_untagged_track(&db, "/music/time.mp3", Some("Time"), Some("Pink Floyd")).await;
|
||||
insert_untagged_track(&db, "/music/unknown.mp3", Some("Unknown"), Some("Nobody")).await;
|
||||
|
||||
let config = TagConfig {
|
||||
dry_run: false,
|
||||
write_tags: false,
|
||||
confidence: 0.8,
|
||||
};
|
||||
|
||||
let stats = run_tagging(db.conn(), &provider, &config, None).await.unwrap();
|
||||
assert_eq!(stats.tracks_processed, 2);
|
||||
assert_eq!(stats.tracks_matched, 1); // only Pink Floyd matched
|
||||
assert_eq!(stats.tracks_skipped, 1);
|
||||
}
|
||||
Reference in New Issue
Block a user