Compare commits
10 Commits
9e93c5e6d2
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
884b2e8d52 | ||
|
|
e5b3fc3fe3 | ||
|
|
2280e9564d | ||
|
|
3572956cde | ||
|
|
324c409db0 | ||
|
|
5957d69e7d | ||
|
|
4400cbc1cb | ||
|
|
966dc6ca86 | ||
|
|
0572722184 | ||
|
|
5583bc21fe |
@@ -7,6 +7,7 @@ description = "Metadata tagging via online databases for Shanty"
|
||||
repository = "ssh://connor@git.rcjohnstone.com:2222/Shanty/tag.git"
|
||||
|
||||
[dependencies]
|
||||
shanty-data = { path = "../shanty-data" }
|
||||
shanty-db = { path = "../shanty-db" }
|
||||
sea-orm = { version = "1", features = ["sqlx-sqlite", "runtime-tokio-native-tls"] }
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
@@ -26,5 +27,6 @@ dirs = "6"
|
||||
regex = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
shanty-data = { path = "../shanty-data" }
|
||||
tokio = { version = "1", features = ["full", "test-util"] }
|
||||
tempfile = "3"
|
||||
|
||||
@@ -45,8 +45,8 @@ pub fn normalize(s: &str) -> String {
|
||||
/// Escape special characters for MusicBrainz Lucene query syntax.
|
||||
pub fn escape_lucene(s: &str) -> String {
|
||||
let special = [
|
||||
'+', '-', '&', '|', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':',
|
||||
'\\', '/',
|
||||
'+', '-', '&', '|', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '\\',
|
||||
'/',
|
||||
];
|
||||
let mut result = String::with_capacity(s.len());
|
||||
for c in s.chars() {
|
||||
@@ -72,7 +72,10 @@ mod tests {
|
||||
fn test_normalize_strips_official_video() {
|
||||
assert_eq!(normalize("Time (Official Video)"), "time");
|
||||
assert_eq!(normalize("Money (Official Music Video)"), "money");
|
||||
assert_eq!(normalize("Comfortably Numb (Official Audio)"), "comfortably numb");
|
||||
assert_eq!(
|
||||
normalize("Comfortably Numb (Official Audio)"),
|
||||
"comfortably numb"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -11,6 +11,9 @@ pub enum TagError {
|
||||
#[error("HTTP error: {0}")]
|
||||
Http(#[from] reqwest::Error),
|
||||
|
||||
#[error("data error: {0}")]
|
||||
Data(#[from] shanty_data::DataError),
|
||||
|
||||
#[error("metadata error: {0}")]
|
||||
Metadata(String),
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ use lofty::probe::Probe;
|
||||
use lofty::tag::{Accessor, TagExt, TagType};
|
||||
|
||||
use crate::error::TagResult;
|
||||
use crate::provider::{RecordingDetails, ReleaseRef};
|
||||
use shanty_data::{RecordingDetails, ReleaseRef};
|
||||
|
||||
/// Infer the best tag type for a given file type.
|
||||
fn tag_type_for_file(ft: FileType) -> TagType {
|
||||
@@ -31,9 +31,7 @@ pub fn write_tags(
|
||||
) -> TagResult<()> {
|
||||
let path = Path::new(file_path);
|
||||
|
||||
let tagged_file = Probe::open(path)?
|
||||
.options(ParseOptions::default())
|
||||
.read()?;
|
||||
let tagged_file = Probe::open(path)?.options(ParseOptions::default()).read()?;
|
||||
|
||||
// Determine the tag type to use
|
||||
let tag_type = tagged_file
|
||||
|
||||
18
src/lib.rs
18
src/lib.rs
@@ -8,11 +8,21 @@ pub mod cleaning;
|
||||
pub mod error;
|
||||
pub mod file_tags;
|
||||
pub mod matcher;
|
||||
pub mod musicbrainz;
|
||||
pub mod provider;
|
||||
pub mod tagger;
|
||||
|
||||
pub use error::{TagError, TagResult};
|
||||
pub use musicbrainz::MusicBrainzClient;
|
||||
pub use provider::MetadataProvider;
|
||||
pub use tagger::{TagConfig, TagStats, run_tagging};
|
||||
|
||||
// Re-export from shanty-data for backward compatibility.
|
||||
pub use shanty_data::MetadataFetcher as MetadataProvider;
|
||||
pub use shanty_data::MusicBrainzFetcher as MusicBrainzClient;
|
||||
|
||||
/// Re-export provider types from shanty-data for backward compatibility.
|
||||
pub mod provider {
|
||||
pub use shanty_data::{
|
||||
ArtistInfo, ArtistSearchResult, ArtistUrl, DiscographyEntry, RecordingDetails,
|
||||
RecordingMatch, ReleaseGroupEntry, ReleaseMatch, ReleaseRef, ReleaseTrack,
|
||||
};
|
||||
|
||||
pub use shanty_data::MetadataFetcher as MetadataProvider;
|
||||
}
|
||||
|
||||
@@ -7,7 +7,10 @@ use shanty_db::Database;
|
||||
use shanty_tag::{MusicBrainzClient, TagConfig, run_tagging};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "shanty-tag", about = "Fill in missing metadata on music files via MusicBrainz")]
|
||||
#[command(
|
||||
name = "shanty-tag",
|
||||
about = "Fill in missing metadata on music files via MusicBrainz"
|
||||
)]
|
||||
struct Cli {
|
||||
/// Database URL. Defaults to sqlite://<XDG_DATA_HOME>/shanty/shanty.db?mode=rwc
|
||||
#[arg(long, env = "SHANTY_DATABASE_URL")]
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use shanty_db::entities::track;
|
||||
|
||||
use crate::cleaning::normalize;
|
||||
use crate::provider::{RecordingMatch, ReleaseRef};
|
||||
use shanty_data::{RecordingMatch, ReleaseRef};
|
||||
|
||||
/// A scored recording match with the best matching release.
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -30,9 +30,7 @@ pub fn build_query(track: &track::Model) -> Option<(String, String)> {
|
||||
|
||||
/// Parse "Artist - Title" from a filename, stripping extension and path.
|
||||
pub fn parse_filename(file_path: &str) -> Option<(String, String)> {
|
||||
let filename = std::path::Path::new(file_path)
|
||||
.file_stem()?
|
||||
.to_str()?;
|
||||
let filename = std::path::Path::new(file_path).file_stem()?.to_str()?;
|
||||
|
||||
// Try common "Artist - Title" pattern
|
||||
if let Some((artist, title)) = filename.split_once(" - ") {
|
||||
@@ -55,11 +53,7 @@ pub fn parse_filename(file_path: &str) -> Option<(String, String)> {
|
||||
/// Score a candidate recording against the track's known metadata.
|
||||
/// Returns a confidence value from 0.0 to 1.0.
|
||||
pub fn score_match(track: &track::Model, candidate: &RecordingMatch) -> f64 {
|
||||
let track_title = track
|
||||
.title
|
||||
.as_deref()
|
||||
.map(normalize)
|
||||
.unwrap_or_default();
|
||||
let track_title = track.title.as_deref().map(normalize).unwrap_or_default();
|
||||
let candidate_title = normalize(&candidate.title);
|
||||
|
||||
let track_artist = track
|
||||
|
||||
@@ -1,319 +0,0 @@
|
||||
use serde::Deserialize;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::time::{Duration, Instant};
|
||||
|
||||
use crate::cleaning::escape_lucene;
|
||||
use crate::error::{TagError, TagResult};
|
||||
use crate::provider::{
|
||||
ArtistSearchResult, DiscographyEntry, MetadataProvider, RecordingDetails, RecordingMatch,
|
||||
ReleaseMatch, ReleaseRef,
|
||||
};
|
||||
|
||||
const BASE_URL: &str = "https://musicbrainz.org/ws/2";
|
||||
const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
|
||||
const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe
|
||||
|
||||
/// MusicBrainz API client with rate limiting.
|
||||
pub struct MusicBrainzClient {
|
||||
client: reqwest::Client,
|
||||
last_request: Mutex<Instant>,
|
||||
}
|
||||
|
||||
impl MusicBrainzClient {
|
||||
pub fn new() -> TagResult<Self> {
|
||||
let client = reqwest::Client::builder()
|
||||
.user_agent(USER_AGENT)
|
||||
.timeout(Duration::from_secs(30))
|
||||
.build()?;
|
||||
Ok(Self {
|
||||
client,
|
||||
last_request: Mutex::new(Instant::now() - RATE_LIMIT),
|
||||
})
|
||||
}
|
||||
|
||||
/// Enforce rate limiting: wait if needed so we don't exceed 1 req/sec.
|
||||
async fn rate_limit(&self) {
|
||||
let mut last = self.last_request.lock().await;
|
||||
let elapsed = last.elapsed();
|
||||
if elapsed < RATE_LIMIT {
|
||||
tokio::time::sleep(RATE_LIMIT - elapsed).await;
|
||||
}
|
||||
*last = Instant::now();
|
||||
}
|
||||
|
||||
async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> TagResult<T> {
|
||||
self.rate_limit().await;
|
||||
tracing::debug!(url = url, "MusicBrainz request");
|
||||
let resp = self.client.get(url).send().await?;
|
||||
let status = resp.status();
|
||||
if !status.is_success() {
|
||||
let body = resp.text().await.unwrap_or_default();
|
||||
return Err(TagError::Other(format!(
|
||||
"MusicBrainz API error {status}: {body}"
|
||||
)));
|
||||
}
|
||||
Ok(resp.json().await?)
|
||||
}
|
||||
}
|
||||
|
||||
impl MetadataProvider for MusicBrainzClient {
|
||||
async fn search_recording(
|
||||
&self,
|
||||
artist: &str,
|
||||
title: &str,
|
||||
) -> TagResult<Vec<RecordingMatch>> {
|
||||
let query = if artist.is_empty() {
|
||||
format!("recording:{}", escape_lucene(title))
|
||||
} else {
|
||||
format!(
|
||||
"artist:{} AND recording:{}",
|
||||
escape_lucene(artist),
|
||||
escape_lucene(title)
|
||||
)
|
||||
};
|
||||
let url = format!("{BASE_URL}/recording/?query={}&fmt=json&limit=5", urlencoded(&query));
|
||||
let resp: MbRecordingSearchResponse = self.get_json(&url).await?;
|
||||
|
||||
Ok(resp
|
||||
.recordings
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
|
||||
RecordingMatch {
|
||||
mbid: r.id,
|
||||
title: r.title,
|
||||
artist: artist_name,
|
||||
artist_mbid,
|
||||
releases: r
|
||||
.releases
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|rel| ReleaseRef {
|
||||
mbid: rel.id,
|
||||
title: rel.title,
|
||||
date: rel.date,
|
||||
track_number: None,
|
||||
})
|
||||
.collect(),
|
||||
score: r.score.unwrap_or(0),
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn search_release(
|
||||
&self,
|
||||
artist: &str,
|
||||
album: &str,
|
||||
) -> TagResult<Vec<ReleaseMatch>> {
|
||||
let query = format!(
|
||||
"artist:{} AND release:{}",
|
||||
escape_lucene(artist),
|
||||
escape_lucene(album)
|
||||
);
|
||||
let url = format!("{BASE_URL}/release/?query={}&fmt=json&limit=5", urlencoded(&query));
|
||||
let resp: MbReleaseSearchResponse = self.get_json(&url).await?;
|
||||
|
||||
Ok(resp
|
||||
.releases
|
||||
.into_iter()
|
||||
.map(|r| {
|
||||
let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
|
||||
ReleaseMatch {
|
||||
mbid: r.id,
|
||||
title: r.title,
|
||||
artist: artist_name,
|
||||
artist_mbid,
|
||||
date: r.date,
|
||||
track_count: r.track_count,
|
||||
score: r.score.unwrap_or(0),
|
||||
}
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn get_recording(&self, mbid: &str) -> TagResult<RecordingDetails> {
|
||||
let url = format!(
|
||||
"{BASE_URL}/recording/{mbid}?inc=artists+releases+genres&fmt=json"
|
||||
);
|
||||
let r: MbRecordingDetail = self.get_json(&url).await?;
|
||||
|
||||
let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
|
||||
Ok(RecordingDetails {
|
||||
mbid: r.id,
|
||||
title: r.title,
|
||||
artist: artist_name,
|
||||
artist_mbid,
|
||||
releases: r
|
||||
.releases
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|rel| ReleaseRef {
|
||||
mbid: rel.id,
|
||||
title: rel.title,
|
||||
date: rel.date,
|
||||
track_number: None,
|
||||
})
|
||||
.collect(),
|
||||
duration_ms: r.length,
|
||||
genres: r
|
||||
.genres
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.map(|g| g.name)
|
||||
.collect(),
|
||||
})
|
||||
}
|
||||
async fn search_artist(
|
||||
&self,
|
||||
query: &str,
|
||||
limit: u32,
|
||||
) -> TagResult<Vec<ArtistSearchResult>> {
|
||||
let url = format!(
|
||||
"{BASE_URL}/artist/?query={}&fmt=json&limit={limit}",
|
||||
urlencoded(&escape_lucene(query))
|
||||
);
|
||||
let resp: MbArtistSearchResponse = self.get_json(&url).await?;
|
||||
|
||||
Ok(resp
|
||||
.artists
|
||||
.into_iter()
|
||||
.map(|a| ArtistSearchResult {
|
||||
mbid: a.id,
|
||||
name: a.name,
|
||||
disambiguation: a.disambiguation.filter(|s| !s.is_empty()),
|
||||
country: a.country,
|
||||
artist_type: a.artist_type,
|
||||
score: a.score.unwrap_or(0),
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn get_artist_releases(
|
||||
&self,
|
||||
artist_mbid: &str,
|
||||
limit: u32,
|
||||
) -> TagResult<Vec<DiscographyEntry>> {
|
||||
let url = format!(
|
||||
"{BASE_URL}/release/?artist={artist_mbid}&fmt=json&limit={limit}"
|
||||
);
|
||||
let resp: MbReleaseSearchResponse = self.get_json(&url).await?;
|
||||
|
||||
Ok(resp
|
||||
.releases
|
||||
.into_iter()
|
||||
.map(|r| DiscographyEntry {
|
||||
mbid: r.id,
|
||||
title: r.title,
|
||||
date: r.date,
|
||||
release_type: None, // release-group type not in this response
|
||||
track_count: r.track_count,
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_artist_credit(credits: &Option<Vec<MbArtistCredit>>) -> (String, Option<String>) {
|
||||
match credits {
|
||||
Some(credits) if !credits.is_empty() => {
|
||||
let name: String = credits
|
||||
.iter()
|
||||
.map(|c| {
|
||||
let mut s = c.artist.name.clone();
|
||||
if let Some(ref join) = c.joinphrase {
|
||||
s.push_str(join);
|
||||
}
|
||||
s
|
||||
})
|
||||
.collect();
|
||||
let mbid = Some(credits[0].artist.id.clone());
|
||||
(name, mbid)
|
||||
}
|
||||
_ => ("Unknown Artist".to_string(), None),
|
||||
}
|
||||
}
|
||||
|
||||
fn urlencoded(s: &str) -> String {
|
||||
s.replace(' ', "+")
|
||||
.replace('&', "%26")
|
||||
.replace('=', "%3D")
|
||||
.replace('#', "%23")
|
||||
}
|
||||
|
||||
// --- MusicBrainz API response types ---
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbArtistSearchResponse {
|
||||
artists: Vec<MbArtistResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbArtistResult {
|
||||
id: String,
|
||||
name: String,
|
||||
score: Option<u8>,
|
||||
disambiguation: Option<String>,
|
||||
country: Option<String>,
|
||||
#[serde(rename = "type")]
|
||||
artist_type: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbRecordingSearchResponse {
|
||||
recordings: Vec<MbRecordingResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbRecordingResult {
|
||||
id: String,
|
||||
title: String,
|
||||
score: Option<u8>,
|
||||
#[serde(rename = "artist-credit")]
|
||||
artist_credit: Option<Vec<MbArtistCredit>>,
|
||||
releases: Option<Vec<MbReleaseResult>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbReleaseSearchResponse {
|
||||
releases: Vec<MbReleaseResult>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbReleaseResult {
|
||||
id: String,
|
||||
title: String,
|
||||
score: Option<u8>,
|
||||
#[serde(rename = "artist-credit")]
|
||||
artist_credit: Option<Vec<MbArtistCredit>>,
|
||||
date: Option<String>,
|
||||
#[serde(rename = "track-count")]
|
||||
track_count: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbRecordingDetail {
|
||||
id: String,
|
||||
title: String,
|
||||
#[serde(rename = "artist-credit")]
|
||||
artist_credit: Option<Vec<MbArtistCredit>>,
|
||||
releases: Option<Vec<MbReleaseResult>>,
|
||||
length: Option<u64>,
|
||||
genres: Option<Vec<MbGenre>>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbArtistCredit {
|
||||
artist: MbArtist,
|
||||
joinphrase: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbArtist {
|
||||
id: String,
|
||||
name: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct MbGenre {
|
||||
name: String,
|
||||
}
|
||||
102
src/provider.rs
102
src/provider.rs
@@ -1,102 +0,0 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::error::TagResult;
|
||||
|
||||
/// A reference to a release (album) that a recording appears on.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ReleaseRef {
|
||||
pub mbid: String,
|
||||
pub title: String,
|
||||
pub date: Option<String>,
|
||||
pub track_number: Option<i32>,
|
||||
}
|
||||
|
||||
/// A recording match from a search query.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RecordingMatch {
|
||||
pub mbid: String,
|
||||
pub title: String,
|
||||
pub artist: String,
|
||||
pub artist_mbid: Option<String>,
|
||||
pub releases: Vec<ReleaseRef>,
|
||||
/// MusicBrainz API score (0-100).
|
||||
pub score: u8,
|
||||
}
|
||||
|
||||
/// A release (album) match from a search query.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ReleaseMatch {
|
||||
pub mbid: String,
|
||||
pub title: String,
|
||||
pub artist: String,
|
||||
pub artist_mbid: Option<String>,
|
||||
pub date: Option<String>,
|
||||
pub track_count: Option<i32>,
|
||||
pub score: u8,
|
||||
}
|
||||
|
||||
/// Full details for a recording, retrieved by MBID.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RecordingDetails {
|
||||
pub mbid: String,
|
||||
pub title: String,
|
||||
pub artist: String,
|
||||
pub artist_mbid: Option<String>,
|
||||
pub releases: Vec<ReleaseRef>,
|
||||
pub duration_ms: Option<u64>,
|
||||
pub genres: Vec<String>,
|
||||
}
|
||||
|
||||
/// An artist match from a search query.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ArtistSearchResult {
|
||||
pub mbid: String,
|
||||
pub name: String,
|
||||
pub disambiguation: Option<String>,
|
||||
pub country: Option<String>,
|
||||
pub artist_type: Option<String>,
|
||||
pub score: u8,
|
||||
}
|
||||
|
||||
/// A release entry in an artist's discography.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DiscographyEntry {
|
||||
pub mbid: String,
|
||||
pub title: String,
|
||||
pub date: Option<String>,
|
||||
pub release_type: Option<String>,
|
||||
pub track_count: Option<i32>,
|
||||
}
|
||||
|
||||
/// Trait for metadata lookup backends. MusicBrainz is the default implementation;
|
||||
/// others (Last.fm, Discogs, etc.) can be added later.
|
||||
pub trait MetadataProvider: Send + Sync {
|
||||
fn search_recording(
|
||||
&self,
|
||||
artist: &str,
|
||||
title: &str,
|
||||
) -> impl std::future::Future<Output = TagResult<Vec<RecordingMatch>>> + Send;
|
||||
|
||||
fn search_release(
|
||||
&self,
|
||||
artist: &str,
|
||||
album: &str,
|
||||
) -> impl std::future::Future<Output = TagResult<Vec<ReleaseMatch>>> + Send;
|
||||
|
||||
fn get_recording(
|
||||
&self,
|
||||
mbid: &str,
|
||||
) -> impl std::future::Future<Output = TagResult<RecordingDetails>> + Send;
|
||||
|
||||
fn search_artist(
|
||||
&self,
|
||||
query: &str,
|
||||
limit: u32,
|
||||
) -> impl std::future::Future<Output = TagResult<Vec<ArtistSearchResult>>> + Send;
|
||||
|
||||
fn get_artist_releases(
|
||||
&self,
|
||||
artist_mbid: &str,
|
||||
limit: u32,
|
||||
) -> impl std::future::Future<Output = TagResult<Vec<DiscographyEntry>>> + Send;
|
||||
}
|
||||
130
src/tagger.rs
130
src/tagger.rs
@@ -8,7 +8,7 @@ use shanty_db::queries;
|
||||
use crate::error::TagResult;
|
||||
use crate::file_tags;
|
||||
use crate::matcher::{self, ScoredMatch};
|
||||
use crate::provider::MetadataProvider;
|
||||
use shanty_data::MetadataFetcher as MetadataProvider;
|
||||
|
||||
/// Configuration for a tagging operation.
|
||||
pub struct TagConfig {
|
||||
@@ -51,64 +51,83 @@ pub async fn tag_track(
|
||||
track: &track::Model,
|
||||
config: &TagConfig,
|
||||
) -> TagResult<bool> {
|
||||
// Build search query
|
||||
let (artist, title) = match matcher::build_query(track) {
|
||||
Some(q) => q,
|
||||
None => {
|
||||
tracing::debug!(id = track.id, path = %track.file_path, "no query possible, skipping");
|
||||
// If the track already has an MBID, skip searching and go straight to lookup
|
||||
let (details, best_release) = if let Some(ref mbid) = track.musicbrainz_id {
|
||||
tracing::info!(id = track.id, mbid = %mbid, "looking up recording by MBID");
|
||||
|
||||
if config.dry_run {
|
||||
tracing::info!(id = track.id, mbid = %mbid, "DRY RUN: would enrich from MBID");
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
let details = provider.get_recording(mbid).await?;
|
||||
let best_release = details.releases.first().cloned();
|
||||
(details, best_release)
|
||||
} else {
|
||||
// No MBID — search by artist + title
|
||||
let (artist, title) = match matcher::build_query(track) {
|
||||
Some(q) => q,
|
||||
None => {
|
||||
tracing::debug!(id = track.id, path = %track.file_path, "no query possible, skipping");
|
||||
return Ok(false);
|
||||
}
|
||||
};
|
||||
|
||||
tracing::info!(id = track.id, artist = %artist, title = %title, "searching MusicBrainz");
|
||||
|
||||
let candidates = provider.search_recording(&artist, &title).await?;
|
||||
|
||||
if candidates.is_empty() {
|
||||
tracing::debug!(id = track.id, "no results from MusicBrainz");
|
||||
return Ok(false);
|
||||
}
|
||||
};
|
||||
|
||||
tracing::info!(
|
||||
id = track.id,
|
||||
artist = %artist,
|
||||
title = %title,
|
||||
"searching MusicBrainz"
|
||||
);
|
||||
let best = match matcher::select_best_match(track, candidates, config.confidence) {
|
||||
Some(m) => m,
|
||||
None => {
|
||||
tracing::debug!(
|
||||
id = track.id,
|
||||
"no match above confidence threshold {}",
|
||||
config.confidence
|
||||
);
|
||||
return Ok(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Search for recordings
|
||||
let candidates = provider.search_recording(&artist, &title).await?;
|
||||
log_match(track, &best);
|
||||
|
||||
if candidates.is_empty() {
|
||||
tracing::debug!(id = track.id, "no results from MusicBrainz");
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// Score and select best match
|
||||
let best = match matcher::select_best_match(track, candidates, config.confidence) {
|
||||
Some(m) => m,
|
||||
None => {
|
||||
tracing::debug!(
|
||||
id = track.id,
|
||||
"no match above confidence threshold {}",
|
||||
config.confidence
|
||||
);
|
||||
return Ok(false);
|
||||
if config.dry_run {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
let details = provider.get_recording(&best.recording.mbid).await?;
|
||||
let best_release = best.best_release;
|
||||
(details, best_release)
|
||||
};
|
||||
|
||||
log_match(track, &best);
|
||||
|
||||
if config.dry_run {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
// Get full details for the best match
|
||||
let details = provider.get_recording(&best.recording.mbid).await?;
|
||||
|
||||
// Upsert artist with MusicBrainz ID
|
||||
// Upsert primary artist with MusicBrainz ID
|
||||
let artist_id = match &details.artist_mbid {
|
||||
Some(mbid) => {
|
||||
Some(queries::artists::upsert(conn, &details.artist, Some(mbid)).await?.id)
|
||||
}
|
||||
None => {
|
||||
Some(queries::artists::upsert(conn, &details.artist, None).await?.id)
|
||||
}
|
||||
Some(mbid) => Some(
|
||||
queries::artists::upsert(conn, &details.artist, Some(mbid))
|
||||
.await?
|
||||
.id,
|
||||
),
|
||||
None => Some(
|
||||
queries::artists::upsert(conn, &details.artist, None)
|
||||
.await?
|
||||
.id,
|
||||
),
|
||||
};
|
||||
|
||||
// Upsert secondary collaborator artists so they exist as separate library entries
|
||||
for (name, mbid) in &details.secondary_artists {
|
||||
if let Err(e) = queries::artists::upsert(conn, name, Some(mbid)).await {
|
||||
tracing::warn!(artist = %name, error = %e, "failed to upsert secondary artist");
|
||||
}
|
||||
}
|
||||
|
||||
// Upsert album from best release
|
||||
let (album_id, album_name) = if let Some(ref release) = best.best_release {
|
||||
let (album_id, album_name) = if let Some(ref release) = best_release {
|
||||
let album = queries::albums::upsert(
|
||||
conn,
|
||||
&release.title,
|
||||
@@ -123,8 +142,7 @@ pub async fn tag_track(
|
||||
};
|
||||
|
||||
// Parse year from release date
|
||||
let year = best
|
||||
.best_release
|
||||
let year = best_release
|
||||
.as_ref()
|
||||
.and_then(|r| r.date.as_deref())
|
||||
.and_then(|d| d.split('-').next())
|
||||
@@ -160,16 +178,16 @@ pub async fn tag_track(
|
||||
queries::tracks::update_metadata(conn, track.id, active).await?;
|
||||
|
||||
// Optionally write tags to file
|
||||
if config.write_tags {
|
||||
if let Err(e) = file_tags::write_tags(
|
||||
if config.write_tags
|
||||
&& let Err(e) = file_tags::write_tags(
|
||||
&track.file_path,
|
||||
&details,
|
||||
best.best_release.as_ref(),
|
||||
best_release.as_ref(),
|
||||
year,
|
||||
genre.as_deref(),
|
||||
) {
|
||||
tracing::warn!(id = track.id, path = %track.file_path, "failed to write file tags: {e}");
|
||||
}
|
||||
)
|
||||
{
|
||||
tracing::warn!(id = track.id, path = %track.file_path, "failed to write file tags: {e}");
|
||||
}
|
||||
|
||||
Ok(true)
|
||||
@@ -196,7 +214,7 @@ pub async fn run_tagging(
|
||||
let tracks: Vec<track::Model> = if let Some(id) = track_id {
|
||||
vec![queries::tracks::get_by_id(conn, id).await?]
|
||||
} else {
|
||||
queries::tracks::get_untagged(conn).await?
|
||||
queries::tracks::get_needing_metadata(conn).await?
|
||||
};
|
||||
|
||||
tracing::info!(count = tracks.len(), "tracks to process");
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
use chrono::Utc;
|
||||
use sea_orm::ActiveValue::Set;
|
||||
|
||||
use shanty_data::DataResult;
|
||||
use shanty_db::{Database, queries};
|
||||
use shanty_tag::provider::{
|
||||
ArtistSearchResult, DiscographyEntry, MetadataProvider, RecordingDetails, RecordingMatch,
|
||||
ReleaseMatch, ReleaseRef,
|
||||
};
|
||||
use shanty_tag::error::TagResult;
|
||||
use shanty_tag::{TagConfig, run_tagging};
|
||||
|
||||
/// A mock metadata provider for testing without hitting MusicBrainz.
|
||||
struct MockProvider;
|
||||
|
||||
impl MetadataProvider for MockProvider {
|
||||
async fn search_recording(&self, artist: &str, title: &str) -> TagResult<Vec<RecordingMatch>> {
|
||||
async fn search_recording(&self, artist: &str, title: &str) -> DataResult<Vec<RecordingMatch>> {
|
||||
// Return a match for "Pink Floyd - Time"
|
||||
if artist.contains("Pink Floyd") && title.contains("Time") {
|
||||
Ok(vec![RecordingMatch {
|
||||
@@ -34,11 +34,11 @@ impl MetadataProvider for MockProvider {
|
||||
}
|
||||
}
|
||||
|
||||
async fn search_release(&self, _artist: &str, _album: &str) -> TagResult<Vec<ReleaseMatch>> {
|
||||
async fn search_release(&self, _artist: &str, _album: &str) -> DataResult<Vec<ReleaseMatch>> {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn get_recording(&self, mbid: &str) -> TagResult<RecordingDetails> {
|
||||
async fn get_recording(&self, mbid: &str) -> DataResult<RecordingDetails> {
|
||||
if mbid == "rec-123" {
|
||||
Ok(RecordingDetails {
|
||||
mbid: "rec-123".into(),
|
||||
@@ -53,19 +53,48 @@ impl MetadataProvider for MockProvider {
|
||||
}],
|
||||
duration_ms: Some(413_000),
|
||||
genres: vec!["Progressive Rock".into()],
|
||||
secondary_artists: vec![],
|
||||
})
|
||||
} else {
|
||||
Err(shanty_tag::TagError::Other("not found".into()))
|
||||
Err(shanty_data::DataError::Other("not found".into()))
|
||||
}
|
||||
}
|
||||
|
||||
async fn search_artist(&self, _query: &str, _limit: u32) -> TagResult<Vec<ArtistSearchResult>> {
|
||||
async fn search_artist(
|
||||
&self,
|
||||
_query: &str,
|
||||
_limit: u32,
|
||||
) -> DataResult<Vec<ArtistSearchResult>> {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn get_artist_releases(&self, _artist_mbid: &str, _limit: u32) -> TagResult<Vec<DiscographyEntry>> {
|
||||
async fn get_artist_releases(
|
||||
&self,
|
||||
_artist_mbid: &str,
|
||||
_limit: u32,
|
||||
) -> DataResult<Vec<DiscographyEntry>> {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn get_release_tracks(
|
||||
&self,
|
||||
_release_mbid: &str,
|
||||
) -> DataResult<Vec<shanty_tag::provider::ReleaseTrack>> {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn get_artist_release_groups(
|
||||
&self,
|
||||
_artist_mbid: &str,
|
||||
) -> DataResult<Vec<shanty_tag::provider::ReleaseGroupEntry>> {
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
async fn resolve_release_from_group(&self, release_group_mbid: &str) -> DataResult<String> {
|
||||
Err(shanty_data::DataError::Other(format!(
|
||||
"no releases for release-group {release_group_mbid}"
|
||||
)))
|
||||
}
|
||||
}
|
||||
|
||||
async fn test_db() -> Database {
|
||||
@@ -74,7 +103,12 @@ async fn test_db() -> Database {
|
||||
.expect("failed to create test database")
|
||||
}
|
||||
|
||||
async fn insert_untagged_track(db: &Database, file_path: &str, title: Option<&str>, artist: Option<&str>) -> i32 {
|
||||
async fn insert_untagged_track(
|
||||
db: &Database,
|
||||
file_path: &str,
|
||||
title: Option<&str>,
|
||||
artist: Option<&str>,
|
||||
) -> i32 {
|
||||
let now = Utc::now().naive_utc();
|
||||
let active = shanty_db::entities::track::ActiveModel {
|
||||
file_path: Set(file_path.to_string()),
|
||||
@@ -94,7 +128,8 @@ async fn test_tag_track_with_match() {
|
||||
let db = test_db().await;
|
||||
let provider = MockProvider;
|
||||
|
||||
let track_id = insert_untagged_track(&db, "/music/time.mp3", Some("Time"), Some("Pink Floyd")).await;
|
||||
let track_id =
|
||||
insert_untagged_track(&db, "/music/time.mp3", Some("Time"), Some("Pink Floyd")).await;
|
||||
|
||||
let config = TagConfig {
|
||||
dry_run: false,
|
||||
@@ -102,13 +137,17 @@ async fn test_tag_track_with_match() {
|
||||
confidence: 0.8,
|
||||
};
|
||||
|
||||
let stats = run_tagging(db.conn(), &provider, &config, Some(track_id)).await.unwrap();
|
||||
let stats = run_tagging(db.conn(), &provider, &config, Some(track_id))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(stats.tracks_processed, 1);
|
||||
assert_eq!(stats.tracks_matched, 1);
|
||||
assert_eq!(stats.tracks_updated, 1);
|
||||
|
||||
// Verify the track was updated
|
||||
let track = queries::tracks::get_by_id(db.conn(), track_id).await.unwrap();
|
||||
let track = queries::tracks::get_by_id(db.conn(), track_id)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(track.musicbrainz_id.as_deref(), Some("rec-123"));
|
||||
assert_eq!(track.title.as_deref(), Some("Time"));
|
||||
assert_eq!(track.artist.as_deref(), Some("Pink Floyd"));
|
||||
@@ -117,9 +156,14 @@ async fn test_tag_track_with_match() {
|
||||
assert_eq!(track.genre.as_deref(), Some("Progressive Rock"));
|
||||
|
||||
// Verify artist was created with MusicBrainz ID
|
||||
let artist = queries::artists::find_by_name(db.conn(), "Pink Floyd").await.unwrap();
|
||||
let artist = queries::artists::find_by_name(db.conn(), "Pink Floyd")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(artist.is_some());
|
||||
assert_eq!(artist.unwrap().musicbrainz_id.as_deref(), Some("artist-456"));
|
||||
assert_eq!(
|
||||
artist.unwrap().musicbrainz_id.as_deref(),
|
||||
Some("artist-456")
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -127,7 +171,13 @@ async fn test_tag_track_no_match() {
|
||||
let db = test_db().await;
|
||||
let provider = MockProvider;
|
||||
|
||||
let track_id = insert_untagged_track(&db, "/music/unknown.mp3", Some("Unknown Song"), Some("Nobody")).await;
|
||||
let track_id = insert_untagged_track(
|
||||
&db,
|
||||
"/music/unknown.mp3",
|
||||
Some("Unknown Song"),
|
||||
Some("Nobody"),
|
||||
)
|
||||
.await;
|
||||
|
||||
let config = TagConfig {
|
||||
dry_run: false,
|
||||
@@ -135,12 +185,16 @@ async fn test_tag_track_no_match() {
|
||||
confidence: 0.8,
|
||||
};
|
||||
|
||||
let stats = run_tagging(db.conn(), &provider, &config, Some(track_id)).await.unwrap();
|
||||
let stats = run_tagging(db.conn(), &provider, &config, Some(track_id))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(stats.tracks_processed, 1);
|
||||
assert_eq!(stats.tracks_skipped, 1);
|
||||
|
||||
// Track should be unchanged
|
||||
let track = queries::tracks::get_by_id(db.conn(), track_id).await.unwrap();
|
||||
let track = queries::tracks::get_by_id(db.conn(), track_id)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(track.musicbrainz_id.is_none());
|
||||
}
|
||||
|
||||
@@ -149,7 +203,8 @@ async fn test_dry_run_does_not_update() {
|
||||
let db = test_db().await;
|
||||
let provider = MockProvider;
|
||||
|
||||
let track_id = insert_untagged_track(&db, "/music/time.mp3", Some("Time"), Some("Pink Floyd")).await;
|
||||
let track_id =
|
||||
insert_untagged_track(&db, "/music/time.mp3", Some("Time"), Some("Pink Floyd")).await;
|
||||
|
||||
let config = TagConfig {
|
||||
dry_run: true,
|
||||
@@ -157,12 +212,16 @@ async fn test_dry_run_does_not_update() {
|
||||
confidence: 0.8,
|
||||
};
|
||||
|
||||
let stats = run_tagging(db.conn(), &provider, &config, Some(track_id)).await.unwrap();
|
||||
let stats = run_tagging(db.conn(), &provider, &config, Some(track_id))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(stats.tracks_matched, 1);
|
||||
assert_eq!(stats.tracks_updated, 0); // dry run
|
||||
|
||||
// Track should be unchanged
|
||||
let track = queries::tracks::get_by_id(db.conn(), track_id).await.unwrap();
|
||||
let track = queries::tracks::get_by_id(db.conn(), track_id)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(track.musicbrainz_id.is_none());
|
||||
}
|
||||
|
||||
@@ -180,7 +239,9 @@ async fn test_tag_all_untagged() {
|
||||
confidence: 0.8,
|
||||
};
|
||||
|
||||
let stats = run_tagging(db.conn(), &provider, &config, None).await.unwrap();
|
||||
let stats = run_tagging(db.conn(), &provider, &config, None)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(stats.tracks_processed, 2);
|
||||
assert_eq!(stats.tracks_matched, 1); // only Pink Floyd matched
|
||||
assert_eq!(stats.tracks_skipped, 1);
|
||||
|
||||
Reference in New Issue
Block a user