Compare commits

..

6 Commits

Author SHA1 Message Date
Connor Johnstone 0f5d3f597a still getting extra artists. need to test on my deployment 2026-03-31 12:32:32 -04:00
Connor Johnstone 05756d95cc I **think** I've at least 99% fixed the top songs mismatch 2026-03-25 21:50:09 -04:00
Connor Johnstone 2444c93d48 fix for bad mbid 2026-03-25 16:25:35 -04:00
Connor Johnstone 042a137121 skip tagging if already good (ie lidarr) 2026-03-25 11:22:18 -04:00
Connor Johnstone 8ddd3bd64b speedup on tagging 2026-03-24 21:32:25 -04:00
Connor Johnstone a03c6bc8f5 format,test,blah 2026-03-24 20:47:14 -04:00
3 changed files with 169 additions and 92 deletions
+37 -1
View File
@@ -22,6 +22,7 @@ fn tag_type_for_file(ft: FileType) -> TagType {
}
/// Write updated metadata back to the music file's embedded tags.
/// Skips the write if all tags already match (avoids expensive FLAC rewrites).
pub fn write_tags(
file_path: &str,
details: &RecordingDetails,
@@ -31,7 +32,9 @@ pub fn write_tags(
) -> TagResult<()> {
let path = Path::new(file_path);
let tagged_file = Probe::open(path)?.options(ParseOptions::default()).read()?;
let tagged_file = Probe::open(path)?
.options(ParseOptions::new().read_properties(false))
.read()?;
// Determine the tag type to use
let tag_type = tagged_file
@@ -44,6 +47,39 @@ pub fn write_tags(
.cloned()
.unwrap_or_else(|| lofty::tag::Tag::new(tag_type));
// Check if all tags already match — skip the expensive write if so
let existing_title = tag.title().map(|s| s.to_string());
let existing_artist = tag.artist().map(|s| s.to_string());
let existing_album = tag.album().map(|s| s.to_string());
let existing_year = tag.year();
let existing_genre = tag.genre().map(|s| s.to_string());
let want_album = release.map(|r| r.title.clone());
let want_year = year.map(|y| y as u32);
let want_genre = genre.map(|g| g.to_string());
let title_ok = existing_title.as_deref() == Some(&details.title);
let artist_ok = existing_artist.as_deref() == Some(&details.artist);
let album_ok = match (&existing_album, &want_album) {
(Some(e), Some(w)) => e == w,
(None, None) => true,
_ => false,
};
let year_ok = existing_year == want_year;
let genre_ok = match (&existing_genre, &want_genre) {
(Some(e), Some(w)) => e == w,
(None, None) => true,
_ => false,
};
if title_ok && artist_ok && album_ok && year_ok && genre_ok {
tracing::debug!(
path = file_path,
"file tags already correct, skipping write"
);
return Ok(());
}
// Set metadata
tag.set_title(details.title.clone());
tag.set_artist(details.artist.clone());
+4
View File
@@ -218,6 +218,7 @@ mod tests {
file_mtime: None,
added_at: chrono::Utc::now().naive_utc(),
updated_at: chrono::Utc::now().naive_utc(),
tagged: false,
};
let result = build_query(&track);
assert_eq!(result, Some(("Pink Floyd".into(), "Time".into())));
@@ -247,6 +248,7 @@ mod tests {
file_mtime: None,
added_at: chrono::Utc::now().naive_utc(),
updated_at: chrono::Utc::now().naive_utc(),
tagged: false,
};
let result = build_query(&track);
assert_eq!(result, Some(("Radiohead".into(), "Creep".into())));
@@ -276,6 +278,7 @@ mod tests {
file_mtime: None,
added_at: chrono::Utc::now().naive_utc(),
updated_at: chrono::Utc::now().naive_utc(),
tagged: false,
};
let candidate = RecordingMatch {
mbid: "123".into(),
@@ -313,6 +316,7 @@ mod tests {
file_mtime: None,
added_at: chrono::Utc::now().naive_utc(),
updated_at: chrono::Utc::now().naive_utc(),
tagged: false,
};
// Slight misspelling
let candidate = RecordingMatch {
+128 -91
View File
@@ -1,27 +1,18 @@
use std::fmt;
use sea_orm::{ActiveValue::Set, DatabaseConnection, NotSet};
use sea_orm::{ActiveModelTrait, ActiveValue::NotSet, ActiveValue::Set, DatabaseConnection};
use shanty_data::{MetadataFetcher as MetadataProvider, RecordingDetails, ReleaseRef};
use shanty_db::entities::track;
use shanty_db::queries;
use crate::error::TagResult;
use crate::file_tags;
use crate::matcher::{self, ScoredMatch};
use shanty_data::MetadataFetcher as MetadataProvider;
/// Configuration for a tagging operation.
pub struct TagConfig {
/// If true, show what would change without writing to DB or files.
pub dry_run: bool,
/// If true, write updated tags back to the music files.
pub write_tags: bool,
/// Minimum match confidence (0.0 - 1.0).
pub confidence: f64,
}
/// Statistics from a completed tagging run.
#[derive(Debug, Default, Clone)]
pub struct TagStats {
pub tracks_processed: u64,
pub tracks_matched: u64,
@@ -30,8 +21,8 @@ pub struct TagStats {
pub tracks_errored: u64,
}
impl fmt::Display for TagStats {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl std::fmt::Display for TagStats {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"processed: {}, matched: {}, updated: {}, skipped: {}, errors: {}",
@@ -51,79 +42,74 @@ pub async fn tag_track(
track: &track::Model,
config: &TagConfig,
) -> TagResult<bool> {
// If the track already has an MBID, skip searching and go straight to lookup
let (details, best_release) = if let Some(ref mbid) = track.musicbrainz_id {
// Resolve recording details — try MBID lookup first, fall back to search
let resolved = if let Some(ref mbid) = track.musicbrainz_id {
tracing::info!(id = track.id, mbid = %mbid, "looking up recording by MBID");
if config.dry_run {
tracing::info!(id = track.id, mbid = %mbid, "DRY RUN: would enrich from MBID");
return Ok(true);
}
let details = provider.get_recording(mbid).await?;
let best_release = crate::matcher::pick_best_release(track, &details.releases);
(details, best_release)
} else {
// No MBID — search by artist + title
let (artist, title) = match matcher::build_query(track) {
Some(q) => q,
None => {
tracing::debug!(id = track.id, path = %track.file_path, "no query possible, skipping");
return Ok(false);
match provider.get_recording(mbid).await {
Ok(details) => {
let best_release = matcher::pick_best_release(track, &details.releases);
Some((details, best_release))
}
};
tracing::info!(id = track.id, artist = %artist, title = %title, "searching MusicBrainz");
let candidates = provider.search_recording(&artist, &title).await?;
if candidates.is_empty() {
tracing::debug!(id = track.id, "no results from MusicBrainz");
return Ok(false);
}
let best = match matcher::select_best_match(track, candidates, config.confidence) {
Some(m) => m,
None => {
tracing::debug!(
id = track.id,
"no match above confidence threshold {}",
config.confidence
Err(e) => {
tracing::warn!(
id = track.id, mbid = %mbid, error = %e,
"MBID lookup failed, falling back to search"
);
return Ok(false);
None
}
};
log_match(track, &best);
if config.dry_run {
return Ok(true);
}
let details = provider.get_recording(&best.recording.mbid).await?;
let best_release = best.best_release;
(details, best_release)
} else {
None
};
// Use existing artist_id if already set (e.g., from download pipeline).
// Only upsert from MB when the track has no artist association yet.
let artist_id = if track.artist_id.is_some() {
track.artist_id
} else {
match &details.artist_mbid {
Some(mbid) => Some(
queries::artists::upsert(conn, &details.artist, Some(mbid))
.await?
.id,
),
None => Some(
queries::artists::upsert(conn, &details.artist, None)
.await?
.id,
),
let (details, best_release) = match resolved {
Some(r) => r,
None => {
// Search by artist + title
match search_and_match(track, provider, config).await? {
Some(r) => r,
None => return Ok(false),
}
}
};
if config.dry_run {
return Ok(true);
}
// If the resolved MBID differs from the track's original MBID, update the wanted_item
// so the cleanup step doesn't delete files whose MBID changed during tagging
if let Some(ref old_mbid) = track.musicbrainz_id
&& old_mbid != &details.mbid
&& let Ok(Some(wanted)) = queries::wanted::find_by_mbid(conn, old_mbid).await
{
let mut active: shanty_db::entities::wanted_item::ActiveModel = wanted.into();
active.musicbrainz_id = Set(Some(details.mbid.clone()));
let _ = active.update(conn).await;
tracing::info!(
old_mbid = %old_mbid,
new_mbid = %details.mbid,
"updated wanted_item MBID after tagger fallback"
);
}
// Always resolve artist_id from MB data — this is the authoritative source for the
// primary artist. The indexer or download worker may have set artist_id to a collaborator
// artist (e.g., "Bass Drum of Death, Not Documented"), so we always correct it here.
let artist_id = match &details.artist_mbid {
Some(mbid) => Some(
queries::artists::upsert(conn, &details.artist, Some(mbid))
.await?
.id,
),
None => Some(
queries::artists::upsert(conn, &details.artist, None)
.await?
.id,
),
};
// Upsert album from best release
let (album_id, album_name) = if let Some(ref release) = best_release {
let album = queries::albums::upsert(
@@ -148,20 +134,27 @@ pub async fn tag_track(
let genre = details.genres.first().cloned();
// Update track metadata
// Update track metadata — preserve artist name when artist_id was already set
let artist_name_for_track = if track.artist_id.is_some() {
track
.artist
.clone()
.or_else(|| Some(details.artist.clone()))
} else {
Some(details.artist.clone())
};
let active = track::ActiveModel {
id: Set(track.id),
file_path: Set(track.file_path.clone()),
title: Set(Some(details.title.clone())),
artist: Set(Some(details.artist.clone())),
artist: Set(artist_name_for_track.clone()),
album: Set(album_name),
album_artist: Set(Some(details.artist.clone())),
album_artist: Set(artist_name_for_track),
musicbrainz_id: Set(Some(details.mbid.clone())),
artist_id: Set(artist_id),
album_id: Set(album_id),
year: Set(year),
genre: Set(genre.clone()),
// Preserve existing values for fields we don't update
track_number: NotSet,
disc_number: NotSet,
duration: NotSet,
@@ -192,36 +185,82 @@ pub async fn tag_track(
Ok(true)
}
/// Search MusicBrainz by artist+title and return the best match.
/// Returns None if no query is possible or no match exceeds the confidence threshold.
async fn search_and_match(
track: &track::Model,
provider: &impl MetadataProvider,
config: &TagConfig,
) -> TagResult<Option<(RecordingDetails, Option<ReleaseRef>)>> {
let (artist, title) = match matcher::build_query(track) {
Some(q) => q,
None => {
tracing::debug!(id = track.id, path = %track.file_path, "no query possible, skipping");
return Ok(None);
}
};
tracing::info!(id = track.id, artist = %artist, title = %title, "searching MusicBrainz");
let candidates = provider.search_recording(&artist, &title).await?;
if candidates.is_empty() {
tracing::debug!(id = track.id, "no results from MusicBrainz");
return Ok(None);
}
let best = match matcher::select_best_match(track, candidates, config.confidence) {
Some(m) => m,
None => {
tracing::debug!(
id = track.id,
"no match above confidence threshold {}",
config.confidence
);
return Ok(None);
}
};
log_match(track, &best);
let details = provider.get_recording(&best.recording.mbid).await?;
let best_release = best.best_release;
Ok(Some((details, best_release)))
}
fn log_match(track: &track::Model, best: &ScoredMatch) {
tracing::info!(
id = track.id,
confidence = format!("{:.2}", best.confidence),
matched_title = %best.recording.title,
matched_artist = %best.recording.artist,
release = best.best_release.as_ref().map(|r| r.title.as_str()).unwrap_or("(none)"),
"match found"
"matched"
);
}
/// Run tagging on all untagged tracks or a specific track.
/// Run tagging on a single track (by ID) or all tracks needing metadata.
pub async fn run_tagging(
conn: &DatabaseConnection,
provider: &impl MetadataProvider,
config: &TagConfig,
track_id: Option<i32>,
) -> TagResult<TagStats> {
let tracks: Vec<track::Model> = if let Some(id) = track_id {
let mut stats = TagStats {
tracks_processed: 0,
tracks_matched: 0,
tracks_updated: 0,
tracks_skipped: 0,
tracks_errored: 0,
};
let tracks = if let Some(id) = track_id {
vec![queries::tracks::get_by_id(conn, id).await?]
} else {
queries::tracks::get_needing_metadata(conn).await?
};
tracing::info!(count = tracks.len(), "tracks to process");
let mut stats = TagStats::default();
for track in &tracks {
stats.tracks_processed += 1;
match tag_track(conn, provider, track, config).await {
Ok(true) => {
stats.tracks_matched += 1;
@@ -229,11 +268,9 @@ pub async fn run_tagging(
stats.tracks_updated += 1;
}
}
Ok(false) => {
stats.tracks_skipped += 1;
}
Ok(false) => stats.tracks_skipped += 1,
Err(e) => {
tracing::error!(id = track.id, path = %track.file_path, "tagging error: {e}");
tracing::error!(id = track.id, error = %e, "tagging failed");
stats.tracks_errored += 1;
}
}