commit 884acfcd18a2aca37f929c2f04a13500782c4cb3 Author: Connor Johnstone Date: Tue Mar 17 14:32:52 2026 -0400 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..360fdc9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +target/ +.env +*.db +*.db-journal diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..92497fa --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "shanty-index" +version = "0.1.0" +edition = "2024" +license = "MIT" +description = "Music file indexing and metadata extraction for Shanty" +repository = "ssh://connor@git.rcjohnstone.com:2222/Shanty/index.git" + +[dependencies] +shanty-db = { path = "../shanty-db" } +sea-orm = { version = "1", features = ["sqlx-sqlite", "runtime-tokio-native-tls"] } +clap = { version = "4", features = ["derive"] } +serde = { version = "1", features = ["derive"] } +thiserror = "2" +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tokio = { version = "1", features = ["full"] } +lofty = "0.22" +walkdir = "2" +chrono = { version = "0.4", features = ["serde"] } +dirs = "6" +anyhow = "1" + +[dev-dependencies] +tokio = { version = "1", features = ["full", "test-util"] } +tempfile = "3" diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..300170a --- /dev/null +++ b/readme.md @@ -0,0 +1,36 @@ +# shanty-index + +Music file indexing and metadata extraction for [Shanty](ssh://connor@git.rcjohnstone.com:2222/Shanty/shanty.git). + +Scans a directory tree, extracts embedded metadata (ID3, Vorbis comments, MP4 tags, etc.) +using `lofty`, and upserts everything into the Shanty database. Supports incremental +re-indexing via file modification time tracking. + +## Usage + +```sh +# Scan a directory +shanty-index /path/to/music + +# Dry run (no DB writes) +shanty-index /path/to/music --dry-run -vv + +# Custom database location +shanty-index /path/to/music --database sqlite:///path/to/shanty.db?mode=rwc +``` + +## As a library + +```rust +use shanty_index::{ScanConfig, run_scan}; +use shanty_db::Database; + +let db = Database::new("sqlite::memory:").await?; +let config = ScanConfig { + root: "/path/to/music".into(), + dry_run: false, + concurrency: 4, +}; +let stats = run_scan(db.conn(), &config).await?; +println!("{stats}"); +``` diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..1f68e11 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,30 @@ +use shanty_db::DbError; + +#[derive(Debug, thiserror::Error)] +pub enum IndexError { + #[error("database error: {0}")] + Db(#[from] DbError), + + #[error("I/O error: {0}")] + Io(#[from] std::io::Error), + + #[error("metadata error: {0}")] + Metadata(String), + + #[error("walkdir error: {0}")] + WalkDir(#[from] walkdir::Error), + + #[error("task join error: {0}")] + Join(#[from] tokio::task::JoinError), + + #[error("{0}")] + Other(String), +} + +impl From for IndexError { + fn from(e: lofty::error::LoftyError) -> Self { + IndexError::Metadata(e.to_string()) + } +} + +pub type IndexResult = Result; diff --git a/src/indexer.rs b/src/indexer.rs new file mode 100644 index 0000000..a148851 --- /dev/null +++ b/src/indexer.rs @@ -0,0 +1,186 @@ +use std::sync::{Arc, Mutex}; + +use sea_orm::{ActiveValue::Set, DatabaseConnection, NotSet}; +use tokio::sync::Semaphore; + +use shanty_db::queries; + +use crate::error::IndexResult; +use crate::metadata::{self, MusicMetadata}; +use crate::scanner::{self, ScannedFile}; +use crate::{ScanConfig, ScanStats}; + +/// Process a single file: check mtime, extract metadata, upsert to DB. +/// Returns `Ok(true)` if indexed, `Ok(false)` if skipped. +async fn process_file( + conn: &DatabaseConnection, + scanned: &ScannedFile, + dry_run: bool, +) -> IndexResult { + let file_path_str = scanned.path.to_string_lossy().to_string(); + + // Check if file already exists with same mtime + if let Some(existing) = queries::tracks::get_by_path(conn, &file_path_str).await? { + if let Some(existing_mtime) = existing.file_mtime { + // Compare at second granularity + if existing_mtime.and_utc().timestamp() == scanned.mtime.and_utc().timestamp() { + tracing::debug!(path = %file_path_str, "skipping (mtime unchanged)"); + return Ok(false); + } + } + } + + // Extract metadata (CPU-bound, run in blocking thread) + let path = scanned.path.clone(); + let meta: MusicMetadata = tokio::task::spawn_blocking(move || { + metadata::extract_metadata(&path) + }) + .await??; + + tracing::info!( + path = %file_path_str, + title = ?meta.title, + artist = ?meta.artist, + album = ?meta.album, + "indexed" + ); + + if dry_run { + return Ok(true); + } + + // Upsert artist (use album_artist if available, fall back to artist) + let artist_name = meta + .album_artist + .as_deref() + .or(meta.artist.as_deref()); + let artist_id = match artist_name { + Some(name) if !name.is_empty() => { + Some(queries::artists::upsert(conn, name, None).await?.id) + } + _ => None, + }; + + // Upsert album + let album_id = match meta.album.as_deref() { + Some(album_name) if !album_name.is_empty() => { + let album_artist = meta + .album_artist + .as_deref() + .or(meta.artist.as_deref()) + .unwrap_or("Unknown Artist"); + Some( + queries::albums::upsert(conn, album_name, album_artist, None, artist_id) + .await? + .id, + ) + } + _ => None, + }; + + // Upsert track + let active = shanty_db::entities::track::ActiveModel { + id: NotSet, + file_path: Set(file_path_str), + title: Set(meta.title), + artist: Set(meta.artist), + album: Set(meta.album), + album_artist: Set(meta.album_artist), + track_number: Set(meta.track_number), + disc_number: Set(meta.disc_number), + duration: Set(meta.duration), + genre: Set(meta.genre), + year: Set(meta.year), + codec: Set(meta.codec), + bitrate: Set(meta.bitrate), + file_size: Set(scanned.file_size), + fingerprint: NotSet, + musicbrainz_id: NotSet, + artist_id: Set(artist_id), + album_id: Set(album_id), + file_mtime: Set(Some(scanned.mtime)), + added_at: NotSet, + updated_at: NotSet, + }; + queries::tracks::upsert(conn, active).await?; + + Ok(true) +} + +/// Run the full indexing pipeline: scan directory, extract metadata, upsert to DB. +pub async fn index_directory( + conn: &DatabaseConnection, + config: &ScanConfig, +) -> IndexResult { + tracing::info!(root = %config.root.display(), "starting scan"); + + // Phase 1: collect all music files + let scan_results = scanner::scan_directory(&config.root); + let mut files = Vec::new(); + let mut stats = ScanStats::default(); + + for result in scan_results { + match result { + Ok(f) => files.push(f), + Err(e) => { + tracing::warn!("scan error: {e}"); + stats.files_errored += 1; + } + } + } + stats.files_found = files.len() as u64; + tracing::info!(count = stats.files_found, "found music files"); + + // Phase 2: process files with bounded concurrency + let semaphore = Arc::new(Semaphore::new(config.concurrency)); + let stats = Arc::new(Mutex::new(stats)); + let mut handles = Vec::new(); + + for file in files { + let permit = semaphore.clone().acquire_owned().await.unwrap(); + let conn = conn.clone(); + let stats = stats.clone(); + let dry_run = config.dry_run; + + handles.push(tokio::spawn(async move { + let _permit = permit; + match process_file(&conn, &file, dry_run).await { + Ok(true) => { + let mut s = stats.lock().unwrap(); + s.files_scanned += 1; + s.files_indexed += 1; + } + Ok(false) => { + let mut s = stats.lock().unwrap(); + s.files_scanned += 1; + s.files_skipped += 1; + } + Err(e) => { + tracing::error!(path = %file.path.display(), "indexing error: {e}"); + let mut s = stats.lock().unwrap(); + s.files_scanned += 1; + s.files_errored += 1; + } + } + })); + } + + for handle in handles { + handle.await?; + } + + let final_stats = Arc::try_unwrap(stats) + .expect("all tasks completed") + .into_inner() + .unwrap(); + + tracing::info!( + found = final_stats.files_found, + indexed = final_stats.files_indexed, + skipped = final_stats.files_skipped, + errored = final_stats.files_errored, + "scan complete" + ); + + Ok(final_stats) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..23f4b2e --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,68 @@ +//! Music file indexing and metadata extraction for Shanty. +//! +//! Scans a directory tree of music files, extracts embedded metadata (ID3, Vorbis +//! comments, MP4 tags, etc.), and upserts everything into the Shanty database. +//! Supports incremental re-indexing via file modification time tracking. + +pub mod error; +pub mod indexer; +pub mod metadata; +pub mod scanner; + +pub use error::{IndexError, IndexResult}; + +use std::fmt; +use std::path::PathBuf; + +use sea_orm::DatabaseConnection; + +/// Configuration for a scan operation. +pub struct ScanConfig { + /// Root directory to scan for music files. + pub root: PathBuf, + /// If true, scan and extract metadata but don't write to the database. + pub dry_run: bool, + /// Maximum number of files to process concurrently. + pub concurrency: usize, +} + +impl Default for ScanConfig { + fn default() -> Self { + Self { + root: PathBuf::new(), + dry_run: false, + concurrency: 4, + } + } +} + +/// Statistics from a completed scan. +#[derive(Debug, Default, Clone)] +pub struct ScanStats { + /// Total music files discovered in the directory tree. + pub files_found: u64, + /// Files that were processed (scanned for metadata). + pub files_scanned: u64, + /// Files skipped because their modification time hasn't changed. + pub files_skipped: u64, + /// Files successfully indexed (upserted to DB or logged in dry-run). + pub files_indexed: u64, + /// Files that encountered errors during processing. + pub files_errored: u64, +} + +impl fmt::Display for ScanStats { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "found: {}, indexed: {}, skipped: {}, errors: {}", + self.files_found, self.files_indexed, self.files_skipped, self.files_errored, + ) + } +} + +/// Main entry point for library consumers. Scans the configured directory +/// and indexes all music files into the database. +pub async fn run_scan(conn: &DatabaseConnection, config: &ScanConfig) -> IndexResult { + indexer::index_directory(conn, config).await +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..04cf163 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,83 @@ +use std::path::PathBuf; + +use clap::Parser; +use tracing_subscriber::EnvFilter; + +use shanty_db::Database; +use shanty_index::{ScanConfig, run_scan}; + +#[derive(Parser)] +#[command(name = "shanty-index", about = "Index music files into the Shanty database")] +struct Cli { + /// Directory to scan for music files. + path: PathBuf, + + /// Database URL. Defaults to sqlite:///shanty/shanty.db?mode=rwc + #[arg(long, env = "SHANTY_DATABASE_URL")] + database: Option, + + /// Scan and extract metadata but don't write to the database. + #[arg(long)] + dry_run: bool, + + /// Increase verbosity (-v info, -vv debug, -vvv trace). + #[arg(short, long, action = clap::ArgAction::Count)] + verbose: u8, + + /// Number of files to process concurrently. + #[arg(long, default_value = "4")] + concurrency: usize, +} + +fn default_database_url() -> String { + let data_dir = dirs::data_dir() + .unwrap_or_else(|| PathBuf::from(".")) + .join("shanty"); + std::fs::create_dir_all(&data_dir).ok(); + let db_path = data_dir.join("shanty.db"); + format!("sqlite://{}?mode=rwc", db_path.display()) +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let cli = Cli::parse(); + + // Set up tracing + let filter = match cli.verbose { + 0 => "warn", + 1 => "info,shanty_index=info", + 2 => "info,shanty_index=debug", + _ => "debug,shanty_index=trace", + }; + tracing_subscriber::fmt() + .with_env_filter( + EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(filter)), + ) + .init(); + + // Validate scan path + if !cli.path.is_dir() { + anyhow::bail!("'{}' is not a directory", cli.path.display()); + } + + // Connect to database + let database_url = cli.database.unwrap_or_else(default_database_url); + tracing::info!(url = %database_url, "connecting to database"); + let db = Database::new(&database_url).await?; + + // Run scan + let config = ScanConfig { + root: cli.path, + dry_run: cli.dry_run, + concurrency: cli.concurrency, + }; + + if config.dry_run { + println!("DRY RUN — no changes will be written to the database"); + } + + let stats = run_scan(db.conn(), &config).await?; + println!("\nScan complete: {stats}"); + + Ok(()) +} diff --git a/src/metadata.rs b/src/metadata.rs new file mode 100644 index 0000000..afd42bb --- /dev/null +++ b/src/metadata.rs @@ -0,0 +1,82 @@ +use std::path::Path; + +use lofty::config::ParseOptions; +use lofty::file::{AudioFile, FileType, TaggedFileExt}; +use lofty::probe::Probe; +use lofty::tag::Accessor; + +use crate::error::IndexResult; + +/// Extracted metadata from a music file. All fields are optional since files +/// may have partial or missing tags. +#[derive(Debug, Clone, Default)] +pub struct MusicMetadata { + pub title: Option, + pub artist: Option, + pub album: Option, + pub album_artist: Option, + pub track_number: Option, + pub disc_number: Option, + pub year: Option, + pub genre: Option, + pub duration: Option, + pub codec: Option, + pub bitrate: Option, +} + +/// Map lofty FileType to a human-readable codec string. +fn file_type_to_codec(ft: FileType) -> &'static str { + match ft { + FileType::Aac => "AAC", + FileType::Aiff => "AIFF", + FileType::Ape => "APE", + FileType::Flac => "FLAC", + FileType::Mpeg => "MP3", + FileType::Mp4 => "MP4/AAC", + FileType::Mpc => "Musepack", + FileType::Opus => "Opus", + FileType::Vorbis => "Vorbis", + FileType::Speex => "Speex", + FileType::Wav => "WAV", + FileType::WavPack => "WavPack", + _ => "Unknown", + } +} + +/// Extract metadata from a music file. This is CPU-bound (sync). +pub fn extract_metadata(path: &Path) -> IndexResult { + let tagged_file = Probe::open(path)? + .options(ParseOptions::default()) + .read()?; + + let mut meta = MusicMetadata::default(); + + // Get audio properties (duration, bitrate, codec) + let properties = tagged_file.properties(); + meta.duration = Some(properties.duration().as_secs_f64()); + meta.bitrate = properties.audio_bitrate().map(|b| b as i32); + meta.codec = Some(file_type_to_codec(tagged_file.file_type()).to_string()); + + // Get tag metadata — try primary tag first, fall back to first available + let tag = tagged_file + .primary_tag() + .or_else(|| tagged_file.first_tag()); + + if let Some(tag) = tag { + meta.title = tag.title().map(|s| s.to_string()); + meta.artist = tag.artist().map(|s| s.to_string()); + meta.album = tag.album().map(|s| s.to_string()); + meta.genre = tag.genre().map(|s| s.to_string()); + meta.track_number = tag.track().map(|n| n as i32); + meta.disc_number = tag.disk().map(|n| n as i32); + meta.year = tag.year().map(|n| n as i32); + + // Album artist is not in the Accessor trait — check tag items directly + // Common keys: "ALBUMARTIST", "ALBUM ARTIST", "TPE2" (ID3v2) + meta.album_artist = tag + .get_string(&lofty::tag::ItemKey::AlbumArtist) + .map(|s| s.to_string()); + } + + Ok(meta) +} diff --git a/src/scanner.rs b/src/scanner.rs new file mode 100644 index 0000000..2de3ad6 --- /dev/null +++ b/src/scanner.rs @@ -0,0 +1,126 @@ +use std::path::{Path, PathBuf}; +use std::time::UNIX_EPOCH; + +use chrono::NaiveDateTime; +use walkdir::WalkDir; + +/// Supported music file extensions (lowercase). +pub const MUSIC_EXTENSIONS: &[&str] = &[ + "mp3", "flac", "ogg", "opus", "m4a", "wav", "wma", "aac", "alac", +]; + +/// A discovered music file with filesystem metadata. +#[derive(Debug, Clone)] +pub struct ScannedFile { + pub path: PathBuf, + pub file_size: i64, + pub mtime: NaiveDateTime, +} + +/// Recursively scan `root` for music files, yielding `ScannedFile` entries. +pub fn scan_directory(root: &Path) -> Vec> { + WalkDir::new(root) + .follow_links(true) + .into_iter() + .filter_map(|entry| { + match entry { + Ok(e) => { + if !e.file_type().is_file() { + return None; + } + let path = e.path(); + let ext = path + .extension() + .and_then(|e| e.to_str()) + .map(|e| e.to_lowercase()); + let ext = match ext { + Some(e) => e, + None => return None, + }; + if !MUSIC_EXTENSIONS.contains(&ext.as_str()) { + return None; + } + + // Read filesystem metadata + match std::fs::metadata(path) { + Ok(meta) => { + let file_size = meta.len() as i64; + let mtime = meta + .modified() + .ok() + .and_then(|t| t.duration_since(UNIX_EPOCH).ok()) + .and_then(|d| { + chrono::DateTime::from_timestamp(d.as_secs() as i64, 0) + .map(|dt| dt.naive_utc()) + }) + .unwrap_or_default(); + + Some(Ok(ScannedFile { + path: path.to_owned(), + file_size, + mtime, + })) + } + Err(e) => { + tracing::warn!(path = %path.display(), "failed to read metadata: {e}"); + None + } + } + } + Err(e) => Some(Err(e)), + } + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use tempfile::TempDir; + + #[test] + fn test_scan_finds_music_files() { + let dir = TempDir::new().unwrap(); + fs::write(dir.path().join("song.mp3"), b"fake mp3").unwrap(); + fs::write(dir.path().join("song.flac"), b"fake flac").unwrap(); + fs::write(dir.path().join("readme.txt"), b"not music").unwrap(); + fs::create_dir_all(dir.path().join("subdir")).unwrap(); + fs::write(dir.path().join("subdir/deep.ogg"), b"fake ogg").unwrap(); + + let results: Vec<_> = scan_directory(dir.path()) + .into_iter() + .filter_map(|r| r.ok()) + .collect(); + + assert_eq!(results.len(), 3); + let names: Vec<_> = results + .iter() + .map(|f| f.path.file_name().unwrap().to_str().unwrap().to_string()) + .collect(); + assert!(names.contains(&"song.mp3".to_string())); + assert!(names.contains(&"song.flac".to_string())); + assert!(names.contains(&"deep.ogg".to_string())); + } + + #[test] + fn test_scan_case_insensitive_extensions() { + let dir = TempDir::new().unwrap(); + fs::write(dir.path().join("song.MP3"), b"fake").unwrap(); + fs::write(dir.path().join("song.Flac"), b"fake").unwrap(); + + let results: Vec<_> = scan_directory(dir.path()) + .into_iter() + .filter_map(|r| r.ok()) + .collect(); + + assert_eq!(results.len(), 2); + } + + #[test] + fn test_scan_empty_directory() { + let dir = TempDir::new().unwrap(); + let results = scan_directory(dir.path()); + assert!(results.is_empty()); + } +} diff --git a/tests/integration.rs b/tests/integration.rs new file mode 100644 index 0000000..c6d7bea --- /dev/null +++ b/tests/integration.rs @@ -0,0 +1,191 @@ +use std::fs; +use std::io::Write; + +use lofty::tag::TagExt; +use shanty_db::{Database, queries}; +use shanty_index::{ScanConfig, run_scan}; +use tempfile::TempDir; + +/// Create a minimal valid MP3 file with ID3v2 tags using lofty. +fn create_test_mp3(dir: &std::path::Path, filename: &str, title: &str, artist: &str, album: &str) { + use lofty::config::WriteOptions; + use lofty::tag::{Accessor, ItemKey, Tag, TagType}; + + let path = dir.join(filename); + + // Write a minimal valid MPEG frame (silence, ~0.026s at 128kbps) + // MPEG1 Layer 3, 128kbps, 44100Hz, stereo frame header + padding + let frame_header: [u8; 4] = [0xFF, 0xFB, 0x90, 0x00]; + let frame_size = 417; // standard frame size for 128kbps/44100Hz + let mut frame_data = vec![0u8; frame_size]; + frame_data[..4].copy_from_slice(&frame_header); + + // Write a few frames so lofty recognizes it as valid audio + let mut file = fs::File::create(&path).unwrap(); + for _ in 0..10 { + file.write_all(&frame_data).unwrap(); + } + drop(file); + + // Now write tags using lofty + let mut tag = Tag::new(TagType::Id3v2); + tag.set_title(title.to_string()); + tag.set_artist(artist.to_string()); + tag.set_album(album.to_string()); + tag.set_track(1); + tag.set_disk(1); + tag.set_year(2024); + tag.set_genre("Rock".to_string()); + tag.insert(lofty::tag::TagItem::new( + ItemKey::AlbumArtist, + lofty::tag::ItemValue::Text(artist.to_string()), + )); + + tag.save_to_path(&path, WriteOptions::default()).unwrap(); +} + +async fn test_db() -> Database { + Database::new("sqlite::memory:") + .await + .expect("failed to create test database") +} + +#[tokio::test] +async fn test_scan_indexes_music_files() { + let db = test_db().await; + let dir = TempDir::new().unwrap(); + + create_test_mp3(dir.path(), "song1.mp3", "Time", "Pink Floyd", "DSOTM"); + create_test_mp3(dir.path(), "song2.mp3", "Money", "Pink Floyd", "DSOTM"); + + let config = ScanConfig { + root: dir.path().to_owned(), + dry_run: false, + concurrency: 2, + }; + + let stats = run_scan(db.conn(), &config).await.unwrap(); + assert_eq!(stats.files_found, 2); + assert_eq!(stats.files_indexed, 2); + assert_eq!(stats.files_errored, 0); + + // Verify tracks in DB + let tracks = queries::tracks::list(db.conn(), 100, 0).await.unwrap(); + assert_eq!(tracks.len(), 2); + + // Verify artist was created + let artist = queries::artists::find_by_name(db.conn(), "Pink Floyd") + .await + .unwrap(); + assert!(artist.is_some()); + + // Verify album was created and linked to artist + let albums = queries::albums::get_by_artist(db.conn(), artist.unwrap().id) + .await + .unwrap(); + assert_eq!(albums.len(), 1); + assert_eq!(albums[0].name, "DSOTM"); +} + +#[tokio::test] +async fn test_incremental_scan_skips_unchanged() { + let db = test_db().await; + let dir = TempDir::new().unwrap(); + + create_test_mp3(dir.path(), "song.mp3", "Time", "Pink Floyd", "DSOTM"); + + let config = ScanConfig { + root: dir.path().to_owned(), + dry_run: false, + concurrency: 1, + }; + + // First scan + let stats = run_scan(db.conn(), &config).await.unwrap(); + assert_eq!(stats.files_indexed, 1); + assert_eq!(stats.files_skipped, 0); + + // Second scan — should skip since mtime unchanged + let stats = run_scan(db.conn(), &config).await.unwrap(); + assert_eq!(stats.files_indexed, 0); + assert_eq!(stats.files_skipped, 1); +} + +#[tokio::test] +async fn test_dry_run_does_not_write() { + let db = test_db().await; + let dir = TempDir::new().unwrap(); + + create_test_mp3(dir.path(), "song.mp3", "Time", "Pink Floyd", "DSOTM"); + + let config = ScanConfig { + root: dir.path().to_owned(), + dry_run: true, + concurrency: 1, + }; + + let stats = run_scan(db.conn(), &config).await.unwrap(); + assert_eq!(stats.files_found, 1); + assert_eq!(stats.files_indexed, 1); // counted as indexed in dry-run + + // But DB should be empty + let tracks = queries::tracks::list(db.conn(), 100, 0).await.unwrap(); + assert!(tracks.is_empty()); +} + +#[tokio::test] +async fn test_partial_metadata_still_indexed() { + let db = test_db().await; + let dir = TempDir::new().unwrap(); + + // Create a file with minimal valid audio but no tags + let frame_header: [u8; 4] = [0xFF, 0xFB, 0x90, 0x00]; + let frame_size = 417; + let mut frame_data = vec![0u8; frame_size]; + frame_data[..4].copy_from_slice(&frame_header); + + let path = dir.path().join("untagged.mp3"); + let mut file = fs::File::create(&path).unwrap(); + for _ in 0..10 { + file.write_all(&frame_data).unwrap(); + } + drop(file); + + let config = ScanConfig { + root: dir.path().to_owned(), + dry_run: false, + concurrency: 1, + }; + + let stats = run_scan(db.conn(), &config).await.unwrap(); + assert_eq!(stats.files_found, 1); + assert_eq!(stats.files_indexed, 1); + + // Track should exist but with NULL metadata + let tracks = queries::tracks::list(db.conn(), 100, 0).await.unwrap(); + assert_eq!(tracks.len(), 1); + assert!(tracks[0].title.is_none()); + assert!(tracks[0].artist.is_none()); + // But should still have file-level info + assert!(tracks[0].file_size > 0); + assert!(tracks[0].codec.is_some()); +} + +#[tokio::test] +async fn test_non_music_files_ignored() { + let db = test_db().await; + let dir = TempDir::new().unwrap(); + + fs::write(dir.path().join("readme.txt"), b"not music").unwrap(); + fs::write(dir.path().join("cover.jpg"), b"not music").unwrap(); + fs::write(dir.path().join("data.json"), b"{}").unwrap(); + + let config = ScanConfig { + root: dir.path().to_owned(), + dry_run: false, + concurrency: 1, + }; + + let stats = run_scan(db.conn(), &config).await.unwrap(); + assert_eq!(stats.files_found, 0); +}