commit 884acfcd18a2aca37f929c2f04a13500782c4cb3
Author: Connor Johnstone <connor.johnstone@arcfield.com>
Date:   Tue Mar 17 14:32:52 2026 -0400

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..360fdc9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+target/
+.env
+*.db
+*.db-journal
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..92497fa
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,26 @@
+[package]
+name = "shanty-index"
+version = "0.1.0"
+edition = "2024"
+license = "MIT"
+description = "Music file indexing and metadata extraction for Shanty"
+repository = "ssh://connor@git.rcjohnstone.com:2222/Shanty/index.git"
+
+[dependencies]
+shanty-db = { path = "../shanty-db" }
+sea-orm = { version = "1", features = ["sqlx-sqlite", "runtime-tokio-native-tls"] }
+clap = { version = "4", features = ["derive"] }
+serde = { version = "1", features = ["derive"] }
+thiserror = "2"
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+tokio = { version = "1", features = ["full"] }
+lofty = "0.22"
+walkdir = "2"
+chrono = { version = "0.4", features = ["serde"] }
+dirs = "6"
+anyhow = "1"
+
+[dev-dependencies]
+tokio = { version = "1", features = ["full", "test-util"] }
+tempfile = "3"
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..300170a
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,36 @@
+# shanty-index
+
+Music file indexing and metadata extraction for [Shanty](ssh://connor@git.rcjohnstone.com:2222/Shanty/shanty.git).
+
+Scans a directory tree, extracts embedded metadata (ID3, Vorbis comments, MP4 tags, etc.)
+using `lofty`, and upserts everything into the Shanty database. Supports incremental
+re-indexing via file modification time tracking.
+
+## Usage
+
+```sh
+# Scan a directory
+shanty-index /path/to/music
+
+# Dry run (no DB writes)
+shanty-index /path/to/music --dry-run -vv
+
+# Custom database location (quote the URL so the shell does not treat `?` as a wildcard)
+shanty-index /path/to/music --database 'sqlite:///path/to/shanty.db?mode=rwc'
+```
+
+## As a library
+
+```rust
+use shanty_index::{ScanConfig, run_scan};
+use shanty_db::Database;
+
+let db = Database::new("sqlite::memory:").await?;
+let config = ScanConfig {
+    root: "/path/to/music".into(),
+    dry_run: false,
+    concurrency: 4,
+};
+let stats = run_scan(db.conn(), &config).await?;
+println!("{stats}");
+```
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..1f68e11
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,30 @@
+use shanty_db::DbError;
+
+#[derive(Debug, thiserror::Error)]
+pub enum IndexError { // all failures surfaced by this crate; `#[from]` variants convert via `?`
+    #[error("database error: {0}")]
+    Db(#[from] DbError), // failure reported by the shanty-db layer
+
+    #[error("I/O error: {0}")]
+    Io(#[from] std::io::Error), // filesystem / I/O failure
+
+    #[error("metadata error: {0}")]
+    Metadata(String), // holds the message text; `LoftyError` is converted into this variant
+
+    #[error("walkdir error: {0}")]
+    WalkDir(#[from] walkdir::Error), // directory traversal failure
+
+    #[error("task join error: {0}")]
+    Join(#[from] tokio::task::JoinError), // a spawned Tokio task did not complete normally
+
+    #[error("{0}")]
+    Other(String), // free-form message, displayed without any prefix
+}
+
+impl From<lofty::error::LoftyError> for IndexError {
+    fn from(err: lofty::error::LoftyError) -> Self {
+        Self::Metadata(err.to_string()) // only the rendered message is kept
+    }
+}
+
+pub type IndexResult<T> = Result<T, IndexError>; // result alias used across the crate
diff --git a/src/indexer.rs b/src/indexer.rs
new file mode 100644
index 0000000..a148851
--- /dev/null
+++ b/src/indexer.rs
@@ -0,0 +1,186 @@
+use std::sync::{Arc, Mutex};
+
+use sea_orm::{ActiveValue::Set, DatabaseConnection, NotSet};
+use tokio::sync::Semaphore;
+
+use shanty_db::queries;
+
+use crate::error::IndexResult;
+use crate::metadata::{self, MusicMetadata};
+use crate::scanner::{self, ScannedFile};
+use crate::{ScanConfig, ScanStats};
+
+/// Index one scanned file: skip it if unchanged, otherwise read its tags and upsert it.
+///
+/// A file is skipped when the database already has a row for the same path whose stored
+/// `file_mtime` equals `scanned.mtime` to the second. With `dry_run` set, metadata is still
+/// extracted and logged, but nothing is written.
+///
+/// Returns `Ok(true)` if indexed, `Ok(false)` if skipped.
+///
+/// Database, metadata-extraction and task-join failures are returned as errors.
+async fn process_file(
+    conn: &DatabaseConnection,
+    scanned: &ScannedFile,
+    dry_run: bool,
+) -> IndexResult<bool> {
+    let file_path_str = scanned.path.to_string_lossy().into_owned();
+
+    // Skip files whose stored mtime matches the one on disk (second granularity).
+    let scanned_secs = scanned.mtime.and_utc().timestamp();
+    let unchanged = queries::tracks::get_by_path(conn, &file_path_str)
+        .await?
+        .and_then(|existing| existing.file_mtime)
+        .is_some_and(|stored| stored.and_utc().timestamp() == scanned_secs);
+    if unchanged {
+        tracing::debug!(path = %file_path_str, "skipping (mtime unchanged)");
+        return Ok(false);
+    }
+
+    // Extract metadata (CPU-bound, run in blocking thread)
+    let path = scanned.path.clone();
+    let meta: MusicMetadata =
+        tokio::task::spawn_blocking(move || metadata::extract_metadata(&path)).await??;
+
+    tracing::info!(
+        path = %file_path_str,
+        title = ?meta.title,
+        artist = ?meta.artist,
+        album = ?meta.album,
+        "indexed"
+    );
+
+    if dry_run {
+        return Ok(true);
+    }
+
+    // Prefer the album artist, falling back to the track artist. Empty tags count as
+    // missing, so a blank album-artist tag cannot hide a usable track artist.
+    let artist_name = [meta.album_artist.as_deref(), meta.artist.as_deref()]
+        .into_iter()
+        .flatten()
+        .find(|name| !name.is_empty());
+    let artist_id = match artist_name {
+        Some(name) => Some(queries::artists::upsert(conn, name, None).await?.id),
+        None => None,
+    };
+
+    // Upsert album, credited to the artist chosen above
+    let album_id = match meta.album.as_deref() {
+        Some(album_name) if !album_name.is_empty() => {
+            let album_artist = artist_name.unwrap_or("Unknown Artist");
+            Some(
+                queries::albums::upsert(conn, album_name, album_artist, None, artist_id)
+                    .await?
+                    .id,
+            )
+        }
+        _ => None,
+    };
+
+    // Upsert track
+    let active = shanty_db::entities::track::ActiveModel {
+        id: NotSet,
+        file_path: Set(file_path_str),
+        title: Set(meta.title),
+        artist: Set(meta.artist),
+        album: Set(meta.album),
+        album_artist: Set(meta.album_artist),
+        track_number: Set(meta.track_number),
+        disc_number: Set(meta.disc_number),
+        duration: Set(meta.duration),
+        genre: Set(meta.genre),
+        year: Set(meta.year),
+        codec: Set(meta.codec),
+        bitrate: Set(meta.bitrate),
+        file_size: Set(scanned.file_size),
+        fingerprint: NotSet,
+        musicbrainz_id: NotSet,
+        artist_id: Set(artist_id),
+        album_id: Set(album_id),
+        file_mtime: Set(Some(scanned.mtime)),
+        added_at: NotSet,
+        updated_at: NotSet,
+    };
+    queries::tracks::upsert(conn, active).await?;
+
+    Ok(true)
+}
+
+/// Run the full indexing pipeline: scan directory, extract metadata, upsert to DB.
+///
+/// Processes up to `config.concurrency` files at once (0 is treated as 1 so the scan cannot
+/// stall). Per-file failures, including panicked tasks, are logged and counted, not returned.
+pub async fn index_directory(
+    conn: &DatabaseConnection,
+    config: &ScanConfig,
+) -> IndexResult<ScanStats> {
+    tracing::info!(root = %config.root.display(), "starting scan");
+
+    // Phase 1: collect all music files
+    let mut files = Vec::new();
+    let mut stats = ScanStats::default();
+    for result in scanner::scan_directory(&config.root) {
+        match result {
+            Ok(f) => files.push(f),
+            Err(e) => {
+                tracing::warn!("scan error: {e}");
+                stats.files_errored += 1;
+            }
+        }
+    }
+    stats.files_found = files.len() as u64;
+    tracing::info!(count = stats.files_found, "found music files");
+
+    // Phase 2: process files with bounded concurrency (a zero-permit semaphore would hang)
+    let semaphore = Arc::new(Semaphore::new(config.concurrency.max(1)));
+    let stats = Arc::new(Mutex::new(stats));
+    let mut handles = Vec::with_capacity(files.len());
+
+    for file in files {
+        let permit = Arc::clone(&semaphore).acquire_owned().await;
+        let permit = permit.expect("semaphore is never closed");
+        let conn = conn.clone();
+        let stats = stats.clone();
+        let dry_run = config.dry_run;
+        handles.push(tokio::spawn(async move {
+            let _permit = permit;
+            let outcome = process_file(&conn, &file, dry_run).await;
+            let mut s = stats.lock().unwrap_or_else(|p| p.into_inner());
+            s.files_scanned += 1;
+            match outcome {
+                Ok(true) => s.files_indexed += 1,
+                Ok(false) => s.files_skipped += 1,
+                Err(e) => {
+                    tracing::error!(path = %file.path.display(), "indexing error: {e}");
+                    s.files_errored += 1;
+                }
+            }
+        }));
+    }
+
+    for handle in handles {
+        // A panicked or cancelled task must not discard the results of the others.
+        if let Err(e) = handle.await {
+            tracing::error!("indexing task failed: {e}");
+            let mut s = stats.lock().unwrap_or_else(|p| p.into_inner());
+            s.files_scanned += 1;
+            s.files_errored += 1;
+        }
+    }
+
+    let final_stats = Arc::try_unwrap(stats)
+        .expect("all tasks completed")
+        .into_inner()
+        .unwrap_or_else(|p| p.into_inner());
+
+    tracing::info!(
+        found = final_stats.files_found,
+        indexed = final_stats.files_indexed,
+        skipped = final_stats.files_skipped,
+        errored = final_stats.files_errored,
+        "scan complete"
+    );
+
+    Ok(final_stats)
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..23f4b2e
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,68 @@
+//! Music file indexing and metadata extraction for Shanty.
+//!
+//! Scans a directory tree of music files, extracts embedded metadata (ID3, Vorbis
+//! comments, MP4 tags, etc.), and upserts everything into the Shanty database.
+//! Supports incremental re-indexing via file modification time tracking.
+
+pub mod error;
+pub mod indexer;
+pub mod metadata;
+pub mod scanner;
+
+pub use error::{IndexError, IndexResult};
+
+use std::fmt;
+use std::path::PathBuf;
+
+use sea_orm::DatabaseConnection;
+
+/// Settings for one indexing run, as passed to [`run_scan`].
+pub struct ScanConfig {
+    /// Directory whose tree is walked for music files.
+    pub root: PathBuf,
+    /// When `true`, metadata is still extracted and logged but no database rows are written.
+    pub dry_run: bool,
+    /// Upper bound on files processed at once (the indexer's semaphore permit count).
+    pub concurrency: usize,
+}
+
+impl Default for ScanConfig {
+    /// Defaults: an empty `root` path, `dry_run` off, and a concurrency of 4.
+    fn default() -> Self {
+        let root = PathBuf::default();
+        let dry_run = false;
+        let concurrency = 4;
+        Self { root, dry_run, concurrency }
+    }
+}
+
+/// Statistics from a completed scan.
+#[derive(Debug, Default, Clone)]
+pub struct ScanStats {
+    /// Music files successfully discovered in the directory tree (scan errors excluded).
+    pub files_found: u64,
+    /// Files handed to per-file processing, whatever the outcome (indexed, skipped, errored).
+    pub files_scanned: u64,
+    /// Files skipped because their modification time hasn't changed.
+    pub files_skipped: u64,
+    /// Files successfully indexed (upserted to DB or logged in dry-run).
+    pub files_indexed: u64,
+    /// Errors seen while walking the tree plus files whose processing failed.
+    pub files_errored: u64,
+}
+
+impl fmt::Display for ScanStats {
+    /// Writes the one-line counter summary; `files_scanned` is not part of it.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_fmt(format_args!(
+            "found: {}, indexed: {}, skipped: {}, errors: {}",
+            self.files_found, self.files_indexed, self.files_skipped, self.files_errored,
+        ))
+    }
+}
+
+/// Main entry point for library consumers: indexes the music files under `config.root`
+/// into the database behind `conn` by delegating to [`indexer::index_directory`].
+pub async fn run_scan(conn: &DatabaseConnection, config: &ScanConfig) -> IndexResult<ScanStats> {
+    indexer::index_directory(conn, config).await
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..04cf163
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,83 @@
+use std::path::PathBuf;
+
+use clap::Parser;
+use tracing_subscriber::EnvFilter;
+
+use shanty_db::Database;
+use shanty_index::{ScanConfig, run_scan};
+
+#[derive(Parser)]
+#[command(name = "shanty-index", about = "Index music files into the Shanty database")]
+struct Cli { // NOTE(review): the `///` lines below are presumably shown as --help text by clap
+    /// Directory to scan for music files.
+    path: PathBuf, // must be an existing directory; checked in `main`
+
+    /// Database URL. Defaults to sqlite://<XDG_DATA_HOME>/shanty/shanty.db?mode=rwc
+    #[arg(long, env = "SHANTY_DATABASE_URL")]
+    database: Option<String>, // when absent, `default_database_url()` supplies the URL
+
+    /// Scan and extract metadata but don't write to the database.
+    #[arg(long)]
+    dry_run: bool,
+
+    /// Increase verbosity (-v info, -vv debug, -vvv trace).
+    #[arg(short, long, action = clap::ArgAction::Count)]
+    verbose: u8, // occurrence count, mapped to a tracing filter string in `main`
+
+    /// Number of files to process concurrently.
+    #[arg(long, default_value = "4")]
+    concurrency: usize, // copied into `ScanConfig::concurrency`
+}
+
+/// Build the default database URL under the platform data directory (falling back to `.`).
+fn default_database_url() -> String {
+    let data_dir = dirs::data_dir().unwrap_or_else(|| PathBuf::from(".")).join("shanty");
+    if let Err(e) = std::fs::create_dir_all(&data_dir) {
+        tracing::warn!(dir = %data_dir.display(), "could not create data directory: {e}");
+    }
+    format!("sqlite://{}?mode=rwc", data_dir.join("shanty.db").display())
+}
+
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    let cli = Cli::parse();
+
+    // Logging: -v picks the filter, but only as a fallback when the env-derived filter fails.
+    let filter = match cli.verbose {
+        0 => "warn",
+        1 => "info,shanty_index=info",
+        2 => "info,shanty_index=debug",
+        _ => "debug,shanty_index=trace",
+    };
+    tracing_subscriber::fmt()
+        .with_env_filter(
+            EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(filter)),
+        )
+        .init();
+
+    // Reject a scan root that is not an existing directory before touching the database
+    if !cli.path.is_dir() {
+        anyhow::bail!("'{}' is not a directory", cli.path.display());
+    }
+
+    // Connect to database: explicit --database / SHANTY_DATABASE_URL value, else the default
+    let database_url = cli.database.unwrap_or_else(default_database_url);
+    tracing::info!(url = %database_url, "connecting to database");
+    let db = Database::new(&database_url).await?;
+
+    // Run scan using the CLI options as the library configuration
+    let config = ScanConfig {
+        root: cli.path,
+        dry_run: cli.dry_run,
+        concurrency: cli.concurrency,
+    };
+
+    if config.dry_run {
+        println!("DRY RUN — no changes will be written to the database");
+    }
+
+    let stats = run_scan(db.conn(), &config).await?;
+    println!("\nScan complete: {stats}");
+
+    Ok(())
+}
diff --git a/src/metadata.rs b/src/metadata.rs
new file mode 100644
index 0000000..afd42bb
--- /dev/null
+++ b/src/metadata.rs
@@ -0,0 +1,82 @@
+use std::path::Path;
+
+use lofty::config::ParseOptions;
+use lofty::file::{AudioFile, FileType, TaggedFileExt};
+use lofty::probe::Probe;
+use lofty::tag::Accessor;
+
+use crate::error::IndexResult;
+
+/// Extracted metadata from a music file. All fields are optional since files
+/// may have partial or missing tags.
+#[derive(Debug, Clone, Default)]
+pub struct MusicMetadata {
+    pub title: Option<String>,
+    pub artist: Option<String>,
+    pub album: Option<String>,
+    pub album_artist: Option<String>, // read from the tag's `ItemKey::AlbumArtist` item
+    pub track_number: Option<i32>,
+    pub disc_number: Option<i32>,
+    pub year: Option<i32>,
+    pub genre: Option<String>,
+    pub duration: Option<f64>, // seconds (filled from `Duration::as_secs_f64`)
+    pub codec: Option<String>, // label from `file_type_to_codec`, e.g. "FLAC"
+    pub bitrate: Option<i32>, // from `audio_bitrate()`; presumably kbps — TODO confirm
+}
+
+/// Map a lofty `FileType` to the codec label stored with the track.
+fn file_type_to_codec(ft: FileType) -> &'static str {
+    match ft {
+        FileType::Aac => "AAC",
+        FileType::Aiff => "AIFF",
+        FileType::Ape => "APE",
+        FileType::Flac => "FLAC",
+        FileType::Mpeg => "MP3",
+        FileType::Mp4 => "MP4/AAC", // NOTE(review): MP4 is a container; label assumes AAC — confirm
+        FileType::Mpc => "Musepack",
+        FileType::Opus => "Opus",
+        FileType::Vorbis => "Vorbis",
+        FileType::Speex => "Speex",
+        FileType::Wav => "WAV",
+        FileType::WavPack => "WavPack",
+        _ => "Unknown", // any file type without an explicit label above
+    }
+}
+
+/// Extract metadata from a music file. This is CPU-bound (sync).
+///
+/// Fails if the file cannot be opened or parsed. Tag fields stay `None` when absent, and
+/// numeric values that do not fit in `i32` become `None` rather than wrapping.
+pub fn extract_metadata(path: &Path) -> IndexResult<MusicMetadata> {
+    let tagged_file = Probe::open(path)?
+        .options(ParseOptions::default())
+        .read()?;
+    // Audio properties (duration, bitrate, codec) are read even when there is no tag.
+    let properties = tagged_file.properties();
+    let mut meta = MusicMetadata {
+        duration: Some(properties.duration().as_secs_f64()),
+        bitrate: properties.audio_bitrate().and_then(|b| i32::try_from(b).ok()),
+        codec: Some(file_type_to_codec(tagged_file.file_type()).to_string()),
+        ..MusicMetadata::default()
+    };
+
+    // Get tag metadata — try primary tag first, fall back to first available
+    let tag = tagged_file
+        .primary_tag()
+        .or_else(|| tagged_file.first_tag());
+    if let Some(tag) = tag {
+        meta.title = tag.title().map(|s| s.to_string());
+        meta.artist = tag.artist().map(|s| s.to_string());
+        meta.album = tag.album().map(|s| s.to_string());
+        meta.genre = tag.genre().map(|s| s.to_string());
+        meta.track_number = tag.track().and_then(|n| i32::try_from(n).ok());
+        meta.disc_number = tag.disk().and_then(|n| i32::try_from(n).ok());
+        meta.year = tag.year().and_then(|n| i32::try_from(n).ok());
+        // Album artist is not in the Accessor trait — read it from the tag items directly.
+        meta.album_artist = tag
+            .get_string(&lofty::tag::ItemKey::AlbumArtist)
+            .map(|s| s.to_string());
+    }
+
+    Ok(meta)
+}
diff --git a/src/scanner.rs b/src/scanner.rs
new file mode 100644
index 0000000..2de3ad6
--- /dev/null
+++ b/src/scanner.rs
@@ -0,0 +1,126 @@
+use std::path::{Path, PathBuf};
+use std::time::UNIX_EPOCH;
+
+use chrono::NaiveDateTime;
+use walkdir::WalkDir;
+
+/// Supported music file extensions (lowercase); a file's extension is lowercased before lookup.
+pub const MUSIC_EXTENSIONS: &[&str] = &[
+    "mp3", "flac", "ogg", "opus", "m4a", "wav", "wma", "aac", "alac",
+];
+
+/// A discovered music file with filesystem metadata.
+#[derive(Debug, Clone)]
+pub struct ScannedFile {
+    pub path: PathBuf, // path as yielded by the directory walk
+    pub file_size: i64, // size in bytes (`Metadata::len`, cast to i64)
+    pub mtime: NaiveDateTime, // UTC modification time, whole seconds; type default if unreadable
+}
+
+/// Walk `root` recursively (following symlinks) and collect every music file found.
+///
+/// An entry is kept when it is a regular file whose lowercased extension is listed in
+/// [`MUSIC_EXTENSIONS`]. Traversal errors are returned as `Err` items; a file whose
+/// filesystem metadata cannot be read is logged and left out of the result.
+pub fn scan_directory(root: &Path) -> Vec<Result<ScannedFile, walkdir::Error>> {
+    let mut found = Vec::new();
+    for entry in WalkDir::new(root).follow_links(true) {
+        let entry = match entry {
+            Ok(entry) => entry,
+            Err(err) => {
+                found.push(Err(err));
+                continue;
+            }
+        };
+        if !entry.file_type().is_file() {
+            continue;
+        }
+
+        // Keep only files with a recognised (case-insensitive) extension.
+        let path = entry.path();
+        let Some(ext) = path.extension().and_then(|raw| raw.to_str()) else {
+            continue;
+        };
+        if !MUSIC_EXTENSIONS.contains(&ext.to_lowercase().as_str()) {
+            continue;
+        }
+
+        // Read filesystem metadata
+        let fs_meta = match std::fs::metadata(path) {
+            Ok(fs_meta) => fs_meta,
+            Err(e) => {
+                tracing::warn!(path = %path.display(), "failed to read metadata: {e}");
+                continue;
+            }
+        };
+
+        // Whole seconds since the Unix epoch; any failure falls back to the type's default.
+        let mtime = fs_meta
+            .modified()
+            .ok()
+            .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
+            .and_then(|d| chrono::DateTime::from_timestamp(d.as_secs() as i64, 0))
+            .map(|dt| dt.naive_utc())
+            .unwrap_or_default();
+
+        found.push(Ok(ScannedFile {
+            path: path.to_owned(),
+            file_size: fs_meta.len() as i64,
+            mtime,
+        }));
+    }
+
+    found
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fs;
+    use tempfile::TempDir;
+
+    /// Scans `root`, keeping only the entries that were read successfully.
+    fn scan_ok(root: &Path) -> Vec<ScannedFile> {
+        scan_directory(root).into_iter().filter_map(Result::ok).collect()
+    }
+
+    /// Music files are picked up at any depth; other file types are left out.
+    #[test]
+    fn test_scan_finds_music_files() {
+        let tmp = TempDir::new().unwrap();
+        let root = tmp.path();
+        fs::write(root.join("song.mp3"), b"fake mp3").unwrap();
+        fs::write(root.join("song.flac"), b"fake flac").unwrap();
+        fs::write(root.join("readme.txt"), b"not music").unwrap();
+        fs::create_dir_all(root.join("subdir")).unwrap();
+        fs::write(root.join("subdir/deep.ogg"), b"fake ogg").unwrap();
+
+        // Three music files are expected; readme.txt is not one of them.
+        let found = scan_ok(root);
+        assert_eq!(found.len(), 3);
+
+        let has = |name: &str| found.iter().any(|f| f.path.file_name().unwrap() == name);
+        assert!(has("song.mp3"));
+        assert!(has("song.flac"));
+        assert!(has("deep.ogg"));
+    }
+
+    /// Extension matching ignores letter case.
+    #[test]
+    fn test_scan_case_insensitive_extensions() {
+        let tmp = TempDir::new().unwrap();
+        for name in ["song.MP3", "song.Flac"] {
+            fs::write(tmp.path().join(name), b"fake").unwrap();
+        }
+
+        assert_eq!(scan_ok(tmp.path()).len(), 2);
+    }
+
+    /// An empty directory yields no entries at all, not even errors.
+    #[test]
+    fn test_scan_empty_directory() {
+        let tmp = TempDir::new().unwrap();
+        let results = scan_directory(tmp.path());
+        assert!(results.is_empty());
+    }
+}
diff --git a/tests/integration.rs b/tests/integration.rs
new file mode 100644
index 0000000..c6d7bea
--- /dev/null
+++ b/tests/integration.rs
@@ -0,0 +1,191 @@
+use std::fs;
+use std::io::Write;
+
+use lofty::tag::TagExt;
+use shanty_db::{Database, queries};
+use shanty_index::{ScanConfig, run_scan};
+use tempfile::TempDir;
+
+/// Create a minimal valid MP3 file with ID3v2 tags using lofty.
+///
+/// The audio part is ten identical frames: a 4-byte MPEG header followed by zero bytes.
+/// The tag carries `title`, `artist` and `album`, plus fixed values for track (1), disc (1),
+/// year (2024) and genre ("Rock"); the album artist is set to `artist` as well.
+///
+/// Panics if the file cannot be written or tagged.
+fn create_test_mp3(dir: &std::path::Path, filename: &str, title: &str, artist: &str, album: &str) {
+    use lofty::config::WriteOptions;
+    use lofty::tag::{Accessor, ItemKey, Tag, TagType};
+
+    const FRAME_LEN: usize = 417; // standard frame size for 128kbps/44100Hz
+    const FRAME_COUNT: usize = 10; // a few frames so lofty recognizes it as valid audio
+
+    // One MPEG1 Layer 3 frame (128kbps, 44100Hz, stereo): 4-byte header, rest zero padding.
+    let mut frame = [0u8; FRAME_LEN];
+    frame[..4].copy_from_slice(&[0xFF, 0xFB, 0x90, 0x00]);
+
+    let path = dir.join(filename);
+    fs::write(&path, frame.repeat(FRAME_COUNT)).unwrap();
+
+    // Now write tags using lofty
+    let mut tag = Tag::new(TagType::Id3v2);
+    tag.set_title(title.to_owned());
+    tag.set_artist(artist.to_owned());
+    tag.set_album(album.to_owned());
+    tag.set_track(1);
+    tag.set_disk(1);
+    tag.set_year(2024);
+    tag.set_genre("Rock".to_owned());
+    tag.insert(lofty::tag::TagItem::new(
+        ItemKey::AlbumArtist,
+        lofty::tag::ItemValue::Text(artist.to_owned()),
+    ));
+
+    tag.save_to_path(&path, WriteOptions::default()).unwrap();
+}
+
+/// Open a fresh in-memory SQLite database for one test.
+async fn test_db() -> Database {
+    let opened = Database::new("sqlite::memory:").await;
+    opened.expect("failed to create test database")
+}
+
+/// Two tagged files sharing artist and album produce two tracks, one artist, one album.
+#[tokio::test]
+async fn test_scan_indexes_music_files() {
+    let db = test_db().await;
+    let music = TempDir::new().unwrap();
+    for (file, title) in [("song1.mp3", "Time"), ("song2.mp3", "Money")] {
+        create_test_mp3(music.path(), file, title, "Pink Floyd", "DSOTM");
+    }
+
+    let config = ScanConfig {
+        root: music.path().to_owned(),
+        dry_run: false,
+        concurrency: 2,
+    };
+
+    let stats = run_scan(db.conn(), &config).await.unwrap();
+    let counts = (stats.files_found, stats.files_indexed, stats.files_errored);
+    assert_eq!(counts, (2, 2, 0));
+
+    // Both files must have become track rows.
+    let tracks = queries::tracks::list(db.conn(), 100, 0).await.unwrap();
+    assert_eq!(tracks.len(), 2);
+
+    // The shared artist must exist...
+    let artist = queries::artists::find_by_name(db.conn(), "Pink Floyd")
+        .await
+        .unwrap()
+        .expect("artist row should exist");
+
+    // ...and be linked to exactly one album with the expected name.
+    let albums = queries::albums::get_by_artist(db.conn(), artist.id)
+        .await
+        .unwrap();
+    assert_eq!(albums.len(), 1);
+    assert_eq!(albums[0].name, "DSOTM");
+}
+
+/// A second scan over an untouched file skips it instead of indexing it again.
+#[tokio::test]
+async fn test_incremental_scan_skips_unchanged() {
+    let db = test_db().await;
+    let music = TempDir::new().unwrap();
+    create_test_mp3(music.path(), "song.mp3", "Time", "Pink Floyd", "DSOTM");
+
+    let config = ScanConfig {
+        root: music.path().to_owned(),
+        dry_run: false,
+        concurrency: 1,
+    };
+
+    // First pass: nothing is in the database yet, so the file is indexed.
+    let first = run_scan(db.conn(), &config).await.unwrap();
+    assert_eq!(first.files_indexed, 1);
+    assert_eq!(first.files_skipped, 0);
+
+    // Second pass: the stored mtime still matches, so the file is skipped.
+    let second = run_scan(db.conn(), &config).await.unwrap();
+    assert_eq!(second.files_indexed, 0);
+    assert_eq!(second.files_skipped, 1);
+}
+
+/// A dry run reports the file as found and indexed but writes no track rows.
+#[tokio::test]
+async fn test_dry_run_does_not_write() {
+    let db = test_db().await;
+    let music = TempDir::new().unwrap();
+    create_test_mp3(music.path(), "song.mp3", "Time", "Pink Floyd", "DSOTM");
+
+    let config = ScanConfig {
+        root: music.path().to_owned(),
+        dry_run: true,
+        concurrency: 1,
+    };
+    let stats = run_scan(db.conn(), &config).await.unwrap();
+
+    assert_eq!(stats.files_found, 1);
+    assert_eq!(stats.files_indexed, 1); // counted as indexed in dry-run
+
+    // But DB should be empty
+    let track_count = queries::tracks::list(db.conn(), 100, 0).await.unwrap().len();
+    assert_eq!(track_count, 0);
+}
+
+/// An untagged but parseable MP3 is still indexed, with empty tag fields.
+#[tokio::test]
+async fn test_partial_metadata_still_indexed() {
+    let db = test_db().await;
+    let music = TempDir::new().unwrap();
+
+    // Create a file with minimal valid audio but no tags: ten frames, each a 4-byte
+    // MPEG header followed by zero padding up to 417 bytes.
+    let mut frame = [0u8; 417];
+    frame[..4].copy_from_slice(&[0xFF, 0xFB, 0x90, 0x00]);
+    let mut audio = fs::File::create(music.path().join("untagged.mp3")).unwrap();
+    for _ in 0..10 {
+        audio.write_all(&frame).unwrap();
+    }
+    drop(audio);
+
+    let config = ScanConfig {
+        root: music.path().to_owned(),
+        dry_run: false,
+        concurrency: 1,
+    };
+
+    let stats = run_scan(db.conn(), &config).await.unwrap();
+    assert_eq!(stats.files_found, 1);
+    assert_eq!(stats.files_indexed, 1);
+
+    // Track should exist but with NULL metadata
+    let tracks = queries::tracks::list(db.conn(), 100, 0).await.unwrap();
+    assert_eq!(tracks.len(), 1);
+    let track = &tracks[0];
+    assert!(track.title.is_none());
+    assert!(track.artist.is_none());
+
+    // But should still have file-level info
+    assert!(track.file_size > 0);
+    assert!(track.codec.is_some());
+}
+
+/// Files without a music extension are never reported as found.
+#[tokio::test]
+async fn test_non_music_files_ignored() {
+    let db = test_db().await;
+    let clutter = TempDir::new().unwrap();
+    for name in ["readme.txt", "cover.jpg"] {
+        fs::write(clutter.path().join(name), b"not music").unwrap();
+    }
+    fs::write(clutter.path().join("data.json"), b"{}").unwrap();
+
+    let config = ScanConfig {
+        root: clutter.path().to_owned(),
+        dry_run: false,
+        concurrency: 1,
+    };
+    let stats = run_scan(db.conn(), &config).await.unwrap();
+    assert_eq!(stats.files_found, 0);
+}