Initial commit
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
target/
|
||||
.env
|
||||
*.db
|
||||
*.db-journal
|
||||
26
Cargo.toml
Normal file
26
Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
[package]
|
||||
name = "shanty-index"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
description = "Music file indexing and metadata extraction for Shanty"
|
||||
repository = "ssh://connor@git.rcjohnstone.com:2222/Shanty/index.git"
|
||||
|
||||
[dependencies]
|
||||
shanty-db = { path = "../shanty-db" }
|
||||
sea-orm = { version = "1", features = ["sqlx-sqlite", "runtime-tokio-native-tls"] }
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
thiserror = "2"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
lofty = "0.22"
|
||||
walkdir = "2"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
dirs = "6"
|
||||
anyhow = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1", features = ["full", "test-util"] }
|
||||
tempfile = "3"
|
||||
36
readme.md
Normal file
36
readme.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# shanty-index
|
||||
|
||||
Music file indexing and metadata extraction for [Shanty](ssh://connor@git.rcjohnstone.com:2222/Shanty/shanty.git).
|
||||
|
||||
Scans a directory tree, extracts embedded metadata (ID3, Vorbis comments, MP4 tags, etc.)
|
||||
using `lofty`, and upserts everything into the Shanty database. Supports incremental
|
||||
re-indexing via file modification time tracking.
|
||||
|
||||
## Usage
|
||||
|
||||
```sh
|
||||
# Scan a directory
|
||||
shanty-index /path/to/music
|
||||
|
||||
# Dry run (no DB writes)
|
||||
shanty-index /path/to/music --dry-run -vv
|
||||
|
||||
# Custom database location
|
||||
shanty-index /path/to/music --database sqlite:///path/to/shanty.db?mode=rwc
|
||||
```
|
||||
|
||||
## As a library
|
||||
|
||||
```rust
|
||||
use shanty_index::{ScanConfig, run_scan};
|
||||
use shanty_db::Database;
|
||||
|
||||
let db = Database::new("sqlite::memory:").await?;
|
||||
let config = ScanConfig {
|
||||
root: "/path/to/music".into(),
|
||||
dry_run: false,
|
||||
concurrency: 4,
|
||||
};
|
||||
let stats = run_scan(db.conn(), &config).await?;
|
||||
println!("{stats}");
|
||||
```
|
||||
30
src/error.rs
Normal file
30
src/error.rs
Normal file
@@ -0,0 +1,30 @@
|
||||
use shanty_db::DbError;
|
||||
|
||||
/// Unified error type for all indexing operations in this crate.
#[derive(Debug, thiserror::Error)]
pub enum IndexError {
    /// Database-layer failure propagated from `shanty_db`.
    #[error("database error: {0}")]
    Db(#[from] DbError),

    /// Filesystem I/O failure (opening/reading files, stat calls, …).
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),

    /// Tag/metadata extraction failure. Stringly typed: `LoftyError` is
    /// flattened to its display string via a manual `From` impl.
    #[error("metadata error: {0}")]
    Metadata(String),

    /// Directory-walk failure reported by `walkdir` (e.g. permission denied).
    #[error("walkdir error: {0}")]
    WalkDir(#[from] walkdir::Error),

    /// A spawned tokio task panicked or was cancelled before completing.
    #[error("task join error: {0}")]
    Join(#[from] tokio::task::JoinError),

    /// Catch-all for errors that have no dedicated variant.
    #[error("{0}")]
    Other(String),
}
|
||||
|
||||
impl From<lofty::error::LoftyError> for IndexError {
|
||||
fn from(e: lofty::error::LoftyError) -> Self {
|
||||
IndexError::Metadata(e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
pub type IndexResult<T> = Result<T, IndexError>;
|
||||
186
src/indexer.rs
Normal file
186
src/indexer.rs
Normal file
@@ -0,0 +1,186 @@
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use sea_orm::{ActiveValue::Set, DatabaseConnection, NotSet};
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
use shanty_db::queries;
|
||||
|
||||
use crate::error::IndexResult;
|
||||
use crate::metadata::{self, MusicMetadata};
|
||||
use crate::scanner::{self, ScannedFile};
|
||||
use crate::{ScanConfig, ScanStats};
|
||||
|
||||
/// Process a single file: check mtime, extract metadata, upsert to DB.
/// Returns `Ok(true)` if indexed, `Ok(false)` if skipped.
///
/// Per-file pipeline:
/// 1. Skip early if the DB already holds this path with an unchanged mtime.
/// 2. Extract tags on a blocking thread (lofty parsing is synchronous).
/// 3. Unless `dry_run`, upsert artist → album → track, in that order so the
///    album row can reference the freshly-upserted artist id.
async fn process_file(
    conn: &DatabaseConnection,
    scanned: &ScannedFile,
    dry_run: bool,
) -> IndexResult<bool> {
    // The DB keys tracks by path string, so lossy UTF-8 conversion is applied
    // consistently on both lookup and insert.
    let file_path_str = scanned.path.to_string_lossy().to_string();

    // Check if file already exists with same mtime
    if let Some(existing) = queries::tracks::get_by_path(conn, &file_path_str).await? {
        if let Some(existing_mtime) = existing.file_mtime {
            // Compare at second granularity
            if existing_mtime.and_utc().timestamp() == scanned.mtime.and_utc().timestamp() {
                tracing::debug!(path = %file_path_str, "skipping (mtime unchanged)");
                return Ok(false);
            }
        }
    }

    // Extract metadata (CPU-bound, run in blocking thread)
    let path = scanned.path.clone();
    let meta: MusicMetadata = tokio::task::spawn_blocking(move || {
        metadata::extract_metadata(&path)
    })
    .await??; // outer ? = task join error, inner ? = extraction error

    tracing::info!(
        path = %file_path_str,
        title = ?meta.title,
        artist = ?meta.artist,
        album = ?meta.album,
        "indexed"
    );

    // Dry-run still counts as "indexed": metadata was extracted and logged.
    if dry_run {
        return Ok(true);
    }

    // Upsert artist (use album_artist if available, fall back to artist)
    let artist_name = meta
        .album_artist
        .as_deref()
        .or(meta.artist.as_deref());
    let artist_id = match artist_name {
        Some(name) if !name.is_empty() => {
            Some(queries::artists::upsert(conn, name, None).await?.id)
        }
        _ => None,
    };

    // Upsert album
    let album_id = match meta.album.as_deref() {
        Some(album_name) if !album_name.is_empty() => {
            let album_artist = meta
                .album_artist
                .as_deref()
                .or(meta.artist.as_deref())
                .unwrap_or("Unknown Artist");
            Some(
                queries::albums::upsert(conn, album_name, album_artist, None, artist_id)
                    .await?
                    .id,
            )
        }
        _ => None,
    };

    // Upsert track.
    // NOTE(review): NotSet fields (fingerprint, musicbrainz_id, added_at,
    // updated_at) are presumably preserved/defaulted by the upsert in
    // shanty_db — confirm against that crate's queries::tracks::upsert.
    let active = shanty_db::entities::track::ActiveModel {
        id: NotSet,
        file_path: Set(file_path_str),
        title: Set(meta.title),
        artist: Set(meta.artist),
        album: Set(meta.album),
        album_artist: Set(meta.album_artist),
        track_number: Set(meta.track_number),
        disc_number: Set(meta.disc_number),
        duration: Set(meta.duration),
        genre: Set(meta.genre),
        year: Set(meta.year),
        codec: Set(meta.codec),
        bitrate: Set(meta.bitrate),
        file_size: Set(scanned.file_size),
        fingerprint: NotSet,
        musicbrainz_id: NotSet,
        artist_id: Set(artist_id),
        album_id: Set(album_id),
        file_mtime: Set(Some(scanned.mtime)),
        added_at: NotSet,
        updated_at: NotSet,
    };
    queries::tracks::upsert(conn, active).await?;

    Ok(true)
}
|
||||
|
||||
/// Run the full indexing pipeline: scan directory, extract metadata, upsert to DB.
///
/// Two phases:
/// - Phase 1 (sync): walk the tree collecting candidate files. Walk errors
///   are counted in `files_errored` but do not abort the scan.
/// - Phase 2 (async): one tokio task per file, bounded by a semaphore of
///   `config.concurrency` permits; stats accumulate behind a mutex.
pub async fn index_directory(
    conn: &DatabaseConnection,
    config: &ScanConfig,
) -> IndexResult<ScanStats> {
    tracing::info!(root = %config.root.display(), "starting scan");

    // Phase 1: collect all music files
    let scan_results = scanner::scan_directory(&config.root);
    let mut files = Vec::new();
    let mut stats = ScanStats::default();

    for result in scan_results {
        match result {
            Ok(f) => files.push(f),
            Err(e) => {
                tracing::warn!("scan error: {e}");
                stats.files_errored += 1;
            }
        }
    }
    stats.files_found = files.len() as u64;
    tracing::info!(count = stats.files_found, "found music files");

    // Phase 2: process files with bounded concurrency
    let semaphore = Arc::new(Semaphore::new(config.concurrency));
    let stats = Arc::new(Mutex::new(stats));
    let mut handles = Vec::new();

    for file in files {
        // Acquire before spawning to backpressure task creation; the permit
        // is moved into the task and released when the task finishes.
        // unwrap: the semaphore is never closed, so acquire cannot fail.
        let permit = semaphore.clone().acquire_owned().await.unwrap();
        let conn = conn.clone();
        let stats = stats.clone();
        let dry_run = config.dry_run;

        handles.push(tokio::spawn(async move {
            let _permit = permit;
            match process_file(&conn, &file, dry_run).await {
                Ok(true) => {
                    let mut s = stats.lock().unwrap();
                    s.files_scanned += 1;
                    s.files_indexed += 1;
                }
                Ok(false) => {
                    let mut s = stats.lock().unwrap();
                    s.files_scanned += 1;
                    s.files_skipped += 1;
                }
                Err(e) => {
                    // Per-file failures are logged and counted, not propagated,
                    // so one bad file doesn't kill the whole scan.
                    tracing::error!(path = %file.path.display(), "indexing error: {e}");
                    let mut s = stats.lock().unwrap();
                    s.files_scanned += 1;
                    s.files_errored += 1;
                }
            }
        }));
    }

    for handle in handles {
        handle.await?;
    }

    // All tasks were joined above, so this Arc has exactly one owner.
    let final_stats = Arc::try_unwrap(stats)
        .expect("all tasks completed")
        .into_inner()
        .unwrap();

    tracing::info!(
        found = final_stats.files_found,
        indexed = final_stats.files_indexed,
        skipped = final_stats.files_skipped,
        errored = final_stats.files_errored,
        "scan complete"
    );

    Ok(final_stats)
}
|
||||
68
src/lib.rs
Normal file
68
src/lib.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
//! Music file indexing and metadata extraction for Shanty.
|
||||
//!
|
||||
//! Scans a directory tree of music files, extracts embedded metadata (ID3, Vorbis
|
||||
//! comments, MP4 tags, etc.), and upserts everything into the Shanty database.
|
||||
//! Supports incremental re-indexing via file modification time tracking.
|
||||
|
||||
pub mod error;
|
||||
pub mod indexer;
|
||||
pub mod metadata;
|
||||
pub mod scanner;
|
||||
|
||||
pub use error::{IndexError, IndexResult};
|
||||
|
||||
use std::fmt;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use sea_orm::DatabaseConnection;
|
||||
|
||||
/// Configuration for a scan operation.
///
/// Construct directly, or start from [`ScanConfig::default`] and override
/// individual fields.
// Added Debug + Clone: public config types should be printable in logs and
// cheaply copyable into spawned tasks; all fields already support both.
#[derive(Debug, Clone)]
pub struct ScanConfig {
    /// Root directory to scan for music files.
    pub root: PathBuf,
    /// If true, scan and extract metadata but don't write to the database.
    pub dry_run: bool,
    /// Maximum number of files to process concurrently.
    pub concurrency: usize,
}

impl Default for ScanConfig {
    /// Empty root, writes enabled, four concurrent workers.
    fn default() -> Self {
        Self {
            root: PathBuf::new(),
            dry_run: false,
            concurrency: 4,
        }
    }
}
|
||||
|
||||
/// Statistics from a completed scan.
///
/// `files_scanned` counts files that reached the processing phase; walk-time
/// errors increment `files_errored` without touching `files_scanned`.
// Added PartialEq + Eq (all fields are u64) so callers and tests can compare
// whole stats snapshots directly.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ScanStats {
    /// Total music files discovered in the directory tree.
    pub files_found: u64,
    /// Files that were processed (scanned for metadata).
    pub files_scanned: u64,
    /// Files skipped because their modification time hasn't changed.
    pub files_skipped: u64,
    /// Files successfully indexed (upserted to DB or logged in dry-run).
    pub files_indexed: u64,
    /// Files that encountered errors during processing.
    pub files_errored: u64,
}

impl fmt::Display for ScanStats {
    /// One-line human-readable summary, printed by the CLI after a scan.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "found: {}, indexed: {}, skipped: {}, errors: {}",
            self.files_found, self.files_indexed, self.files_skipped, self.files_errored,
        )
    }
}
|
||||
|
||||
/// Main entry point for library consumers. Scans the configured directory
/// and indexes all music files into the database.
///
/// Thin wrapper over [`indexer::index_directory`]; see that function for the
/// scan/process pipeline details.
///
/// # Errors
///
/// Returns any [`IndexError`] raised while walking the tree, extracting
/// metadata, or writing to the database.
pub async fn run_scan(conn: &DatabaseConnection, config: &ScanConfig) -> IndexResult<ScanStats> {
    indexer::index_directory(conn, config).await
}
|
||||
83
src/main.rs
Normal file
83
src/main.rs
Normal file
@@ -0,0 +1,83 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::Parser;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use shanty_db::Database;
|
||||
use shanty_index::{ScanConfig, run_scan};
|
||||
|
||||
// Command-line arguments for the `shanty-index` binary.
// NOTE: the `///` doc comments on fields below ARE the clap help text, so
// they are left byte-identical; commentary lives in `//` comments only.
#[derive(Parser)]
#[command(name = "shanty-index", about = "Index music files into the Shanty database")]
struct Cli {
    /// Directory to scan for music files.
    path: PathBuf,

    /// Database URL. Defaults to sqlite://<XDG_DATA_HOME>/shanty/shanty.db?mode=rwc
    // Also settable via the SHANTY_DATABASE_URL environment variable.
    #[arg(long, env = "SHANTY_DATABASE_URL")]
    database: Option<String>,

    /// Scan and extract metadata but don't write to the database.
    #[arg(long)]
    dry_run: bool,

    /// Increase verbosity (-v info, -vv debug, -vvv trace).
    // Count action: each -v increments; mapped to a filter string in main().
    #[arg(short, long, action = clap::ArgAction::Count)]
    verbose: u8,

    /// Number of files to process concurrently.
    #[arg(long, default_value = "4")]
    concurrency: usize,
}
|
||||
|
||||
fn default_database_url() -> String {
|
||||
let data_dir = dirs::data_dir()
|
||||
.unwrap_or_else(|| PathBuf::from("."))
|
||||
.join("shanty");
|
||||
std::fs::create_dir_all(&data_dir).ok();
|
||||
let db_path = data_dir.join("shanty.db");
|
||||
format!("sqlite://{}?mode=rwc", db_path.display())
|
||||
}
|
||||
|
||||
/// CLI entry point: parse args, configure tracing, validate the scan path,
/// connect to the database, run the scan, and print a summary line.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();

    // Set up tracing. An explicit RUST_LOG env filter takes precedence over
    // the filter derived from -v flags.
    let filter = match cli.verbose {
        0 => "warn",
        1 => "info,shanty_index=info",
        2 => "info,shanty_index=debug",
        _ => "debug,shanty_index=trace",
    };
    tracing_subscriber::fmt()
        .with_env_filter(
            EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(filter)),
        )
        .init();

    // Validate scan path before touching the database.
    if !cli.path.is_dir() {
        anyhow::bail!("'{}' is not a directory", cli.path.display());
    }

    // Connect to database
    let database_url = cli.database.unwrap_or_else(default_database_url);
    tracing::info!(url = %database_url, "connecting to database");
    let db = Database::new(&database_url).await?;

    // Run scan
    let config = ScanConfig {
        root: cli.path,
        dry_run: cli.dry_run,
        concurrency: cli.concurrency,
    };

    if config.dry_run {
        println!("DRY RUN — no changes will be written to the database");
    }

    let stats = run_scan(db.conn(), &config).await?;
    println!("\nScan complete: {stats}");

    Ok(())
}
|
||||
82
src/metadata.rs
Normal file
82
src/metadata.rs
Normal file
@@ -0,0 +1,82 @@
|
||||
use std::path::Path;
|
||||
|
||||
use lofty::config::ParseOptions;
|
||||
use lofty::file::{AudioFile, FileType, TaggedFileExt};
|
||||
use lofty::probe::Probe;
|
||||
use lofty::tag::Accessor;
|
||||
|
||||
use crate::error::IndexResult;
|
||||
|
||||
/// Extracted metadata from a music file. All fields are optional since files
/// may have partial or missing tags.
// Added PartialEq so metadata snapshots can be compared in tests and
// change-detection logic. Eq is not possible: `duration` is f64.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct MusicMetadata {
    /// Track title from the tag.
    pub title: Option<String>,
    /// Track-level artist.
    pub artist: Option<String>,
    /// Album title.
    pub album: Option<String>,
    /// Album-level artist (e.g. ID3v2 TPE2), when present.
    pub album_artist: Option<String>,
    /// Track number within the disc.
    pub track_number: Option<i32>,
    /// Disc number within the release.
    pub disc_number: Option<i32>,
    /// Release year.
    pub year: Option<i32>,
    /// Genre string as stored in the tag.
    pub genre: Option<String>,
    /// Duration in seconds (fractional).
    pub duration: Option<f64>,
    /// Human-readable codec label derived from the container/file type.
    pub codec: Option<String>,
    /// Audio bitrate in kbps as reported by the parser.
    pub bitrate: Option<i32>,
}
|
||||
|
||||
/// Map lofty FileType to a human-readable codec string.
|
||||
fn file_type_to_codec(ft: FileType) -> &'static str {
|
||||
match ft {
|
||||
FileType::Aac => "AAC",
|
||||
FileType::Aiff => "AIFF",
|
||||
FileType::Ape => "APE",
|
||||
FileType::Flac => "FLAC",
|
||||
FileType::Mpeg => "MP3",
|
||||
FileType::Mp4 => "MP4/AAC",
|
||||
FileType::Mpc => "Musepack",
|
||||
FileType::Opus => "Opus",
|
||||
FileType::Vorbis => "Vorbis",
|
||||
FileType::Speex => "Speex",
|
||||
FileType::Wav => "WAV",
|
||||
FileType::WavPack => "WavPack",
|
||||
_ => "Unknown",
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract metadata from a music file. This is CPU-bound (sync).
///
/// Reads audio properties (duration, bitrate, codec) from the parsed file,
/// then tag fields from the primary tag, falling back to the first tag
/// present. Fields missing from the tag remain `None`.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or parsed by lofty
/// (converted to `IndexError::Metadata` / `IndexError::Io`).
pub fn extract_metadata(path: &Path) -> IndexResult<MusicMetadata> {
    let tagged_file = Probe::open(path)?
        .options(ParseOptions::default())
        .read()?;

    let mut meta = MusicMetadata::default();

    // Get audio properties (duration, bitrate, codec)
    let properties = tagged_file.properties();
    meta.duration = Some(properties.duration().as_secs_f64());
    meta.bitrate = properties.audio_bitrate().map(|b| b as i32);
    meta.codec = Some(file_type_to_codec(tagged_file.file_type()).to_string());

    // Get tag metadata — try primary tag first, fall back to first available
    let tag = tagged_file
        .primary_tag()
        .or_else(|| tagged_file.first_tag());

    if let Some(tag) = tag {
        meta.title = tag.title().map(|s| s.to_string());
        meta.artist = tag.artist().map(|s| s.to_string());
        meta.album = tag.album().map(|s| s.to_string());
        meta.genre = tag.genre().map(|s| s.to_string());
        // Cast to i32 for the DB schema; assumes tag numbers fit in i32 —
        // TODO confirm against real-world tag values.
        meta.track_number = tag.track().map(|n| n as i32);
        meta.disc_number = tag.disk().map(|n| n as i32);
        meta.year = tag.year().map(|n| n as i32);

        // Album artist is not in the Accessor trait — check tag items directly
        // Common keys: "ALBUMARTIST", "ALBUM ARTIST", "TPE2" (ID3v2)
        meta.album_artist = tag
            .get_string(&lofty::tag::ItemKey::AlbumArtist)
            .map(|s| s.to_string());
    }

    Ok(meta)
}
|
||||
126
src/scanner.rs
Normal file
126
src/scanner.rs
Normal file
@@ -0,0 +1,126 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
use chrono::NaiveDateTime;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
/// Supported music file extensions (lowercase).
///
/// Matching is effectively case-insensitive: the scanner lowercases each
/// file's extension before comparing against this list.
// NOTE(review): ALAC audio usually ships in an .m4a container — confirm a
// bare ".alac" extension actually occurs in target libraries.
pub const MUSIC_EXTENSIONS: &[&str] = &[
    "mp3", "flac", "ogg", "opus", "m4a", "wav", "wma", "aac", "alac",
];
|
||||
|
||||
/// A discovered music file with filesystem metadata.
#[derive(Debug, Clone)]
pub struct ScannedFile {
    /// Path as yielded by the directory walk (rooted at the scan root).
    pub path: PathBuf,
    /// File size in bytes, from `fs::metadata`.
    pub file_size: i64,
    /// Last-modified time in UTC, truncated to whole seconds
    /// (sub-second precision is discarded during conversion).
    pub mtime: NaiveDateTime,
}
|
||||
|
||||
/// Recursively scan `root` for music files, yielding `ScannedFile` entries.
|
||||
pub fn scan_directory(root: &Path) -> Vec<Result<ScannedFile, walkdir::Error>> {
|
||||
WalkDir::new(root)
|
||||
.follow_links(true)
|
||||
.into_iter()
|
||||
.filter_map(|entry| {
|
||||
match entry {
|
||||
Ok(e) => {
|
||||
if !e.file_type().is_file() {
|
||||
return None;
|
||||
}
|
||||
let path = e.path();
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.map(|e| e.to_lowercase());
|
||||
let ext = match ext {
|
||||
Some(e) => e,
|
||||
None => return None,
|
||||
};
|
||||
if !MUSIC_EXTENSIONS.contains(&ext.as_str()) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Read filesystem metadata
|
||||
match std::fs::metadata(path) {
|
||||
Ok(meta) => {
|
||||
let file_size = meta.len() as i64;
|
||||
let mtime = meta
|
||||
.modified()
|
||||
.ok()
|
||||
.and_then(|t| t.duration_since(UNIX_EPOCH).ok())
|
||||
.and_then(|d| {
|
||||
chrono::DateTime::from_timestamp(d.as_secs() as i64, 0)
|
||||
.map(|dt| dt.naive_utc())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
Some(Ok(ScannedFile {
|
||||
path: path.to_owned(),
|
||||
file_size,
|
||||
mtime,
|
||||
}))
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(path = %path.display(), "failed to read metadata: {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => Some(Err(e)),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // Music files are found recursively; non-music files are ignored.
    #[test]
    fn test_scan_finds_music_files() {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join("song.mp3"), b"fake mp3").unwrap();
        fs::write(dir.path().join("song.flac"), b"fake flac").unwrap();
        fs::write(dir.path().join("readme.txt"), b"not music").unwrap();
        fs::create_dir_all(dir.path().join("subdir")).unwrap();
        fs::write(dir.path().join("subdir/deep.ogg"), b"fake ogg").unwrap();

        let results: Vec<_> = scan_directory(dir.path())
            .into_iter()
            .filter_map(|r| r.ok())
            .collect();

        assert_eq!(results.len(), 3);
        let names: Vec<_> = results
            .iter()
            .map(|f| f.path.file_name().unwrap().to_str().unwrap().to_string())
            .collect();
        assert!(names.contains(&"song.mp3".to_string()));
        assert!(names.contains(&"song.flac".to_string()));
        assert!(names.contains(&"deep.ogg".to_string()));
    }

    // Extensions are lowercased before matching, so .MP3 / .Flac both count.
    #[test]
    fn test_scan_case_insensitive_extensions() {
        let dir = TempDir::new().unwrap();
        fs::write(dir.path().join("song.MP3"), b"fake").unwrap();
        fs::write(dir.path().join("song.Flac"), b"fake").unwrap();

        let results: Vec<_> = scan_directory(dir.path())
            .into_iter()
            .filter_map(|r| r.ok())
            .collect();

        assert_eq!(results.len(), 2);
    }

    // An empty tree yields no entries (and no errors).
    #[test]
    fn test_scan_empty_directory() {
        let dir = TempDir::new().unwrap();
        let results = scan_directory(dir.path());
        assert!(results.is_empty());
    }
}
|
||||
191
tests/integration.rs
Normal file
191
tests/integration.rs
Normal file
@@ -0,0 +1,191 @@
|
||||
use std::fs;
|
||||
use std::io::Write;
|
||||
|
||||
use lofty::tag::TagExt;
|
||||
use shanty_db::{Database, queries};
|
||||
use shanty_index::{ScanConfig, run_scan};
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Create a minimal valid MP3 file with ID3v2 tags using lofty.
///
/// Writes a handful of fixed MPEG frames (so lofty accepts the file as
/// audio), then attaches title/artist/album/track/disc/year/genre and an
/// AlbumArtist item. `artist` is reused as the album artist.
fn create_test_mp3(dir: &std::path::Path, filename: &str, title: &str, artist: &str, album: &str) {
    use lofty::config::WriteOptions;
    use lofty::tag::{Accessor, ItemKey, Tag, TagType};

    let path = dir.join(filename);

    // Write a minimal valid MPEG frame (silence, ~0.026s at 128kbps)
    // MPEG1 Layer 3, 128kbps, 44100Hz, stereo frame header + padding
    let frame_header: [u8; 4] = [0xFF, 0xFB, 0x90, 0x00];
    let frame_size = 417; // standard frame size for 128kbps/44100Hz
    let mut frame_data = vec![0u8; frame_size];
    frame_data[..4].copy_from_slice(&frame_header);

    // Write a few frames so lofty recognizes it as valid audio
    let mut file = fs::File::create(&path).unwrap();
    for _ in 0..10 {
        file.write_all(&frame_data).unwrap();
    }
    drop(file); // close before lofty reopens the file to write tags

    // Now write tags using lofty
    let mut tag = Tag::new(TagType::Id3v2);
    tag.set_title(title.to_string());
    tag.set_artist(artist.to_string());
    tag.set_album(album.to_string());
    tag.set_track(1);
    tag.set_disk(1);
    tag.set_year(2024);
    tag.set_genre("Rock".to_string());
    // AlbumArtist has no Accessor setter — insert the item directly.
    tag.insert(lofty::tag::TagItem::new(
        ItemKey::AlbumArtist,
        lofty::tag::ItemValue::Text(artist.to_string()),
    ));

    tag.save_to_path(&path, WriteOptions::default()).unwrap();
}
|
||||
|
||||
async fn test_db() -> Database {
|
||||
Database::new("sqlite::memory:")
|
||||
.await
|
||||
.expect("failed to create test database")
|
||||
}
|
||||
|
||||
// End-to-end: two tagged files → two tracks, one artist, one linked album.
#[tokio::test]
async fn test_scan_indexes_music_files() {
    let db = test_db().await;
    let dir = TempDir::new().unwrap();

    create_test_mp3(dir.path(), "song1.mp3", "Time", "Pink Floyd", "DSOTM");
    create_test_mp3(dir.path(), "song2.mp3", "Money", "Pink Floyd", "DSOTM");

    let config = ScanConfig {
        root: dir.path().to_owned(),
        dry_run: false,
        concurrency: 2,
    };

    let stats = run_scan(db.conn(), &config).await.unwrap();
    assert_eq!(stats.files_found, 2);
    assert_eq!(stats.files_indexed, 2);
    assert_eq!(stats.files_errored, 0);

    // Verify tracks in DB
    let tracks = queries::tracks::list(db.conn(), 100, 0).await.unwrap();
    assert_eq!(tracks.len(), 2);

    // Verify artist was created
    let artist = queries::artists::find_by_name(db.conn(), "Pink Floyd")
        .await
        .unwrap();
    assert!(artist.is_some());

    // Verify album was created and linked to artist
    let albums = queries::albums::get_by_artist(db.conn(), artist.unwrap().id)
        .await
        .unwrap();
    assert_eq!(albums.len(), 1);
    assert_eq!(albums[0].name, "DSOTM");
}

// Re-scanning an unchanged file takes the mtime fast-path and skips it.
#[tokio::test]
async fn test_incremental_scan_skips_unchanged() {
    let db = test_db().await;
    let dir = TempDir::new().unwrap();

    create_test_mp3(dir.path(), "song.mp3", "Time", "Pink Floyd", "DSOTM");

    let config = ScanConfig {
        root: dir.path().to_owned(),
        dry_run: false,
        concurrency: 1,
    };

    // First scan
    let stats = run_scan(db.conn(), &config).await.unwrap();
    assert_eq!(stats.files_indexed, 1);
    assert_eq!(stats.files_skipped, 0);

    // Second scan — should skip since mtime unchanged
    let stats = run_scan(db.conn(), &config).await.unwrap();
    assert_eq!(stats.files_indexed, 0);
    assert_eq!(stats.files_skipped, 1);
}

// Dry-run reports work done but leaves the database untouched.
#[tokio::test]
async fn test_dry_run_does_not_write() {
    let db = test_db().await;
    let dir = TempDir::new().unwrap();

    create_test_mp3(dir.path(), "song.mp3", "Time", "Pink Floyd", "DSOTM");

    let config = ScanConfig {
        root: dir.path().to_owned(),
        dry_run: true,
        concurrency: 1,
    };

    let stats = run_scan(db.conn(), &config).await.unwrap();
    assert_eq!(stats.files_found, 1);
    assert_eq!(stats.files_indexed, 1); // counted as indexed in dry-run

    // But DB should be empty
    let tracks = queries::tracks::list(db.conn(), 100, 0).await.unwrap();
    assert!(tracks.is_empty());
}

// A valid-audio file with no tags still gets a track row; tag fields NULL,
// file-level fields (size, codec) populated.
#[tokio::test]
async fn test_partial_metadata_still_indexed() {
    let db = test_db().await;
    let dir = TempDir::new().unwrap();

    // Create a file with minimal valid audio but no tags
    let frame_header: [u8; 4] = [0xFF, 0xFB, 0x90, 0x00];
    let frame_size = 417;
    let mut frame_data = vec![0u8; frame_size];
    frame_data[..4].copy_from_slice(&frame_header);

    let path = dir.path().join("untagged.mp3");
    let mut file = fs::File::create(&path).unwrap();
    for _ in 0..10 {
        file.write_all(&frame_data).unwrap();
    }
    drop(file);

    let config = ScanConfig {
        root: dir.path().to_owned(),
        dry_run: false,
        concurrency: 1,
    };

    let stats = run_scan(db.conn(), &config).await.unwrap();
    assert_eq!(stats.files_found, 1);
    assert_eq!(stats.files_indexed, 1);

    // Track should exist but with NULL metadata
    let tracks = queries::tracks::list(db.conn(), 100, 0).await.unwrap();
    assert_eq!(tracks.len(), 1);
    assert!(tracks[0].title.is_none());
    assert!(tracks[0].artist.is_none());
    // But should still have file-level info
    assert!(tracks[0].file_size > 0);
    assert!(tracks[0].codec.is_some());
}

// Files without a music extension never reach the processing phase.
#[tokio::test]
async fn test_non_music_files_ignored() {
    let db = test_db().await;
    let dir = TempDir::new().unwrap();

    fs::write(dir.path().join("readme.txt"), b"not music").unwrap();
    fs::write(dir.path().join("cover.jpg"), b"not music").unwrap();
    fs::write(dir.path().join("data.json"), b"{}").unwrap();

    let config = ScanConfig {
        root: dir.path().to_owned(),
        dry_run: false,
        concurrency: 1,
    };

    let stats = run_scan(db.conn(), &config).await.unwrap();
    assert_eq!(stats.files_found, 0);
}
|
||||
Reference in New Issue
Block a user