Initial commit
This commit is contained in:
@@ -0,0 +1,30 @@
|
||||
use shanty_db::DbError;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum IndexError {
|
||||
#[error("database error: {0}")]
|
||||
Db(#[from] DbError),
|
||||
|
||||
#[error("I/O error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
|
||||
#[error("metadata error: {0}")]
|
||||
Metadata(String),
|
||||
|
||||
#[error("walkdir error: {0}")]
|
||||
WalkDir(#[from] walkdir::Error),
|
||||
|
||||
#[error("task join error: {0}")]
|
||||
Join(#[from] tokio::task::JoinError),
|
||||
|
||||
#[error("{0}")]
|
||||
Other(String),
|
||||
}
|
||||
|
||||
impl From<lofty::error::LoftyError> for IndexError {
|
||||
fn from(e: lofty::error::LoftyError) -> Self {
|
||||
IndexError::Metadata(e.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
pub type IndexResult<T> = Result<T, IndexError>;
|
||||
+186
@@ -0,0 +1,186 @@
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use sea_orm::{ActiveValue::Set, DatabaseConnection, NotSet};
|
||||
use tokio::sync::Semaphore;
|
||||
|
||||
use shanty_db::queries;
|
||||
|
||||
use crate::error::IndexResult;
|
||||
use crate::metadata::{self, MusicMetadata};
|
||||
use crate::scanner::{self, ScannedFile};
|
||||
use crate::{ScanConfig, ScanStats};
|
||||
|
||||
/// Process a single file: check mtime, extract metadata, upsert to DB.
|
||||
/// Returns `Ok(true)` if indexed, `Ok(false)` if skipped.
|
||||
async fn process_file(
|
||||
conn: &DatabaseConnection,
|
||||
scanned: &ScannedFile,
|
||||
dry_run: bool,
|
||||
) -> IndexResult<bool> {
|
||||
let file_path_str = scanned.path.to_string_lossy().to_string();
|
||||
|
||||
// Check if file already exists with same mtime
|
||||
if let Some(existing) = queries::tracks::get_by_path(conn, &file_path_str).await? {
|
||||
if let Some(existing_mtime) = existing.file_mtime {
|
||||
// Compare at second granularity
|
||||
if existing_mtime.and_utc().timestamp() == scanned.mtime.and_utc().timestamp() {
|
||||
tracing::debug!(path = %file_path_str, "skipping (mtime unchanged)");
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract metadata (CPU-bound, run in blocking thread)
|
||||
let path = scanned.path.clone();
|
||||
let meta: MusicMetadata = tokio::task::spawn_blocking(move || {
|
||||
metadata::extract_metadata(&path)
|
||||
})
|
||||
.await??;
|
||||
|
||||
tracing::info!(
|
||||
path = %file_path_str,
|
||||
title = ?meta.title,
|
||||
artist = ?meta.artist,
|
||||
album = ?meta.album,
|
||||
"indexed"
|
||||
);
|
||||
|
||||
if dry_run {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
// Upsert artist (use album_artist if available, fall back to artist)
|
||||
let artist_name = meta
|
||||
.album_artist
|
||||
.as_deref()
|
||||
.or(meta.artist.as_deref());
|
||||
let artist_id = match artist_name {
|
||||
Some(name) if !name.is_empty() => {
|
||||
Some(queries::artists::upsert(conn, name, None).await?.id)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
// Upsert album
|
||||
let album_id = match meta.album.as_deref() {
|
||||
Some(album_name) if !album_name.is_empty() => {
|
||||
let album_artist = meta
|
||||
.album_artist
|
||||
.as_deref()
|
||||
.or(meta.artist.as_deref())
|
||||
.unwrap_or("Unknown Artist");
|
||||
Some(
|
||||
queries::albums::upsert(conn, album_name, album_artist, None, artist_id)
|
||||
.await?
|
||||
.id,
|
||||
)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
// Upsert track
|
||||
let active = shanty_db::entities::track::ActiveModel {
|
||||
id: NotSet,
|
||||
file_path: Set(file_path_str),
|
||||
title: Set(meta.title),
|
||||
artist: Set(meta.artist),
|
||||
album: Set(meta.album),
|
||||
album_artist: Set(meta.album_artist),
|
||||
track_number: Set(meta.track_number),
|
||||
disc_number: Set(meta.disc_number),
|
||||
duration: Set(meta.duration),
|
||||
genre: Set(meta.genre),
|
||||
year: Set(meta.year),
|
||||
codec: Set(meta.codec),
|
||||
bitrate: Set(meta.bitrate),
|
||||
file_size: Set(scanned.file_size),
|
||||
fingerprint: NotSet,
|
||||
musicbrainz_id: NotSet,
|
||||
artist_id: Set(artist_id),
|
||||
album_id: Set(album_id),
|
||||
file_mtime: Set(Some(scanned.mtime)),
|
||||
added_at: NotSet,
|
||||
updated_at: NotSet,
|
||||
};
|
||||
queries::tracks::upsert(conn, active).await?;
|
||||
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// Run the full indexing pipeline: scan directory, extract metadata, upsert to DB.
|
||||
pub async fn index_directory(
|
||||
conn: &DatabaseConnection,
|
||||
config: &ScanConfig,
|
||||
) -> IndexResult<ScanStats> {
|
||||
tracing::info!(root = %config.root.display(), "starting scan");
|
||||
|
||||
// Phase 1: collect all music files
|
||||
let scan_results = scanner::scan_directory(&config.root);
|
||||
let mut files = Vec::new();
|
||||
let mut stats = ScanStats::default();
|
||||
|
||||
for result in scan_results {
|
||||
match result {
|
||||
Ok(f) => files.push(f),
|
||||
Err(e) => {
|
||||
tracing::warn!("scan error: {e}");
|
||||
stats.files_errored += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
stats.files_found = files.len() as u64;
|
||||
tracing::info!(count = stats.files_found, "found music files");
|
||||
|
||||
// Phase 2: process files with bounded concurrency
|
||||
let semaphore = Arc::new(Semaphore::new(config.concurrency));
|
||||
let stats = Arc::new(Mutex::new(stats));
|
||||
let mut handles = Vec::new();
|
||||
|
||||
for file in files {
|
||||
let permit = semaphore.clone().acquire_owned().await.unwrap();
|
||||
let conn = conn.clone();
|
||||
let stats = stats.clone();
|
||||
let dry_run = config.dry_run;
|
||||
|
||||
handles.push(tokio::spawn(async move {
|
||||
let _permit = permit;
|
||||
match process_file(&conn, &file, dry_run).await {
|
||||
Ok(true) => {
|
||||
let mut s = stats.lock().unwrap();
|
||||
s.files_scanned += 1;
|
||||
s.files_indexed += 1;
|
||||
}
|
||||
Ok(false) => {
|
||||
let mut s = stats.lock().unwrap();
|
||||
s.files_scanned += 1;
|
||||
s.files_skipped += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!(path = %file.path.display(), "indexing error: {e}");
|
||||
let mut s = stats.lock().unwrap();
|
||||
s.files_scanned += 1;
|
||||
s.files_errored += 1;
|
||||
}
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
for handle in handles {
|
||||
handle.await?;
|
||||
}
|
||||
|
||||
let final_stats = Arc::try_unwrap(stats)
|
||||
.expect("all tasks completed")
|
||||
.into_inner()
|
||||
.unwrap();
|
||||
|
||||
tracing::info!(
|
||||
found = final_stats.files_found,
|
||||
indexed = final_stats.files_indexed,
|
||||
skipped = final_stats.files_skipped,
|
||||
errored = final_stats.files_errored,
|
||||
"scan complete"
|
||||
);
|
||||
|
||||
Ok(final_stats)
|
||||
}
|
||||
+68
@@ -0,0 +1,68 @@
|
||||
//! Music file indexing and metadata extraction for Shanty.
|
||||
//!
|
||||
//! Scans a directory tree of music files, extracts embedded metadata (ID3, Vorbis
|
||||
//! comments, MP4 tags, etc.), and upserts everything into the Shanty database.
|
||||
//! Supports incremental re-indexing via file modification time tracking.
|
||||
|
||||
pub mod error;
|
||||
pub mod indexer;
|
||||
pub mod metadata;
|
||||
pub mod scanner;
|
||||
|
||||
pub use error::{IndexError, IndexResult};
|
||||
|
||||
use std::fmt;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use sea_orm::DatabaseConnection;
|
||||
|
||||
/// Configuration for a scan operation.
|
||||
pub struct ScanConfig {
|
||||
/// Root directory to scan for music files.
|
||||
pub root: PathBuf,
|
||||
/// If true, scan and extract metadata but don't write to the database.
|
||||
pub dry_run: bool,
|
||||
/// Maximum number of files to process concurrently.
|
||||
pub concurrency: usize,
|
||||
}
|
||||
|
||||
impl Default for ScanConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
root: PathBuf::new(),
|
||||
dry_run: false,
|
||||
concurrency: 4,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Statistics from a completed scan.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct ScanStats {
|
||||
/// Total music files discovered in the directory tree.
|
||||
pub files_found: u64,
|
||||
/// Files that were processed (scanned for metadata).
|
||||
pub files_scanned: u64,
|
||||
/// Files skipped because their modification time hasn't changed.
|
||||
pub files_skipped: u64,
|
||||
/// Files successfully indexed (upserted to DB or logged in dry-run).
|
||||
pub files_indexed: u64,
|
||||
/// Files that encountered errors during processing.
|
||||
pub files_errored: u64,
|
||||
}
|
||||
|
||||
impl fmt::Display for ScanStats {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"found: {}, indexed: {}, skipped: {}, errors: {}",
|
||||
self.files_found, self.files_indexed, self.files_skipped, self.files_errored,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Main entry point for library consumers. Scans the configured directory
|
||||
/// and indexes all music files into the database.
|
||||
pub async fn run_scan(conn: &DatabaseConnection, config: &ScanConfig) -> IndexResult<ScanStats> {
|
||||
indexer::index_directory(conn, config).await
|
||||
}
|
||||
+83
@@ -0,0 +1,83 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::Parser;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use shanty_db::Database;
|
||||
use shanty_index::{ScanConfig, run_scan};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(name = "shanty-index", about = "Index music files into the Shanty database")]
|
||||
struct Cli {
|
||||
/// Directory to scan for music files.
|
||||
path: PathBuf,
|
||||
|
||||
/// Database URL. Defaults to sqlite://<XDG_DATA_HOME>/shanty/shanty.db?mode=rwc
|
||||
#[arg(long, env = "SHANTY_DATABASE_URL")]
|
||||
database: Option<String>,
|
||||
|
||||
/// Scan and extract metadata but don't write to the database.
|
||||
#[arg(long)]
|
||||
dry_run: bool,
|
||||
|
||||
/// Increase verbosity (-v info, -vv debug, -vvv trace).
|
||||
#[arg(short, long, action = clap::ArgAction::Count)]
|
||||
verbose: u8,
|
||||
|
||||
/// Number of files to process concurrently.
|
||||
#[arg(long, default_value = "4")]
|
||||
concurrency: usize,
|
||||
}
|
||||
|
||||
fn default_database_url() -> String {
|
||||
let data_dir = dirs::data_dir()
|
||||
.unwrap_or_else(|| PathBuf::from("."))
|
||||
.join("shanty");
|
||||
std::fs::create_dir_all(&data_dir).ok();
|
||||
let db_path = data_dir.join("shanty.db");
|
||||
format!("sqlite://{}?mode=rwc", db_path.display())
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> anyhow::Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
// Set up tracing
|
||||
let filter = match cli.verbose {
|
||||
0 => "warn",
|
||||
1 => "info,shanty_index=info",
|
||||
2 => "info,shanty_index=debug",
|
||||
_ => "debug,shanty_index=trace",
|
||||
};
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(filter)),
|
||||
)
|
||||
.init();
|
||||
|
||||
// Validate scan path
|
||||
if !cli.path.is_dir() {
|
||||
anyhow::bail!("'{}' is not a directory", cli.path.display());
|
||||
}
|
||||
|
||||
// Connect to database
|
||||
let database_url = cli.database.unwrap_or_else(default_database_url);
|
||||
tracing::info!(url = %database_url, "connecting to database");
|
||||
let db = Database::new(&database_url).await?;
|
||||
|
||||
// Run scan
|
||||
let config = ScanConfig {
|
||||
root: cli.path,
|
||||
dry_run: cli.dry_run,
|
||||
concurrency: cli.concurrency,
|
||||
};
|
||||
|
||||
if config.dry_run {
|
||||
println!("DRY RUN — no changes will be written to the database");
|
||||
}
|
||||
|
||||
let stats = run_scan(db.conn(), &config).await?;
|
||||
println!("\nScan complete: {stats}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -0,0 +1,82 @@
|
||||
use std::path::Path;
|
||||
|
||||
use lofty::config::ParseOptions;
|
||||
use lofty::file::{AudioFile, FileType, TaggedFileExt};
|
||||
use lofty::probe::Probe;
|
||||
use lofty::tag::Accessor;
|
||||
|
||||
use crate::error::IndexResult;
|
||||
|
||||
/// Extracted metadata from a music file. All fields are optional since files
|
||||
/// may have partial or missing tags.
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct MusicMetadata {
|
||||
pub title: Option<String>,
|
||||
pub artist: Option<String>,
|
||||
pub album: Option<String>,
|
||||
pub album_artist: Option<String>,
|
||||
pub track_number: Option<i32>,
|
||||
pub disc_number: Option<i32>,
|
||||
pub year: Option<i32>,
|
||||
pub genre: Option<String>,
|
||||
pub duration: Option<f64>,
|
||||
pub codec: Option<String>,
|
||||
pub bitrate: Option<i32>,
|
||||
}
|
||||
|
||||
/// Map lofty FileType to a human-readable codec string.
|
||||
fn file_type_to_codec(ft: FileType) -> &'static str {
|
||||
match ft {
|
||||
FileType::Aac => "AAC",
|
||||
FileType::Aiff => "AIFF",
|
||||
FileType::Ape => "APE",
|
||||
FileType::Flac => "FLAC",
|
||||
FileType::Mpeg => "MP3",
|
||||
FileType::Mp4 => "MP4/AAC",
|
||||
FileType::Mpc => "Musepack",
|
||||
FileType::Opus => "Opus",
|
||||
FileType::Vorbis => "Vorbis",
|
||||
FileType::Speex => "Speex",
|
||||
FileType::Wav => "WAV",
|
||||
FileType::WavPack => "WavPack",
|
||||
_ => "Unknown",
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract metadata from a music file. This is CPU-bound (sync).
|
||||
pub fn extract_metadata(path: &Path) -> IndexResult<MusicMetadata> {
|
||||
let tagged_file = Probe::open(path)?
|
||||
.options(ParseOptions::default())
|
||||
.read()?;
|
||||
|
||||
let mut meta = MusicMetadata::default();
|
||||
|
||||
// Get audio properties (duration, bitrate, codec)
|
||||
let properties = tagged_file.properties();
|
||||
meta.duration = Some(properties.duration().as_secs_f64());
|
||||
meta.bitrate = properties.audio_bitrate().map(|b| b as i32);
|
||||
meta.codec = Some(file_type_to_codec(tagged_file.file_type()).to_string());
|
||||
|
||||
// Get tag metadata — try primary tag first, fall back to first available
|
||||
let tag = tagged_file
|
||||
.primary_tag()
|
||||
.or_else(|| tagged_file.first_tag());
|
||||
|
||||
if let Some(tag) = tag {
|
||||
meta.title = tag.title().map(|s| s.to_string());
|
||||
meta.artist = tag.artist().map(|s| s.to_string());
|
||||
meta.album = tag.album().map(|s| s.to_string());
|
||||
meta.genre = tag.genre().map(|s| s.to_string());
|
||||
meta.track_number = tag.track().map(|n| n as i32);
|
||||
meta.disc_number = tag.disk().map(|n| n as i32);
|
||||
meta.year = tag.year().map(|n| n as i32);
|
||||
|
||||
// Album artist is not in the Accessor trait — check tag items directly
|
||||
// Common keys: "ALBUMARTIST", "ALBUM ARTIST", "TPE2" (ID3v2)
|
||||
meta.album_artist = tag
|
||||
.get_string(&lofty::tag::ItemKey::AlbumArtist)
|
||||
.map(|s| s.to_string());
|
||||
}
|
||||
|
||||
Ok(meta)
|
||||
}
|
||||
+126
@@ -0,0 +1,126 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::UNIX_EPOCH;
|
||||
|
||||
use chrono::NaiveDateTime;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
/// Supported music file extensions (lowercase).
|
||||
pub const MUSIC_EXTENSIONS: &[&str] = &[
|
||||
"mp3", "flac", "ogg", "opus", "m4a", "wav", "wma", "aac", "alac",
|
||||
];
|
||||
|
||||
/// A discovered music file with filesystem metadata.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ScannedFile {
|
||||
pub path: PathBuf,
|
||||
pub file_size: i64,
|
||||
pub mtime: NaiveDateTime,
|
||||
}
|
||||
|
||||
/// Recursively scan `root` for music files, yielding `ScannedFile` entries.
|
||||
pub fn scan_directory(root: &Path) -> Vec<Result<ScannedFile, walkdir::Error>> {
|
||||
WalkDir::new(root)
|
||||
.follow_links(true)
|
||||
.into_iter()
|
||||
.filter_map(|entry| {
|
||||
match entry {
|
||||
Ok(e) => {
|
||||
if !e.file_type().is_file() {
|
||||
return None;
|
||||
}
|
||||
let path = e.path();
|
||||
let ext = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.map(|e| e.to_lowercase());
|
||||
let ext = match ext {
|
||||
Some(e) => e,
|
||||
None => return None,
|
||||
};
|
||||
if !MUSIC_EXTENSIONS.contains(&ext.as_str()) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Read filesystem metadata
|
||||
match std::fs::metadata(path) {
|
||||
Ok(meta) => {
|
||||
let file_size = meta.len() as i64;
|
||||
let mtime = meta
|
||||
.modified()
|
||||
.ok()
|
||||
.and_then(|t| t.duration_since(UNIX_EPOCH).ok())
|
||||
.and_then(|d| {
|
||||
chrono::DateTime::from_timestamp(d.as_secs() as i64, 0)
|
||||
.map(|dt| dt.naive_utc())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
Some(Ok(ScannedFile {
|
||||
path: path.to_owned(),
|
||||
file_size,
|
||||
mtime,
|
||||
}))
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(path = %path.display(), "failed to read metadata: {e}");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => Some(Err(e)),
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::fs;
|
||||
use tempfile::TempDir;
|
||||
|
||||
#[test]
|
||||
fn test_scan_finds_music_files() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
fs::write(dir.path().join("song.mp3"), b"fake mp3").unwrap();
|
||||
fs::write(dir.path().join("song.flac"), b"fake flac").unwrap();
|
||||
fs::write(dir.path().join("readme.txt"), b"not music").unwrap();
|
||||
fs::create_dir_all(dir.path().join("subdir")).unwrap();
|
||||
fs::write(dir.path().join("subdir/deep.ogg"), b"fake ogg").unwrap();
|
||||
|
||||
let results: Vec<_> = scan_directory(dir.path())
|
||||
.into_iter()
|
||||
.filter_map(|r| r.ok())
|
||||
.collect();
|
||||
|
||||
assert_eq!(results.len(), 3);
|
||||
let names: Vec<_> = results
|
||||
.iter()
|
||||
.map(|f| f.path.file_name().unwrap().to_str().unwrap().to_string())
|
||||
.collect();
|
||||
assert!(names.contains(&"song.mp3".to_string()));
|
||||
assert!(names.contains(&"song.flac".to_string()));
|
||||
assert!(names.contains(&"deep.ogg".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scan_case_insensitive_extensions() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
fs::write(dir.path().join("song.MP3"), b"fake").unwrap();
|
||||
fs::write(dir.path().join("song.Flac"), b"fake").unwrap();
|
||||
|
||||
let results: Vec<_> = scan_directory(dir.path())
|
||||
.into_iter()
|
||||
.filter_map(|r| r.ok())
|
||||
.collect();
|
||||
|
||||
assert_eq!(results.len(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scan_empty_directory() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
let results = scan_directory(dir.path());
|
||||
assert!(results.is_empty());
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user