Added the mb db download. Big upsides and downsides
CI / check (push) Successful in 1m11s
CI / docker (push) Successful in 2m21s

This commit is contained in:
Connor Johnstone
2026-03-21 23:22:49 -04:00
parent 31d54651e6
commit 51f2c2ae8f
9 changed files with 2181 additions and 142 deletions
+142 -4
View File
@@ -1,13 +1,13 @@
use actix_cors::Cors;
use actix_session::{SessionMiddleware, storage::CookieSessionStore};
use actix_web::{App, HttpServer, cookie::Key, web};
use clap::Parser;
use clap::{Parser, Subcommand};
use tracing_actix_web::TracingLogger;
use tracing_subscriber::EnvFilter;
use shanty_config::AppConfig;
use shanty_data::MusicBrainzFetcher;
use shanty_data::WikipediaFetcher;
use shanty_data::{HybridMusicBrainzFetcher, LocalMusicBrainzFetcher, MusicBrainzFetcher};
use shanty_db::Database;
use shanty_search::MusicBrainzSearch;
@@ -29,6 +29,24 @@ struct Cli {
/// Increase verbosity (-v info, -vv debug, -vvv trace).
#[arg(short, long, action = clap::ArgAction::Count)]
verbose: u8,
#[command(subcommand)]
command: Option<Commands>,
}
// Subcommands accepted by the CLI (wired in through `Cli::command`).
//
// NOTE: the `///` doc comments below are picked up by clap's derive macros
// and shown as `--help` text, so they are user-facing strings. Keep them in
// sync with the actual behavior of `run_mb_import`.
#[derive(Subcommand)]
enum Commands {
    /// Import MusicBrainz JSON data dumps into local SQLite database.
    MbImport {
        /// Download fresh dump files from metabrainz.org before importing.
        #[arg(long)]
        download: bool,
        // When omitted, `run_mb_import` falls back to
        // `shanty_config::data_dir().join("mb-dumps")`, not the data
        // directory itself, hence the wording of the help text.
        /// Directory containing (or to download) dump files.
        /// Defaults to the mb-dumps subdirectory of the application data directory.
        #[arg(long)]
        data_dir: Option<String>,
    },
}
#[actix_web::main]
@@ -54,11 +72,36 @@ async fn main() -> anyhow::Result<()> {
config.web.port = port;
}
// Handle subcommands
if let Some(Commands::MbImport { download, data_dir }) = cli.command {
return run_mb_import(&config, download, data_dir.as_deref()).await;
}
tracing::info!(url = %config.database_url, "connecting to database");
let db = Database::new(&config.database_url).await?;
let mb_client = MusicBrainzFetcher::new()?;
let search = MusicBrainzSearch::with_limiter(mb_client.limiter())?;
let mb_remote = MusicBrainzFetcher::new()?;
let search = MusicBrainzSearch::with_limiter(mb_remote.limiter())?;
// Set up local MB database if configured
let local_mb = create_local_mb_fetcher(&config);
let mb_client = HybridMusicBrainzFetcher::new(local_mb, mb_remote);
if mb_client.has_local_db()
&& let Some(stats) = mb_client.local_stats()
{
tracing::info!(
artists = stats.artists,
release_groups = stats.release_groups,
releases = stats.releases,
recordings = stats.recordings,
last_import = ?stats.last_import_date,
"local MusicBrainz database loaded"
);
} else if !mb_client.has_local_db() {
tracing::info!("no local MusicBrainz database — using API only");
}
let wiki_fetcher = WikipediaFetcher::new()?;
let bind = format!("{}:{}", config.web.bind, config.web.port);
@@ -88,6 +131,7 @@ async fn main() -> anyhow::Result<()> {
// Start pipeline and monitor schedulers
shanty_web::pipeline_scheduler::spawn(state.clone());
shanty_web::monitor::spawn(state.clone());
shanty_web::mb_update::spawn(state.clone());
// Resolve static files directory
let static_dir = std::env::current_exe()
@@ -168,3 +212,97 @@ async fn main() -> anyhow::Result<()> {
tracing::info!("server stopped");
Ok(())
}
/// Build the optional local MusicBrainz fetcher described by `config`.
///
/// The database path is `config.musicbrainz.local_db_path` when set.
/// Otherwise `shanty-mb.db` inside `shanty_config::data_dir()` is used, but
/// only if that file already exists.
///
/// Returns `None` when no path can be determined, when the database cannot
/// be opened (logged at warn level), or when the opened fetcher reports that
/// it is not available (logged at debug level).
fn create_local_mb_fetcher(config: &AppConfig) -> Option<LocalMusicBrainzFetcher> {
    // Resolve the path: explicit configuration wins over the default file.
    let db_path = match config.musicbrainz.local_db_path.as_ref() {
        Some(configured) => configured.to_string_lossy().to_string(),
        None => {
            // Default location: data_dir/shanty-mb.db (only if it exists)
            let fallback = shanty_config::data_dir().join("shanty-mb.db");
            if !fallback.exists() {
                return None;
            }
            fallback.to_string_lossy().to_string()
        }
    };

    // Opening may fail; this is reported but never fatal for the caller.
    let fetcher = match LocalMusicBrainzFetcher::new(&db_path) {
        Ok(opened) => opened,
        Err(e) => {
            tracing::warn!(path = %db_path, error = %e, "failed to open local MB database");
            return None;
        }
    };

    // A database that opens but is not available is treated as absent.
    if !fetcher.is_available() {
        tracing::debug!(path = %db_path, "local MB database exists but has no data");
        return None;
    }

    tracing::info!(path = %db_path, "opened local MusicBrainz database");
    Some(fetcher)
}
/// Run the `mb-import` subcommand.
///
/// Optionally downloads the latest MusicBrainz dump files, then imports the
/// dumps from the dump directory into the local MusicBrainz database and
/// prints the resulting statistics and database path to stdout.
///
/// # Arguments
///
/// * `config` - application configuration; `musicbrainz.local_db_path`
///   selects the target database, falling back to `shanty-mb.db` inside
///   `shanty_config::data_dir()`.
/// * `download` - when `true`, the dump directory is created if necessary
///   and every entry of `shanty_data::mb_import::DUMP_FILES` is downloaded
///   from the latest dump folder before importing.
/// * `data_dir_override` - dump directory to use instead of the default
///   `mb-dumps` directory inside `shanty_config::data_dir()`.
///
/// # Errors
///
/// Returns an error if the dump directory cannot be created, if discovering
/// or downloading a dump fails, if the dump directory is missing or is not a
/// directory, or if the import itself fails.
async fn run_mb_import(
    config: &AppConfig,
    download: bool,
    data_dir_override: Option<&str>,
) -> anyhow::Result<()> {
    let data_dir = data_dir_override
        .map(std::path::PathBuf::from)
        .unwrap_or_else(|| shanty_config::data_dir().join("mb-dumps"));

    let db_path = config
        .musicbrainz
        .local_db_path
        .clone()
        .unwrap_or_else(|| shanty_config::data_dir().join("shanty-mb.db"));

    tracing::info!(
        dump_dir = %data_dir.display(),
        db_path = %db_path.display(),
        download = download,
        "starting MusicBrainz import"
    );

    // Download dumps if requested
    if download {
        std::fs::create_dir_all(&data_dir)?;

        let timestamp = shanty_data::mb_import::discover_latest_dump_folder()
            .await
            .map_err(|e| anyhow::anyhow!("failed to discover latest dump: {e}"))?;
        tracing::info!(timestamp = %timestamp, "using dump folder");

        // Files are fetched one after another; the first failure aborts the run.
        for filename in shanty_data::mb_import::DUMP_FILES {
            shanty_data::mb_import::download_dump(filename, &timestamp, &data_dir, |msg| {
                tracing::info!("{msg}");
            })
            .await
            .map_err(|e| anyhow::anyhow!("{e}"))?;
        }
    }

    // Ensure the dump location is an existing directory. `is_dir()` (rather
    // than `exists()`) also rejects a regular file passed via --data-dir.
    // The directory's contents are not inspected here.
    if !data_dir.is_dir() {
        anyhow::bail!(
            "dump directory {} does not exist or is not a directory. Use --download to fetch dumps, or provide --data-dir pointing to existing dump files.",
            data_dir.display()
        );
    }

    // Run import (handles opening the database internally). The call is not
    // awaited, so it runs synchronously on the current task.
    let stats = shanty_data::mb_import::run_import_at_path(&db_path, &data_dir, |msg| {
        tracing::info!("{msg}");
    })
    .map_err(|e| anyhow::anyhow!("{e}"))?;

    tracing::info!(%stats, db_path = %db_path.display(), "MusicBrainz import complete");
    println!("{stats}");
    println!("Database: {}", db_path.display());

    Ok(())
}