Add MusicBrainz database download and local import (weekly auto-update loop plus a manual admin trigger). Big upsides (fast local lookups) and downsides (large dump downloads, periodic re-imports).

This commit is contained in:
Connor Johnstone
2026-03-21 23:22:49 -04:00
parent 75f3b4f704
commit 3dba620c9b
8 changed files with 420 additions and 6 deletions

View File

@@ -8,6 +8,7 @@ pub mod auth;
pub mod config;
pub mod cookie_refresh;
pub mod error;
pub mod mb_update;
pub mod monitor;
pub mod pipeline;
pub mod pipeline_scheduler;

View File

@@ -5,8 +5,8 @@ use clap::Parser;
use tracing_actix_web::TracingLogger;
use tracing_subscriber::EnvFilter;
use shanty_data::MusicBrainzFetcher;
use shanty_data::WikipediaFetcher;
use shanty_data::{HybridMusicBrainzFetcher, LocalMusicBrainzFetcher, MusicBrainzFetcher};
use shanty_db::Database;
use shanty_search::MusicBrainzSearch;
@@ -54,8 +54,23 @@ async fn main() -> anyhow::Result<()> {
tracing::info!(url = %config.database_url, "connecting to database");
let db = Database::new(&config.database_url).await?;
let mb_client = MusicBrainzFetcher::new()?;
let search = MusicBrainzSearch::with_limiter(mb_client.limiter())?;
let mb_remote = MusicBrainzFetcher::new()?;
let search = MusicBrainzSearch::with_limiter(mb_remote.limiter())?;
// Set up local MB database if configured
let local_mb = create_local_mb_fetcher(&config);
let mb_client = HybridMusicBrainzFetcher::new(local_mb, mb_remote);
if mb_client.has_local_db()
&& let Some(stats) = mb_client.local_stats()
{
tracing::info!(
artists = stats.artists,
release_groups = stats.release_groups,
"local MusicBrainz database loaded"
);
}
let wiki_fetcher = WikipediaFetcher::new()?;
let bind = format!("{}:{}", config.web.bind, config.web.port);
@@ -85,6 +100,7 @@ async fn main() -> anyhow::Result<()> {
// Start pipeline and monitor schedulers
shanty_web::pipeline_scheduler::spawn(state.clone());
shanty_web::monitor::spawn(state.clone());
shanty_web::mb_update::spawn(state.clone());
// Resolve static files directory relative to the binary location
let static_dir = std::env::current_exe()
@@ -157,3 +173,36 @@ async fn main() -> anyhow::Result<()> {
Ok(())
}
/// Create a `LocalMusicBrainzFetcher` from config if available.
///
/// Path resolution: an explicitly configured `local_db_path` always wins;
/// otherwise fall back to `<data_dir>/shanty-mb.db`, but only when that file
/// already exists on disk. Returns `None` when no path resolves, the database
/// cannot be opened, or it opens but contains no data.
fn create_local_mb_fetcher(config: &AppConfig) -> Option<LocalMusicBrainzFetcher> {
    // An explicit config path is attempted even if the file is missing, so the
    // user gets a warning below instead of a silent fallback.
    let configured = config
        .musicbrainz
        .local_db_path
        .as_ref()
        .map(|p| p.to_string_lossy().to_string());
    let db_path = match configured {
        Some(path) => path,
        None => {
            let fallback = shanty_config::data_dir().join("shanty-mb.db");
            if !fallback.exists() {
                return None;
            }
            fallback.to_string_lossy().to_string()
        }
    };
    let fetcher = match LocalMusicBrainzFetcher::new(&db_path) {
        Ok(f) => f,
        Err(e) => {
            tracing::warn!(path = %db_path, error = %e, "failed to open local MB database");
            return None;
        }
    };
    if fetcher.is_available() {
        tracing::info!(path = %db_path, "opened local MusicBrainz database");
        Some(fetcher)
    } else {
        tracing::debug!(path = %db_path, "local MB database exists but has no data");
        None
    }
}

120
src/mb_update.rs Normal file
View File

@@ -0,0 +1,120 @@
//! Background task that periodically re-imports the MusicBrainz database.
use std::time::Duration;
use actix_web::web;
use crate::state::AppState;
/// Spawn the weekly MB database update loop.
///
/// Only runs if a local MB database exists (meaning the user has done an initial import).
/// Downloads fresh dumps and re-imports weekly.
///
/// Cadence: sleeps 1 hour after startup, then loops forever. A pass that finds
/// no local DB, a fresh-enough import, or a failed discovery/download retries
/// in 24 hours; a pass that runs the import (success or failure) sleeps 7 days.
pub fn spawn(state: web::Data<AppState>) {
    tokio::spawn(async move {
        // Wait 1 hour after startup before first check
        tokio::time::sleep(Duration::from_secs(3600)).await;
        loop {
            // Check if local DB exists and auto-update is desired
            let has_local = state.mb_client.has_local_db();
            if !has_local {
                // No local DB — sleep a day and check again
                tokio::time::sleep(Duration::from_secs(86400)).await;
                continue;
            }
            // Check how old the import is
            let needs_update = state
                .mb_client
                .local_stats()
                .and_then(|s| s.last_import_date)
                .map(|date| {
                    // Parse the date and check if it's older than 7 days
                    chrono::NaiveDate::parse_from_str(&date, "%Y-%m-%d")
                        .map(|d| {
                            let age = chrono::Utc::now().naive_utc().date() - d;
                            age.num_days() >= 7
                        })
                        .unwrap_or(true) // If we can't parse the date, update
                })
                .unwrap_or(false); // No stats = no local DB = skip
            // NOTE(review): a DB with stats but `last_import_date == None` also lands in
            // `unwrap_or(false)` and is never auto-updated — confirm that is intended.
            if !needs_update {
                // Check again in 24 hours
                tokio::time::sleep(Duration::from_secs(86400)).await;
                continue;
            }
            tracing::info!("starting weekly MusicBrainz database update");
            // Dumps are staged under the app data dir; the target DB path honors
            // the configured override, else the same default main() falls back to.
            let data_dir = shanty_config::data_dir().join("mb-dumps");
            let db_path = state
                .config
                .read()
                .await
                .musicbrainz
                .local_db_path
                .clone()
                .unwrap_or_else(|| shanty_config::data_dir().join("shanty-mb.db"));
            // Download fresh dumps
            if let Err(e) = std::fs::create_dir_all(&data_dir) {
                tracing::error!(error = %e, "failed to create dump dir for MB update");
                tokio::time::sleep(Duration::from_secs(86400)).await;
                continue;
            }
            let timestamp = match shanty_data::mb_import::discover_latest_dump_folder().await {
                Ok(t) => t,
                Err(e) => {
                    tracing::error!(error = %e, "failed to discover latest MB dump");
                    tokio::time::sleep(Duration::from_secs(86400)).await;
                    continue;
                }
            };
            // Download every required dump file; abort the whole pass on the
            // first failure so we never import a partial set.
            let mut download_failed = false;
            for filename in shanty_data::mb_import::DUMP_FILES {
                if let Err(e) =
                    shanty_data::mb_import::download_dump(filename, &timestamp, &data_dir, |msg| {
                        tracing::info!("{msg}");
                    })
                    .await
                {
                    tracing::error!(file = filename, error = %e, "MB dump download failed");
                    download_failed = true;
                    break;
                }
            }
            if download_failed {
                tokio::time::sleep(Duration::from_secs(86400)).await;
                continue;
            }
            // Run import in blocking task
            // (the import is synchronous, so keep it off the async runtime threads)
            let result = tokio::task::spawn_blocking(move || {
                shanty_data::mb_import::run_import_at_path(&db_path, &data_dir, |msg| {
                    tracing::info!("{msg}");
                })
            })
            .await;
            match result {
                Ok(Ok(stats)) => {
                    tracing::info!(%stats, "weekly MusicBrainz update complete");
                }
                Ok(Err(e)) => {
                    tracing::error!(error = %e, "weekly MusicBrainz import failed");
                }
                Err(e) => {
                    tracing::error!(error = %e, "weekly MusicBrainz import task panicked");
                }
            }
            // Sleep 7 days before next check
            tokio::time::sleep(Duration::from_secs(7 * 86400)).await;
        }
    });
}

View File

@@ -24,6 +24,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/monitor/status").route(web::get().to(get_monitor_status)))
.service(web::resource("/scheduler/skip-pipeline").route(web::post().to(skip_pipeline)))
.service(web::resource("/scheduler/skip-monitor").route(web::post().to(skip_monitor)))
.service(web::resource("/mb-status").route(web::get().to(get_mb_status)))
.service(web::resource("/mb-import").route(web::post().to(trigger_mb_import)))
.service(
web::resource("/config")
.route(web::get().to(get_config))
@@ -327,3 +329,106 @@ async fn skip_monitor(
sched.next_monitor = None;
Ok(HttpResponse::Ok().json(serde_json::json!({"status": "skipped"})))
}
/// Report whether a local MusicBrainz database is loaded, plus its stats.
/// Requires an authenticated session.
async fn get_mb_status(
    state: web::Data<AppState>,
    session: Session,
) -> Result<HttpResponse, ApiError> {
    auth::require_auth(&session)?;
    let body = serde_json::json!({
        "has_local_db": state.mb_client.has_local_db(),
        "stats": state.mb_client.local_stats(),
    });
    Ok(HttpResponse::Ok().json(body))
}
/// Kick off a full MusicBrainz dump download + import as a background task.
///
/// Admin-only. Responds immediately with `202 Accepted` and a task id the
/// client can poll; all progress and failure reporting flows through
/// `state.tasks`. Fixes: progress messages now name the dump file being
/// downloaded (previously a literal "(unknown)" placeholder), and the step
/// total is derived from `DUMP_FILES.len()` instead of mismatched hard-coded
/// denominators (`0/4` followed by `i/8`).
async fn trigger_mb_import(
    state: web::Data<AppState>,
    session: Session,
) -> Result<HttpResponse, ApiError> {
    auth::require_admin(&session)?;
    let task_id = state.tasks.register("mb_import");
    let tid = task_id.clone();
    // Snapshot the config so the background task never holds the lock.
    let config = state.config.read().await.clone();
    tokio::spawn(async move {
        state
            .tasks
            .update_progress(&tid, 0, 0, "Starting MusicBrainz import...");
        let data_dir = shanty_config::data_dir().join("mb-dumps");
        let db_path = config
            .musicbrainz
            .local_db_path
            .clone()
            .unwrap_or_else(|| shanty_config::data_dir().join("shanty-mb.db"));
        // One progress step per downloaded dump file; the import phase accounts
        // for the remaining half of the total.
        let n_files = shanty_data::mb_import::DUMP_FILES.len() as u64;
        let total_steps = n_files * 2;
        // Download dumps
        state
            .tasks
            .update_progress(&tid, 0, total_steps, "Downloading dumps...");
        if let Err(e) = std::fs::create_dir_all(&data_dir) {
            state
                .tasks
                .fail(&tid, format!("Failed to create data dir: {e}"));
            return;
        }
        let timestamp = match shanty_data::mb_import::discover_latest_dump_folder().await {
            Ok(t) => t,
            Err(e) => {
                state
                    .tasks
                    .fail(&tid, format!("Failed to discover latest dump: {e}"));
                return;
            }
        };
        for (i, filename) in shanty_data::mb_import::DUMP_FILES.iter().enumerate() {
            // Name the file in the progress line so the UI isn't a blind spinner.
            state.tasks.update_progress(
                &tid,
                i as u64,
                total_steps,
                &format!("Downloading {filename}..."),
            );
            if let Err(e) =
                shanty_data::mb_import::download_dump(filename, &timestamp, &data_dir, |_| {}).await
            {
                state
                    .tasks
                    .fail(&tid, format!("Failed to download {filename}: {e}"));
                return;
            }
        }
        // Run import
        state
            .tasks
            .update_progress(&tid, n_files, total_steps, "Importing into database...");
        let tid_clone = tid.clone();
        let state_clone = state.clone();
        // Run import in blocking task since rusqlite is sync
        let result = tokio::task::spawn_blocking(move || {
            shanty_data::mb_import::run_import_at_path(&db_path, &data_dir, |msg| {
                state_clone
                    .tasks
                    .update_progress(&tid_clone, n_files, total_steps, msg);
            })
        })
        .await;
        match result {
            Ok(Ok(stats)) => {
                state.tasks.complete(&tid, format!("{stats}"));
            }
            Ok(Err(e)) => {
                state.tasks.fail(&tid, format!("Import failed: {e}"));
            }
            Err(e) => {
                state.tasks.fail(&tid, format!("Import task panicked: {e}"));
            }
        }
    });
    Ok(HttpResponse::Accepted().json(serde_json::json!({ "task_id": task_id })))
}

View File

@@ -1,7 +1,7 @@
use std::sync::Arc;
use tokio::sync::{Mutex, RwLock};
use shanty_data::MusicBrainzFetcher;
use shanty_data::HybridMusicBrainzFetcher;
use shanty_data::WikipediaFetcher;
use shanty_db::Database;
use shanty_search::MusicBrainzSearch;
@@ -28,7 +28,7 @@ pub struct SchedulerInfo {
pub struct AppState {
pub db: Database,
pub mb_client: MusicBrainzFetcher,
pub mb_client: HybridMusicBrainzFetcher,
pub search: MusicBrainzSearch,
pub wiki_fetcher: WikipediaFetcher,
pub config: Arc<RwLock<AppConfig>>,