Added the MusicBrainz (MB) database download. It has big upsides and downsides.
This commit is contained in:
@@ -8,6 +8,7 @@ pub mod auth;
|
||||
pub mod config;
|
||||
pub mod cookie_refresh;
|
||||
pub mod error;
|
||||
pub mod mb_update;
|
||||
pub mod monitor;
|
||||
pub mod pipeline;
|
||||
pub mod pipeline_scheduler;
|
||||
|
||||
55
src/main.rs
55
src/main.rs
@@ -5,8 +5,8 @@ use clap::Parser;
|
||||
use tracing_actix_web::TracingLogger;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use shanty_data::MusicBrainzFetcher;
|
||||
use shanty_data::WikipediaFetcher;
|
||||
use shanty_data::{HybridMusicBrainzFetcher, LocalMusicBrainzFetcher, MusicBrainzFetcher};
|
||||
use shanty_db::Database;
|
||||
use shanty_search::MusicBrainzSearch;
|
||||
|
||||
@@ -54,8 +54,23 @@ async fn main() -> anyhow::Result<()> {
|
||||
tracing::info!(url = %config.database_url, "connecting to database");
|
||||
let db = Database::new(&config.database_url).await?;
|
||||
|
||||
let mb_client = MusicBrainzFetcher::new()?;
|
||||
let search = MusicBrainzSearch::with_limiter(mb_client.limiter())?;
|
||||
let mb_remote = MusicBrainzFetcher::new()?;
|
||||
let search = MusicBrainzSearch::with_limiter(mb_remote.limiter())?;
|
||||
|
||||
// Set up local MB database if configured
|
||||
let local_mb = create_local_mb_fetcher(&config);
|
||||
let mb_client = HybridMusicBrainzFetcher::new(local_mb, mb_remote);
|
||||
|
||||
if mb_client.has_local_db()
|
||||
&& let Some(stats) = mb_client.local_stats()
|
||||
{
|
||||
tracing::info!(
|
||||
artists = stats.artists,
|
||||
release_groups = stats.release_groups,
|
||||
"local MusicBrainz database loaded"
|
||||
);
|
||||
}
|
||||
|
||||
let wiki_fetcher = WikipediaFetcher::new()?;
|
||||
|
||||
let bind = format!("{}:{}", config.web.bind, config.web.port);
|
||||
@@ -85,6 +100,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
// Start pipeline and monitor schedulers
|
||||
shanty_web::pipeline_scheduler::spawn(state.clone());
|
||||
shanty_web::monitor::spawn(state.clone());
|
||||
shanty_web::mb_update::spawn(state.clone());
|
||||
|
||||
// Resolve static files directory relative to the binary location
|
||||
let static_dir = std::env::current_exe()
|
||||
@@ -157,3 +173,36 @@ async fn main() -> anyhow::Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a LocalMusicBrainzFetcher from config if available.
|
||||
fn create_local_mb_fetcher(config: &AppConfig) -> Option<LocalMusicBrainzFetcher> {
|
||||
let db_path = config
|
||||
.musicbrainz
|
||||
.local_db_path
|
||||
.as_ref()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.or_else(|| {
|
||||
let default_path = shanty_config::data_dir().join("shanty-mb.db");
|
||||
if default_path.exists() {
|
||||
Some(default_path.to_string_lossy().to_string())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})?;
|
||||
|
||||
match LocalMusicBrainzFetcher::new(&db_path) {
|
||||
Ok(fetcher) => {
|
||||
if fetcher.is_available() {
|
||||
tracing::info!(path = %db_path, "opened local MusicBrainz database");
|
||||
Some(fetcher)
|
||||
} else {
|
||||
tracing::debug!(path = %db_path, "local MB database exists but has no data");
|
||||
None
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(path = %db_path, error = %e, "failed to open local MB database");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
120
src/mb_update.rs
Normal file
120
src/mb_update.rs
Normal file
@@ -0,0 +1,120 @@
|
||||
//! Background task that periodically re-imports the MusicBrainz database.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use actix_web::web;
|
||||
|
||||
use crate::state::AppState;
|
||||
|
||||
/// Spawn the weekly MB database update loop.
|
||||
///
|
||||
/// Only runs if a local MB database exists (meaning the user has done an initial import).
|
||||
/// Downloads fresh dumps and re-imports weekly.
|
||||
pub fn spawn(state: web::Data<AppState>) {
|
||||
tokio::spawn(async move {
|
||||
// Wait 1 hour after startup before first check
|
||||
tokio::time::sleep(Duration::from_secs(3600)).await;
|
||||
|
||||
loop {
|
||||
// Check if local DB exists and auto-update is desired
|
||||
let has_local = state.mb_client.has_local_db();
|
||||
if !has_local {
|
||||
// No local DB — sleep a day and check again
|
||||
tokio::time::sleep(Duration::from_secs(86400)).await;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check how old the import is
|
||||
let needs_update = state
|
||||
.mb_client
|
||||
.local_stats()
|
||||
.and_then(|s| s.last_import_date)
|
||||
.map(|date| {
|
||||
// Parse the date and check if it's older than 7 days
|
||||
chrono::NaiveDate::parse_from_str(&date, "%Y-%m-%d")
|
||||
.map(|d| {
|
||||
let age = chrono::Utc::now().naive_utc().date() - d;
|
||||
age.num_days() >= 7
|
||||
})
|
||||
.unwrap_or(true) // If we can't parse the date, update
|
||||
})
|
||||
.unwrap_or(false); // No stats = no local DB = skip
|
||||
|
||||
if !needs_update {
|
||||
// Check again in 24 hours
|
||||
tokio::time::sleep(Duration::from_secs(86400)).await;
|
||||
continue;
|
||||
}
|
||||
|
||||
tracing::info!("starting weekly MusicBrainz database update");
|
||||
|
||||
let data_dir = shanty_config::data_dir().join("mb-dumps");
|
||||
let db_path = state
|
||||
.config
|
||||
.read()
|
||||
.await
|
||||
.musicbrainz
|
||||
.local_db_path
|
||||
.clone()
|
||||
.unwrap_or_else(|| shanty_config::data_dir().join("shanty-mb.db"));
|
||||
|
||||
// Download fresh dumps
|
||||
if let Err(e) = std::fs::create_dir_all(&data_dir) {
|
||||
tracing::error!(error = %e, "failed to create dump dir for MB update");
|
||||
tokio::time::sleep(Duration::from_secs(86400)).await;
|
||||
continue;
|
||||
}
|
||||
|
||||
let timestamp = match shanty_data::mb_import::discover_latest_dump_folder().await {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
tracing::error!(error = %e, "failed to discover latest MB dump");
|
||||
tokio::time::sleep(Duration::from_secs(86400)).await;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let mut download_failed = false;
|
||||
for filename in shanty_data::mb_import::DUMP_FILES {
|
||||
if let Err(e) =
|
||||
shanty_data::mb_import::download_dump(filename, ×tamp, &data_dir, |msg| {
|
||||
tracing::info!("{msg}");
|
||||
})
|
||||
.await
|
||||
{
|
||||
tracing::error!(file = filename, error = %e, "MB dump download failed");
|
||||
download_failed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if download_failed {
|
||||
tokio::time::sleep(Duration::from_secs(86400)).await;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Run import in blocking task
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
shanty_data::mb_import::run_import_at_path(&db_path, &data_dir, |msg| {
|
||||
tracing::info!("{msg}");
|
||||
})
|
||||
})
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(Ok(stats)) => {
|
||||
tracing::info!(%stats, "weekly MusicBrainz update complete");
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
tracing::error!(error = %e, "weekly MusicBrainz import failed");
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!(error = %e, "weekly MusicBrainz import task panicked");
|
||||
}
|
||||
}
|
||||
|
||||
// Sleep 7 days before next check
|
||||
tokio::time::sleep(Duration::from_secs(7 * 86400)).await;
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -24,6 +24,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::resource("/monitor/status").route(web::get().to(get_monitor_status)))
|
||||
.service(web::resource("/scheduler/skip-pipeline").route(web::post().to(skip_pipeline)))
|
||||
.service(web::resource("/scheduler/skip-monitor").route(web::post().to(skip_monitor)))
|
||||
.service(web::resource("/mb-status").route(web::get().to(get_mb_status)))
|
||||
.service(web::resource("/mb-import").route(web::post().to(trigger_mb_import)))
|
||||
.service(
|
||||
web::resource("/config")
|
||||
.route(web::get().to(get_config))
|
||||
@@ -327,3 +329,106 @@ async fn skip_monitor(
|
||||
sched.next_monitor = None;
|
||||
Ok(HttpResponse::Ok().json(serde_json::json!({"status": "skipped"})))
|
||||
}
|
||||
|
||||
async fn get_mb_status(
|
||||
state: web::Data<AppState>,
|
||||
session: Session,
|
||||
) -> Result<HttpResponse, ApiError> {
|
||||
auth::require_auth(&session)?;
|
||||
let has_local = state.mb_client.has_local_db();
|
||||
let stats = state.mb_client.local_stats();
|
||||
Ok(HttpResponse::Ok().json(serde_json::json!({
|
||||
"has_local_db": has_local,
|
||||
"stats": stats,
|
||||
})))
|
||||
}
|
||||
|
||||
async fn trigger_mb_import(
|
||||
state: web::Data<AppState>,
|
||||
session: Session,
|
||||
) -> Result<HttpResponse, ApiError> {
|
||||
auth::require_admin(&session)?;
|
||||
let task_id = state.tasks.register("mb_import");
|
||||
let tid = task_id.clone();
|
||||
let config = state.config.read().await.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
state
|
||||
.tasks
|
||||
.update_progress(&tid, 0, 0, "Starting MusicBrainz import...");
|
||||
|
||||
let data_dir = shanty_config::data_dir().join("mb-dumps");
|
||||
let db_path = config
|
||||
.musicbrainz
|
||||
.local_db_path
|
||||
.clone()
|
||||
.unwrap_or_else(|| shanty_config::data_dir().join("shanty-mb.db"));
|
||||
|
||||
// Download dumps
|
||||
state
|
||||
.tasks
|
||||
.update_progress(&tid, 0, 4, "Downloading dumps...");
|
||||
if let Err(e) = std::fs::create_dir_all(&data_dir) {
|
||||
state
|
||||
.tasks
|
||||
.fail(&tid, format!("Failed to create data dir: {e}"));
|
||||
return;
|
||||
}
|
||||
|
||||
let timestamp = match shanty_data::mb_import::discover_latest_dump_folder().await {
|
||||
Ok(t) => t,
|
||||
Err(e) => {
|
||||
state
|
||||
.tasks
|
||||
.fail(&tid, format!("Failed to discover latest dump: {e}"));
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
for (i, filename) in shanty_data::mb_import::DUMP_FILES.iter().enumerate() {
|
||||
state.tasks.update_progress(
|
||||
&tid,
|
||||
i as u64,
|
||||
4 + 4, // 4 downloads + 4 imports
|
||||
&format!("Downloading {filename}..."),
|
||||
);
|
||||
if let Err(e) =
|
||||
shanty_data::mb_import::download_dump(filename, ×tamp, &data_dir, |_| {}).await
|
||||
{
|
||||
state
|
||||
.tasks
|
||||
.fail(&tid, format!("Failed to download {filename}: {e}"));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Run import
|
||||
state
|
||||
.tasks
|
||||
.update_progress(&tid, 4, 8, "Importing into database...");
|
||||
|
||||
let tid_clone = tid.clone();
|
||||
let state_clone = state.clone();
|
||||
// Run import in blocking task since rusqlite is sync
|
||||
let result = tokio::task::spawn_blocking(move || {
|
||||
shanty_data::mb_import::run_import_at_path(&db_path, &data_dir, |msg| {
|
||||
state_clone.tasks.update_progress(&tid_clone, 4, 8, msg);
|
||||
})
|
||||
})
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(Ok(stats)) => {
|
||||
state.tasks.complete(&tid, format!("{stats}"));
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
state.tasks.fail(&tid, format!("Import failed: {e}"));
|
||||
}
|
||||
Err(e) => {
|
||||
state.tasks.fail(&tid, format!("Import task panicked: {e}"));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Ok(HttpResponse::Accepted().json(serde_json::json!({ "task_id": task_id })))
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::{Mutex, RwLock};
|
||||
|
||||
use shanty_data::MusicBrainzFetcher;
|
||||
use shanty_data::HybridMusicBrainzFetcher;
|
||||
use shanty_data::WikipediaFetcher;
|
||||
use shanty_db::Database;
|
||||
use shanty_search::MusicBrainzSearch;
|
||||
@@ -28,7 +28,7 @@ pub struct SchedulerInfo {
|
||||
|
||||
pub struct AppState {
|
||||
pub db: Database,
|
||||
pub mb_client: MusicBrainzFetcher,
|
||||
pub mb_client: HybridMusicBrainzFetcher,
|
||||
pub search: MusicBrainzSearch,
|
||||
pub wiki_fetcher: WikipediaFetcher,
|
||||
pub config: Arc<RwLock<AppConfig>>,
|
||||
|
||||
Reference in New Issue
Block a user