Compare commits

...

1 Commits

Author SHA1 Message Date
Connor Johnstone
3dba620c9b Added the mb db download. Big upsides and downsides 2026-03-21 23:22:49 -04:00
8 changed files with 420 additions and 6 deletions

View File

@@ -374,3 +374,13 @@ pub async fn ytauth_refresh() -> Result<serde_json::Value, ApiError> {
pub async fn ytauth_clear_cookies() -> Result<(), ApiError> { pub async fn ytauth_clear_cookies() -> Result<(), ApiError> {
delete(&format!("{BASE}/ytauth/cookies")).await delete(&format!("{BASE}/ytauth/cookies")).await
} }
// --- MusicBrainz Local DB ---
/// Fetch the local MusicBrainz database status via a GET to `{BASE}/mb-status`.
pub async fn get_mb_status() -> Result<MbStatus, ApiError> {
    let url = format!("{BASE}/mb-status");
    get_json(&url).await
}
/// Ask the server to start a MusicBrainz import via a body-less POST to
/// `{BASE}/mb-import`, returning the reference to the task it created.
pub async fn trigger_mb_import() -> Result<TaskRef, ApiError> {
    let url = format!("{BASE}/mb-import");
    post_empty(&url).await
}

View File

@@ -3,7 +3,7 @@ use web_sys::HtmlSelectElement;
use yew::prelude::*; use yew::prelude::*;
use crate::api; use crate::api;
use crate::types::{AppConfig, SubsonicPasswordStatus, YtAuthStatus}; use crate::types::{AppConfig, MbStatus, SubsonicPasswordStatus, YtAuthStatus};
#[function_component(SettingsPage)] #[function_component(SettingsPage)]
pub fn settings_page() -> Html { pub fn settings_page() -> Html {
@@ -15,12 +15,15 @@ pub fn settings_page() -> Html {
let subsonic_status = use_state(|| None::<SubsonicPasswordStatus>); let subsonic_status = use_state(|| None::<SubsonicPasswordStatus>);
let subsonic_password = use_state(String::new); let subsonic_password = use_state(String::new);
let subsonic_saving = use_state(|| false); let subsonic_saving = use_state(|| false);
let mb_status = use_state(|| None::<MbStatus>);
let mb_importing = use_state(|| false);
{ {
let config = config.clone(); let config = config.clone();
let error = error.clone(); let error = error.clone();
let ytauth = ytauth.clone(); let ytauth = ytauth.clone();
let subsonic_status = subsonic_status.clone(); let subsonic_status = subsonic_status.clone();
let mb_status = mb_status.clone();
use_effect_with((), move |_| { use_effect_with((), move |_| {
wasm_bindgen_futures::spawn_local(async move { wasm_bindgen_futures::spawn_local(async move {
match api::get_config().await { match api::get_config().await {
@@ -38,6 +41,11 @@ pub fn settings_page() -> Html {
subsonic_status.set(Some(status)); subsonic_status.set(Some(status));
} }
}); });
wasm_bindgen_futures::spawn_local(async move {
if let Ok(status) = api::get_mb_status().await {
mb_status.set(Some(status));
}
});
}); });
} }
@@ -596,6 +604,92 @@ pub fn settings_page() -> Html {
</button> </button>
</div> </div>
// MusicBrainz Local Database
<div class="card">
<h3>{ "MusicBrainz Database" }</h3>
<p class="text-sm text-muted mb-1">
{ "Import the MusicBrainz database locally for instant artist/album lookups instead of rate-limited API calls. " }
{ "Makes browsing and watching artists dramatically faster." }
</p>
<div class="card" style="border-color: var(--warning); background: rgba(234, 179, 8, 0.08); margin: 0.5rem 0;">
<p class="text-sm" style="margin:0;">
<strong style="color: var(--warning);">{ "Heads up: " }</strong>
{ "This downloads ~24 GB of data and builds a ~10 GB local database. " }
{ "The initial import can take 3-6 hours depending on your hardware. " }
{ "Total disk usage: ~35 GB (downloads + database). " }
{ "After the initial import, the database is automatically refreshed weekly to stay current." }
</p>
</div>
{
if let Some(ref status) = *mb_status {
if status.has_local_db {
if let Some(ref stats) = status.stats {
let import_date = stats.last_import_date.clone().unwrap_or_else(|| "unknown".into());
html! {
<>
<p>
<span class="badge badge-success">{ "Loaded" }</span>
<span class="text-muted text-sm" style="margin-left: 0.5rem;">
{ format!("imported {}", import_date) }
</span>
</p>
<p class="text-sm">
{ format!("{} artists, {} release groups, {} releases, {} recordings",
stats.artists, stats.release_groups, stats.releases, stats.recordings) }
</p>
</>
}
} else {
html! {
<p><span class="badge badge-success">{ "Loaded" }</span></p>
}
}
} else {
html! {
<p class="text-sm text-muted">
{ "Not configured. Import data to enable instant lookups." }
</p>
}
}
} else {
html! { <p class="text-sm text-muted">{ "Loading..." }</p> }
}
}
<button type="button" class="btn btn-primary"
disabled={*mb_importing}
onclick={{
let mb_importing = mb_importing.clone();
let mb_status = mb_status.clone();
let message = message.clone();
let error = error.clone();
Callback::from(move |_: MouseEvent| {
let mb_importing = mb_importing.clone();
let mb_status = mb_status.clone();
let message = message.clone();
let error = error.clone();
mb_importing.set(true);
wasm_bindgen_futures::spawn_local(async move {
match api::trigger_mb_import().await {
Ok(task_ref) => {
message.set(Some(format!(
"MusicBrainz import started (task {}). This will take a while.",
task_ref.task_id
)));
// Refresh status right away (no delay is applied); the import itself keeps running server-side
if let Ok(s) = api::get_mb_status().await {
mb_status.set(Some(s));
}
}
Err(e) => error.set(Some(e.0)),
}
mb_importing.set(false);
});
})
}}>
{ if *mb_importing { "Starting import..." } else { "Import MusicBrainz Data" } }
</button>
</div>
// Metadata Providers // Metadata Providers
<div class="card"> <div class="card">
<h3>{ "Metadata Providers" }</h3> <h3>{ "Metadata Providers" }</h3>

View File

@@ -400,6 +400,8 @@ pub struct AppConfig {
pub metadata: MetadataConfigFe, pub metadata: MetadataConfigFe,
#[serde(default)] #[serde(default)]
pub scheduling: SchedulingConfigFe, pub scheduling: SchedulingConfigFe,
#[serde(default)]
pub musicbrainz: MusicBrainzConfigFe,
} }
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)] #[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
@@ -513,3 +515,36 @@ fn default_lyrics_source() -> String {
fn default_cover_art_source() -> String { fn default_cover_art_source() -> String {
"coverartarchive".into() "coverartarchive".into()
} }
// --- MusicBrainz local DB ---
/// Frontend representation of the `musicbrainz` section of `AppConfig`.
///
/// Every field is `#[serde(default)]`, so a config payload that omits this
/// section (or any field of it) still deserializes.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
pub struct MusicBrainzConfigFe {
/// Presumably the location of the local MusicBrainz database file; `None` when unset.
/// NOTE(review): confirm the exact meaning against the backend config.
#[serde(default)]
pub local_db_path: Option<String>,
/// Presumably toggles the periodic re-import; defaults to `false` when absent.
/// NOTE(review): confirm that the backend actually honours this flag.
#[serde(default)]
pub auto_update: bool,
}
/// Payload deserialized from the `mb-status` endpoint.
#[derive(Debug, Clone, PartialEq, Deserialize)]
pub struct MbStatus {
/// Whether the server reports a local MusicBrainz database as loaded.
pub has_local_db: bool,
/// Statistics about the local database; `None` when the field is missing or null.
#[serde(default)]
pub stats: Option<MbLocalStats>,
}
/// Statistics about the local MusicBrainz database, as carried in `MbStatus::stats`.
///
/// Every field is `#[serde(default)]`: a missing counter deserializes as `0`
/// and a missing date as `None`.
#[derive(Debug, Clone, PartialEq, Deserialize)]
pub struct MbLocalStats {
/// Artist count (presumably rows imported — confirm against the backend).
#[serde(default)]
pub artists: u64,
/// Release-group count.
#[serde(default)]
pub release_groups: u64,
/// Release count.
#[serde(default)]
pub releases: u64,
/// Recording count.
#[serde(default)]
pub recordings: u64,
/// Track count.
#[serde(default)]
pub tracks: u64,
/// Date of the last import as an opaque string; `None` when not reported.
#[serde(default)]
pub last_import_date: Option<String>,
}

View File

@@ -8,6 +8,7 @@ pub mod auth;
pub mod config; pub mod config;
pub mod cookie_refresh; pub mod cookie_refresh;
pub mod error; pub mod error;
pub mod mb_update;
pub mod monitor; pub mod monitor;
pub mod pipeline; pub mod pipeline;
pub mod pipeline_scheduler; pub mod pipeline_scheduler;

View File

@@ -5,8 +5,8 @@ use clap::Parser;
use tracing_actix_web::TracingLogger; use tracing_actix_web::TracingLogger;
use tracing_subscriber::EnvFilter; use tracing_subscriber::EnvFilter;
use shanty_data::MusicBrainzFetcher;
use shanty_data::WikipediaFetcher; use shanty_data::WikipediaFetcher;
use shanty_data::{HybridMusicBrainzFetcher, LocalMusicBrainzFetcher, MusicBrainzFetcher};
use shanty_db::Database; use shanty_db::Database;
use shanty_search::MusicBrainzSearch; use shanty_search::MusicBrainzSearch;
@@ -54,8 +54,23 @@ async fn main() -> anyhow::Result<()> {
tracing::info!(url = %config.database_url, "connecting to database"); tracing::info!(url = %config.database_url, "connecting to database");
let db = Database::new(&config.database_url).await?; let db = Database::new(&config.database_url).await?;
let mb_client = MusicBrainzFetcher::new()?; let mb_remote = MusicBrainzFetcher::new()?;
let search = MusicBrainzSearch::with_limiter(mb_client.limiter())?; let search = MusicBrainzSearch::with_limiter(mb_remote.limiter())?;
// Set up local MB database if configured
let local_mb = create_local_mb_fetcher(&config);
let mb_client = HybridMusicBrainzFetcher::new(local_mb, mb_remote);
if mb_client.has_local_db()
&& let Some(stats) = mb_client.local_stats()
{
tracing::info!(
artists = stats.artists,
release_groups = stats.release_groups,
"local MusicBrainz database loaded"
);
}
let wiki_fetcher = WikipediaFetcher::new()?; let wiki_fetcher = WikipediaFetcher::new()?;
let bind = format!("{}:{}", config.web.bind, config.web.port); let bind = format!("{}:{}", config.web.bind, config.web.port);
@@ -85,6 +100,7 @@ async fn main() -> anyhow::Result<()> {
// Start pipeline and monitor schedulers // Start pipeline and monitor schedulers
shanty_web::pipeline_scheduler::spawn(state.clone()); shanty_web::pipeline_scheduler::spawn(state.clone());
shanty_web::monitor::spawn(state.clone()); shanty_web::monitor::spawn(state.clone());
shanty_web::mb_update::spawn(state.clone());
// Resolve static files directory relative to the binary location // Resolve static files directory relative to the binary location
let static_dir = std::env::current_exe() let static_dir = std::env::current_exe()
@@ -157,3 +173,36 @@ async fn main() -> anyhow::Result<()> {
Ok(()) Ok(())
} }
/// Build a `LocalMusicBrainzFetcher` when a usable local database can be found.
///
/// The path comes from `config.musicbrainz.local_db_path` when set; otherwise
/// `shanty-mb.db` inside the data directory is used, but only if that file
/// already exists. Returns `None` when no path is available, when the database
/// cannot be opened, or when it opens but reports itself as unavailable.
fn create_local_mb_fetcher(config: &AppConfig) -> Option<LocalMusicBrainzFetcher> {
    let db_path = match config.musicbrainz.local_db_path.as_ref() {
        Some(configured) => configured.to_string_lossy().to_string(),
        None => {
            let fallback = shanty_config::data_dir().join("shanty-mb.db");
            if !fallback.exists() {
                return None;
            }
            fallback.to_string_lossy().to_string()
        }
    };

    let fetcher = match LocalMusicBrainzFetcher::new(&db_path) {
        Ok(opened) => opened,
        Err(e) => {
            tracing::warn!(path = %db_path, error = %e, "failed to open local MB database");
            return None;
        }
    };

    if !fetcher.is_available() {
        tracing::debug!(path = %db_path, "local MB database exists but has no data");
        return None;
    }

    tracing::info!(path = %db_path, "opened local MusicBrainz database");
    Some(fetcher)
}

120
src/mb_update.rs Normal file
View File

@@ -0,0 +1,120 @@
//! Background task that periodically re-imports the MusicBrainz database.
use std::time::Duration;
use actix_web::web;
use crate::state::AppState;
/// Spawn the weekly MB database update loop.
///
/// Only runs if a local MB database exists (meaning the user has done an initial import).
/// Downloads fresh dumps and re-imports weekly.
pub fn spawn(state: web::Data<AppState>) {
tokio::spawn(async move {
// Wait 1 hour after startup before first check
tokio::time::sleep(Duration::from_secs(3600)).await;
loop {
// Check if local DB exists and auto-update is desired
let has_local = state.mb_client.has_local_db();
if !has_local {
// No local DB — sleep a day and check again
tokio::time::sleep(Duration::from_secs(86400)).await;
continue;
}
// Check how old the import is
let needs_update = state
.mb_client
.local_stats()
.and_then(|s| s.last_import_date)
.map(|date| {
// Parse the date and check if it's older than 7 days
chrono::NaiveDate::parse_from_str(&date, "%Y-%m-%d")
.map(|d| {
let age = chrono::Utc::now().naive_utc().date() - d;
age.num_days() >= 7
})
.unwrap_or(true) // If we can't parse the date, update
})
.unwrap_or(false); // No stats = no local DB = skip
if !needs_update {
// Check again in 24 hours
tokio::time::sleep(Duration::from_secs(86400)).await;
continue;
}
tracing::info!("starting weekly MusicBrainz database update");
let data_dir = shanty_config::data_dir().join("mb-dumps");
let db_path = state
.config
.read()
.await
.musicbrainz
.local_db_path
.clone()
.unwrap_or_else(|| shanty_config::data_dir().join("shanty-mb.db"));
// Download fresh dumps
if let Err(e) = std::fs::create_dir_all(&data_dir) {
tracing::error!(error = %e, "failed to create dump dir for MB update");
tokio::time::sleep(Duration::from_secs(86400)).await;
continue;
}
let timestamp = match shanty_data::mb_import::discover_latest_dump_folder().await {
Ok(t) => t,
Err(e) => {
tracing::error!(error = %e, "failed to discover latest MB dump");
tokio::time::sleep(Duration::from_secs(86400)).await;
continue;
}
};
let mut download_failed = false;
for filename in shanty_data::mb_import::DUMP_FILES {
if let Err(e) =
shanty_data::mb_import::download_dump(filename, &timestamp, &data_dir, |msg| {
tracing::info!("{msg}");
})
.await
{
tracing::error!(file = filename, error = %e, "MB dump download failed");
download_failed = true;
break;
}
}
if download_failed {
tokio::time::sleep(Duration::from_secs(86400)).await;
continue;
}
// Run import in blocking task
let result = tokio::task::spawn_blocking(move || {
shanty_data::mb_import::run_import_at_path(&db_path, &data_dir, |msg| {
tracing::info!("{msg}");
})
})
.await;
match result {
Ok(Ok(stats)) => {
tracing::info!(%stats, "weekly MusicBrainz update complete");
}
Ok(Err(e)) => {
tracing::error!(error = %e, "weekly MusicBrainz import failed");
}
Err(e) => {
tracing::error!(error = %e, "weekly MusicBrainz import task panicked");
}
}
// Sleep 7 days before next check
tokio::time::sleep(Duration::from_secs(7 * 86400)).await;
}
});
}

View File

@@ -24,6 +24,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/monitor/status").route(web::get().to(get_monitor_status))) .service(web::resource("/monitor/status").route(web::get().to(get_monitor_status)))
.service(web::resource("/scheduler/skip-pipeline").route(web::post().to(skip_pipeline))) .service(web::resource("/scheduler/skip-pipeline").route(web::post().to(skip_pipeline)))
.service(web::resource("/scheduler/skip-monitor").route(web::post().to(skip_monitor))) .service(web::resource("/scheduler/skip-monitor").route(web::post().to(skip_monitor)))
.service(web::resource("/mb-status").route(web::get().to(get_mb_status)))
.service(web::resource("/mb-import").route(web::post().to(trigger_mb_import)))
.service( .service(
web::resource("/config") web::resource("/config")
.route(web::get().to(get_config)) .route(web::get().to(get_config))
@@ -327,3 +329,106 @@ async fn skip_monitor(
sched.next_monitor = None; sched.next_monitor = None;
Ok(HttpResponse::Ok().json(serde_json::json!({"status": "skipped"}))) Ok(HttpResponse::Ok().json(serde_json::json!({"status": "skipped"})))
} }
/// Report whether a local MusicBrainz database is loaded, plus its statistics.
///
/// Fails with the error from `auth::require_auth` when the session does not
/// pass that check. Otherwise responds `200 OK` with a JSON object holding
/// `has_local_db` and `stats`.
async fn get_mb_status(
    state: web::Data<AppState>,
    session: Session,
) -> Result<HttpResponse, ApiError> {
    auth::require_auth(&session)?;

    let body = serde_json::json!({
        "has_local_db": state.mb_client.has_local_db(),
        "stats": state.mb_client.local_stats(),
    });
    Ok(HttpResponse::Ok().json(body))
}
/// Start a MusicBrainz dump download + import as a background task.
///
/// Fails with the error from `auth::require_admin` when the session does not
/// pass that check. Otherwise registers an `mb_import` task, spawns the work
/// on the runtime, and immediately responds `202 Accepted` with
/// `{ "task_id": ... }`. Progress and the final outcome are reported through
/// `state.tasks`.
///
/// NOTE(review): nothing here prevents a second import from being started
/// while one is already running — confirm whether a guard is needed.
async fn trigger_mb_import(
    state: web::Data<AppState>,
    session: Session,
) -> Result<HttpResponse, ApiError> {
    auth::require_admin(&session)?;

    let task_id = state.tasks.register("mb_import");
    let tid = task_id.clone();
    let config = state.config.read().await.clone();

    tokio::spawn(async move {
        // Progress is reported in steps: 4 downloads + 4 imports. Using the
        // same total everywhere keeps the reported denominator from jumping.
        let download_steps = 4;
        let total_steps = 8;

        state
            .tasks
            .update_progress(&tid, 0, 0, "Starting MusicBrainz import...");

        let data_dir = shanty_config::data_dir().join("mb-dumps");
        let db_path = config
            .musicbrainz
            .local_db_path
            .clone()
            .unwrap_or_else(|| shanty_config::data_dir().join("shanty-mb.db"));

        // Download dumps
        state
            .tasks
            .update_progress(&tid, 0, total_steps, "Downloading dumps...");

        if let Err(e) = std::fs::create_dir_all(&data_dir) {
            state
                .tasks
                .fail(&tid, format!("Failed to create data dir: {e}"));
            return;
        }

        let timestamp = match shanty_data::mb_import::discover_latest_dump_folder().await {
            Ok(t) => t,
            Err(e) => {
                state
                    .tasks
                    .fail(&tid, format!("Failed to discover latest dump: {e}"));
                return;
            }
        };

        for (i, filename) in shanty_data::mb_import::DUMP_FILES.iter().enumerate() {
            state.tasks.update_progress(
                &tid,
                i as u64,
                total_steps,
                &format!("Downloading {filename}..."),
            );
            if let Err(e) =
                shanty_data::mb_import::download_dump(filename, &timestamp, &data_dir, |_| {}).await
            {
                state
                    .tasks
                    .fail(&tid, format!("Failed to download {filename}: {e}"));
                return;
            }
        }

        // Run import
        state.tasks.update_progress(
            &tid,
            download_steps,
            total_steps,
            "Importing into database...",
        );

        let tid_clone = tid.clone();
        let state_clone = state.clone();
        // Run import in blocking task since rusqlite is sync
        let result = tokio::task::spawn_blocking(move || {
            shanty_data::mb_import::run_import_at_path(&db_path, &data_dir, |msg| {
                state_clone
                    .tasks
                    .update_progress(&tid_clone, download_steps, total_steps, msg);
            })
        })
        .await;

        match result {
            Ok(Ok(stats)) => {
                state.tasks.complete(&tid, format!("{stats}"));
            }
            Ok(Err(e)) => {
                state.tasks.fail(&tid, format!("Import failed: {e}"));
            }
            Err(e) => {
                state.tasks.fail(&tid, format!("Import task panicked: {e}"));
            }
        }
    });

    Ok(HttpResponse::Accepted().json(serde_json::json!({ "task_id": task_id })))
}

View File

@@ -1,7 +1,7 @@
use std::sync::Arc; use std::sync::Arc;
use tokio::sync::{Mutex, RwLock}; use tokio::sync::{Mutex, RwLock};
use shanty_data::MusicBrainzFetcher; use shanty_data::HybridMusicBrainzFetcher;
use shanty_data::WikipediaFetcher; use shanty_data::WikipediaFetcher;
use shanty_db::Database; use shanty_db::Database;
use shanty_search::MusicBrainzSearch; use shanty_search::MusicBrainzSearch;
@@ -28,7 +28,7 @@ pub struct SchedulerInfo {
pub struct AppState { pub struct AppState {
pub db: Database, pub db: Database,
pub mb_client: MusicBrainzFetcher, pub mb_client: HybridMusicBrainzFetcher,
pub search: MusicBrainzSearch, pub search: MusicBrainzSearch,
pub wiki_fetcher: WikipediaFetcher, pub wiki_fetcher: WikipediaFetcher,
pub config: Arc<RwLock<AppConfig>>, pub config: Arc<RwLock<AppConfig>>,