diff --git a/shanty-data/src/mb_import.rs b/shanty-data/src/mb_import.rs index b48bfcc..e43741c 100644 --- a/shanty-data/src/mb_import.rs +++ b/shanty-data/src/mb_import.rs @@ -589,6 +589,18 @@ pub async fn download_dump( let url = format!("{DUMP_BASE_URL}{timestamp}/{filename}"); let target_path = target_dir.join(filename); + // Skip if we already have this file from the same dump timestamp + let stamp_path = target_dir.join(format!("{filename}.timestamp")); + if target_path.exists() { + if let Ok(existing_stamp) = std::fs::read_to_string(&stamp_path) { + if existing_stamp.trim() == timestamp { + progress(&format!("Skipping {filename} (already downloaded from {timestamp})")); + tracing::info!(file = %filename, timestamp = %timestamp, "dump file already up to date, skipping download"); + return Ok(target_path); + } + } + } + progress(&format!("Downloading {filename}...")); tracing::info!(url = %url, target = %target_path.display(), "downloading MB dump"); @@ -634,6 +646,9 @@ pub async fn download_dump( "download complete" ); + // Write timestamp marker so we can skip this file on re-runs + let _ = std::fs::write(&stamp_path, timestamp); + Ok(target_path) }