Sped up artist enrichment at least somewhat
All checks were successful
CI / check (push) Successful in 1m30s
CI / docker (push) Successful in 2m7s

This commit is contained in:
Connor Johnstone
2026-03-21 15:08:28 -04:00
parent 43f4dad038
commit 31d54651e6
5 changed files with 74 additions and 6 deletions

View File

@@ -1,4 +1,9 @@
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Instant;
use serde::Deserialize;
use tokio::sync::Mutex;
use tokio::time::Duration;
use crate::error::{DataError, DataResult};
@@ -12,11 +17,21 @@ use crate::types::{
/// Base URL of the MusicBrainz web service (the `/ws/2` endpoint root).
const BASE_URL: &str = "https://musicbrainz.org/ws/2";
/// `User-Agent` value passed to the HTTP client builder.
const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
/// Rate-limit interval (1100 ms).
/// NOTE(review): presumably the minimum gap between requests enforced by the rate
/// limiter; its point of use is not visible in this view — confirm.
const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe
/// How long cached responses stay valid (1 hour).
/// `get_json` compares each entry's age (`fetched_at.elapsed()`) against this value.
const RESPONSE_CACHE_TTL: Duration = Duration::from_secs(3600);
/// MusicBrainz API client with rate limiting.
/// A cached HTTP response with a timestamp for expiry.
struct CachedResponse {
/// Raw response body text. It is stored unparsed and deserialized into the
/// requested type each time the entry is served from the cache.
body: String,
/// Instant at which the entry was inserted into the cache; its elapsed time is
/// compared against `RESPONSE_CACHE_TTL` to decide whether the entry has expired.
fetched_at: Instant,
}
/// MusicBrainz API client with rate limiting and in-memory response caching.
pub struct MusicBrainzFetcher {
/// HTTP client used by `get_json` to send requests.
client: reqwest::Client,
/// Rate limiter awaited by `get_json` before a request is sent; cache hits return
/// before reaching it.
limiter: RateLimiter,
/// In-memory cache: URL -> raw JSON response body. Entries expire after RESPONSE_CACHE_TTL.
/// Guarded by an async (`tokio`) mutex that is locked with `.lock().await`.
response_cache: Arc<Mutex<HashMap<String, CachedResponse>>>,
}
impl MusicBrainzFetcher {
@@ -30,7 +45,11 @@ impl MusicBrainzFetcher {
.user_agent(USER_AGENT)
.timeout(Duration::from_secs(30))
.build()?;
Ok(Self { client, limiter })
Ok(Self {
client,
limiter,
response_cache: Arc::new(Mutex::new(HashMap::new())),
})
}
/// Get a clone of the rate limiter for sharing with other MB clients.
@@ -39,6 +58,30 @@ impl MusicBrainzFetcher {
}
/// Fetch `url` and deserialize the JSON response body into `T`, consulting the
/// in-memory response cache first.
///
/// A cached body younger than `RESPONSE_CACHE_TTL` is deserialized and returned
/// without waiting on the rate limiter or sending a request. Otherwise the rate
/// limiter is awaited, the request is sent, and the response body is stored in the
/// cache under `url` before being deserialized.
///
/// # Errors
/// Propagates (via `?`) failures from sending the request, reading the body, and
/// `serde_json` deserialization. A "MusicBrainz API error {status}: {body}" error is
/// produced by a status check whose condition is not fully visible in this view.
async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> DataResult<T> {
// Check in-memory cache first
// The lookup lives in its own block so the mutex guard is dropped before the
// rate-limiter wait and the network request below.
{
let mut cache = self.response_cache.lock().await;
if let Some(entry) = cache.get(url) {
if entry.fetched_at.elapsed() < RESPONSE_CACHE_TTL {
tracing::debug!(url = url, "MusicBrainz cache hit");
// The cached text is re-parsed on every hit, so a deserialization error
// is returned here just as it would be for a fresh response.
return Ok(serde_json::from_str(&entry.body)?);
} else {
// Expired — remove it
cache.remove(url);
}
}
// Also purge other expired entries opportunistically. At most 50 entries are
// removed per call to bound the time spent holding the lock.
// NOTE(review): the filter may still walk the whole map when fewer than 50
// entries are expired, so only the removals are bounded, not the scan.
// Keys are collected first because the map cannot be mutated while iterating it.
let expired: Vec<String> = cache
.iter()
.filter(|(_, v)| v.fetched_at.elapsed() >= RESPONSE_CACHE_TTL)
.map(|(k, _)| k.clone())
.take(50)
.collect();
for k in expired {
cache.remove(&k);
}
}
// Cache miss (or expired entry): wait on the rate limiter before sending the request.
self.limiter.wait().await;
tracing::debug!(url = url, "MusicBrainz request");
let resp = self.client.get(url).send().await?;
@@ -49,7 +92,21 @@ impl MusicBrainzFetcher {
"MusicBrainz API error {status}: {body}"
)));
}
Ok(resp.json().await?)
// Read the body as text (rather than parsing directly) so the raw string can be cached.
let body = resp.text().await?;
// Store in cache
// NOTE(review): the body is cached before it is deserialized, so a body that fails
// to parse into `T` is still stored and will be served from the cache until it
// expires — confirm this is intended.
{
let mut cache = self.response_cache.lock().await;
cache.insert(
url.to_string(),
CachedResponse {
body: body.clone(),
fetched_at: Instant::now(),
},
);
}
Ok(serde_json::from_str(&body)?)
}
/// Look up an artist directly by MBID. Returns (name, disambiguation).
@@ -310,6 +367,11 @@ impl MetadataFetcher for MusicBrainzFetcher {
})
.collect())
}
/// Trait-level entry point for resolving a release-group MBID; forwards to the
/// inherent `MusicBrainzFetcher::resolve_release_from_group` and returns its result
/// unchanged.
async fn resolve_release_from_group(&self, release_group_mbid: &str) -> DataResult<String> {
// Delegate to the inherent method
// NOTE(review): the inherent method's body is outside this view; this relies on the
// type-qualified path resolving to it rather than back to this trait method.
MusicBrainzFetcher::resolve_release_from_group(self, release_group_mbid).await
}
}
/// Extract the primary artist from MusicBrainz artist credits.

View File

@@ -48,6 +48,12 @@ pub trait MetadataFetcher: Send + Sync {
&self,
artist_mbid: &str,
) -> impl Future<Output = DataResult<Vec<ReleaseGroupEntry>>> + Send;
/// Resolve a release-group MBID to a concrete release MBID (first release in the group).
///
/// Takes the release-group MBID as a string slice and returns a `Send` future that
/// yields the release MBID as a `String`, or an error through `DataResult`.
fn resolve_release_from_group(
&self,
release_group_mbid: &str,
) -> impl Future<Output = DataResult<String>> + Send;
}
/// Fetches artist image URLs from an external source.