Sped up artist enrichment at least somewhat
This commit is contained in:
@@ -1,4 +1,9 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
use tokio::sync::Mutex;
|
||||||
use tokio::time::Duration;
|
use tokio::time::Duration;
|
||||||
|
|
||||||
use crate::error::{DataError, DataResult};
|
use crate::error::{DataError, DataResult};
|
||||||
@@ -12,11 +17,21 @@ use crate::types::{
|
|||||||
const BASE_URL: &str = "https://musicbrainz.org/ws/2";
|
const BASE_URL: &str = "https://musicbrainz.org/ws/2";
|
||||||
const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
|
const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
|
||||||
const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe
|
const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe
|
||||||
|
/// How long cached responses stay valid (1 hour).
|
||||||
|
const RESPONSE_CACHE_TTL: Duration = Duration::from_secs(3600);
|
||||||
|
|
||||||
/// MusicBrainz API client with rate limiting.
|
/// A cached HTTP response with a timestamp for expiry.
|
||||||
|
struct CachedResponse {
|
||||||
|
body: String,
|
||||||
|
fetched_at: Instant,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// MusicBrainz API client with rate limiting and in-memory response caching.
|
||||||
pub struct MusicBrainzFetcher {
|
pub struct MusicBrainzFetcher {
|
||||||
client: reqwest::Client,
|
client: reqwest::Client,
|
||||||
limiter: RateLimiter,
|
limiter: RateLimiter,
|
||||||
|
/// In-memory cache: URL -> raw JSON response body. Entries expire after RESPONSE_CACHE_TTL.
|
||||||
|
response_cache: Arc<Mutex<HashMap<String, CachedResponse>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MusicBrainzFetcher {
|
impl MusicBrainzFetcher {
|
||||||
@@ -30,7 +45,11 @@ impl MusicBrainzFetcher {
|
|||||||
.user_agent(USER_AGENT)
|
.user_agent(USER_AGENT)
|
||||||
.timeout(Duration::from_secs(30))
|
.timeout(Duration::from_secs(30))
|
||||||
.build()?;
|
.build()?;
|
||||||
Ok(Self { client, limiter })
|
Ok(Self {
|
||||||
|
client,
|
||||||
|
limiter,
|
||||||
|
response_cache: Arc::new(Mutex::new(HashMap::new())),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get a clone of the rate limiter for sharing with other MB clients.
|
/// Get a clone of the rate limiter for sharing with other MB clients.
|
||||||
@@ -39,6 +58,30 @@ impl MusicBrainzFetcher {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> DataResult<T> {
|
async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> DataResult<T> {
|
||||||
|
// Check in-memory cache first
|
||||||
|
{
|
||||||
|
let mut cache = self.response_cache.lock().await;
|
||||||
|
if let Some(entry) = cache.get(url) {
|
||||||
|
if entry.fetched_at.elapsed() < RESPONSE_CACHE_TTL {
|
||||||
|
tracing::debug!(url = url, "MusicBrainz cache hit");
|
||||||
|
return Ok(serde_json::from_str(&entry.body)?);
|
||||||
|
} else {
|
||||||
|
// Expired — remove it
|
||||||
|
cache.remove(url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Also purge any other expired entries opportunistically (limit to avoid holding lock too long)
|
||||||
|
let expired: Vec<String> = cache
|
||||||
|
.iter()
|
||||||
|
.filter(|(_, v)| v.fetched_at.elapsed() >= RESPONSE_CACHE_TTL)
|
||||||
|
.map(|(k, _)| k.clone())
|
||||||
|
.take(50)
|
||||||
|
.collect();
|
||||||
|
for k in expired {
|
||||||
|
cache.remove(&k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
self.limiter.wait().await;
|
self.limiter.wait().await;
|
||||||
tracing::debug!(url = url, "MusicBrainz request");
|
tracing::debug!(url = url, "MusicBrainz request");
|
||||||
let resp = self.client.get(url).send().await?;
|
let resp = self.client.get(url).send().await?;
|
||||||
@@ -49,7 +92,21 @@ impl MusicBrainzFetcher {
|
|||||||
"MusicBrainz API error {status}: {body}"
|
"MusicBrainz API error {status}: {body}"
|
||||||
)));
|
)));
|
||||||
}
|
}
|
||||||
Ok(resp.json().await?)
|
let body = resp.text().await?;
|
||||||
|
|
||||||
|
// Store in cache
|
||||||
|
{
|
||||||
|
let mut cache = self.response_cache.lock().await;
|
||||||
|
cache.insert(
|
||||||
|
url.to_string(),
|
||||||
|
CachedResponse {
|
||||||
|
body: body.clone(),
|
||||||
|
fetched_at: Instant::now(),
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(serde_json::from_str(&body)?)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Look up an artist directly by MBID. Returns (name, disambiguation).
|
/// Look up an artist directly by MBID. Returns (name, disambiguation).
|
||||||
@@ -310,6 +367,11 @@ impl MetadataFetcher for MusicBrainzFetcher {
|
|||||||
})
|
})
|
||||||
.collect())
|
.collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn resolve_release_from_group(&self, release_group_mbid: &str) -> DataResult<String> {
|
||||||
|
// Delegate to the inherent method
|
||||||
|
MusicBrainzFetcher::resolve_release_from_group(self, release_group_mbid).await
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extract the primary artist from MusicBrainz artist credits.
|
/// Extract the primary artist from MusicBrainz artist credits.
|
||||||
|
|||||||
@@ -48,6 +48,12 @@ pub trait MetadataFetcher: Send + Sync {
|
|||||||
&self,
|
&self,
|
||||||
artist_mbid: &str,
|
artist_mbid: &str,
|
||||||
) -> impl Future<Output = DataResult<Vec<ReleaseGroupEntry>>> + Send;
|
) -> impl Future<Output = DataResult<Vec<ReleaseGroupEntry>>> + Send;
|
||||||
|
|
||||||
|
/// Resolve a release-group MBID to a concrete release MBID (first release in the group).
|
||||||
|
fn resolve_release_from_group(
|
||||||
|
&self,
|
||||||
|
release_group_mbid: &str,
|
||||||
|
) -> impl Future<Output = DataResult<String>> + Send;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fetches artist image URLs from an external source.
|
/// Fetches artist image URLs from an external source.
|
||||||
|
|||||||
Submodule shanty-tag updated: e5b3fc3fe3...884b2e8d52
Submodule shanty-watch updated: 85e24671a3...aef4708439
Submodule shanty-web updated: 621355e352...75f3b4f704
Reference in New Issue
Block a user