Re-organized providers and added a few
Some checks failed
CI / check (push) Failing after 1m10s
CI / docker (push) Has been skipped

This commit is contained in:
Connor Johnstone
2026-03-20 14:52:16 -04:00
parent d3f4dc33d5
commit 4ec47252d9
19 changed files with 1302 additions and 6 deletions

17
Cargo.lock generated
View File

@@ -3091,9 +3091,11 @@ dependencies = [
"actix-web", "actix-web",
"anyhow", "anyhow",
"clap", "clap",
"dotenvy",
"rand 0.9.2", "rand 0.9.2",
"serde_json", "serde_json",
"shanty-config", "shanty-config",
"shanty-data",
"shanty-db", "shanty-db",
"shanty-search", "shanty-search",
"shanty-tag", "shanty-tag",
@@ -3116,6 +3118,18 @@ dependencies = [
"tracing-subscriber", "tracing-subscriber",
] ]
[[package]]
name = "shanty-data"
version = "0.1.0"
dependencies = [
"reqwest",
"serde",
"serde_json",
"thiserror",
"tokio",
"tracing",
]
[[package]] [[package]]
name = "shanty-db" name = "shanty-db"
version = "0.1.0" version = "0.1.0"
@@ -3233,6 +3247,7 @@ dependencies = [
"sea-orm", "sea-orm",
"serde", "serde",
"serde_json", "serde_json",
"shanty-data",
"shanty-db", "shanty-db",
"shanty-tag", "shanty-tag",
"thiserror", "thiserror",
@@ -3267,6 +3282,7 @@ dependencies = [
"sea-orm", "sea-orm",
"serde", "serde",
"serde_json", "serde_json",
"shanty-data",
"shanty-db", "shanty-db",
"strsim", "strsim",
"tempfile", "tempfile",
@@ -3319,6 +3335,7 @@ dependencies = [
"serde_json", "serde_json",
"serde_yaml", "serde_yaml",
"shanty-config", "shanty-config",
"shanty-data",
"shanty-db", "shanty-db",
"shanty-dl", "shanty-dl",
"shanty-index", "shanty-index",

View File

@@ -13,7 +13,7 @@ members = [
"shanty-playlist", "shanty-playlist",
"shanty-serve", "shanty-serve",
"shanty-play", "shanty-play",
"shanty-web", "shanty-web", "shanty-data",
] ]
resolver = "3" resolver = "3"
@@ -55,6 +55,7 @@ path = "src/main.rs"
[dependencies] [dependencies]
shanty-config = { path = "shanty-config" } shanty-config = { path = "shanty-config" }
shanty-data = { path = "shanty-data" }
shanty-db = { path = "shanty-db" } shanty-db = { path = "shanty-db" }
shanty-web = { path = "shanty-web" } shanty-web = { path = "shanty-web" }
shanty-tag = { path = "shanty-tag" } shanty-tag = { path = "shanty-tag" }
@@ -71,3 +72,4 @@ tracing = { workspace = true }
tracing-subscriber = { workspace = true } tracing-subscriber = { workspace = true }
anyhow = { workspace = true } anyhow = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
dotenvy = "0.15"

View File

@@ -32,6 +32,9 @@ pub struct AppConfig {
#[serde(default)] #[serde(default)]
pub indexing: IndexingConfig, pub indexing: IndexingConfig,
#[serde(default)]
pub metadata: MetadataConfig,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -94,6 +97,39 @@ pub struct IndexingConfig {
pub concurrency: usize, pub concurrency: usize,
} }
/// Selects which external provider supplies each kind of metadata and holds
/// the API keys that some of those providers require.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetadataConfig {
/// Source for structured metadata: "musicbrainz" (default).
#[serde(default = "default_metadata_source")]
pub metadata_source: String,
/// Source for artist images: "wikipedia" (default) or "fanarttv".
#[serde(default = "default_artist_image_source")]
pub artist_image_source: String,
/// Source for artist bios: "wikipedia" (default) or "lastfm".
#[serde(default = "default_artist_bio_source")]
pub artist_bio_source: String,
/// Source for lyrics: "lrclib" (default).
#[serde(default = "default_lyrics_source")]
pub lyrics_source: String,
/// Source for cover art: "coverartarchive" (default).
#[serde(default = "default_cover_art_source")]
pub cover_art_source: String,
/// Last.fm API key for fetching artist bios. Set via SHANTY_LASTFM_API_KEY env var.
/// Required if artist_bio_source is "lastfm".
/// Skipped by serde, so it is neither read from nor written to serialized config.
#[serde(skip)]
pub lastfm_api_key: Option<String>,
/// fanart.tv API key for artist images/banners. Set via SHANTY_FANART_API_KEY env var.
/// Required if artist_image_source is "fanarttv".
/// Skipped by serde, so it is neither read from nor written to serialized config.
#[serde(skip)]
pub fanart_api_key: Option<String>,
}
// --- Defaults --- // --- Defaults ---
impl Default for AppConfig { impl Default for AppConfig {
@@ -108,6 +144,7 @@ impl Default for AppConfig {
tagging: TaggingConfig::default(), tagging: TaggingConfig::default(),
download: DownloadConfig::default(), download: DownloadConfig::default(),
indexing: IndexingConfig::default(), indexing: IndexingConfig::default(),
metadata: MetadataConfig::default(),
} }
} }
} }
@@ -154,6 +191,20 @@ impl Default for IndexingConfig {
} }
} }
impl Default for MetadataConfig {
    /// Build the configuration used when no metadata section is supplied:
    /// every source takes its built-in default and no API key is set.
    fn default() -> Self {
        let no_key: Option<String> = None;
        Self {
            lastfm_api_key: no_key.clone(),
            fanart_api_key: no_key,
            cover_art_source: default_cover_art_source(),
            lyrics_source: default_lyrics_source(),
            artist_bio_source: default_artist_bio_source(),
            artist_image_source: default_artist_image_source(),
            metadata_source: default_metadata_source(),
        }
    }
}
fn default_library_path() -> PathBuf { fn default_library_path() -> PathBuf {
dirs::audio_dir().unwrap_or_else(|| PathBuf::from("~/Music")) dirs::audio_dir().unwrap_or_else(|| PathBuf::from("~/Music"))
} }
@@ -206,6 +257,21 @@ fn default_rate_limit_auth() -> u32 {
fn default_concurrency() -> usize { fn default_concurrency() -> usize {
4 4
} }
/// Default provider name for structured metadata.
fn default_metadata_source() -> String {
    String::from("musicbrainz")
}
/// Default provider name for artist images.
fn default_artist_image_source() -> String {
    String::from("wikipedia")
}
/// Default provider name for artist biographies.
fn default_artist_bio_source() -> String {
    String::from("wikipedia")
}
/// Default provider name for lyrics.
fn default_lyrics_source() -> String {
    String::from("lrclib")
}
/// Default provider name for cover art.
fn default_cover_art_source() -> String {
    String::from("coverartarchive")
}
fn default_cookie_refresh_hours() -> u32 { fn default_cookie_refresh_hours() -> u32 {
6 6
} }
@@ -295,6 +361,12 @@ impl AppConfig {
if let Ok(v) = std::env::var("SHANTY_WEB_BIND") { if let Ok(v) = std::env::var("SHANTY_WEB_BIND") {
config.web.bind = v; config.web.bind = v;
} }
if let Ok(v) = std::env::var("SHANTY_LASTFM_API_KEY") {
config.metadata.lastfm_api_key = Some(v);
}
if let Ok(v) = std::env::var("SHANTY_FANART_API_KEY") {
config.metadata.fanart_api_key = Some(v);
}
config config
} }
} }

17
shanty-data/Cargo.toml Normal file
View File

@@ -0,0 +1,17 @@
[package]
name = "shanty-data"
version.workspace = true
edition.workspace = true
license.workspace = true
description = "External data providers for Shanty — metadata, images, bios, lyrics, cover art"
[dependencies]
serde = { version = "1", features = ["derive"] }
serde_json = "1"
thiserror = "2"
tracing = "0.1"
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.12", features = ["json"] }
[dev-dependencies]
tokio = { version = "1", features = ["full", "test-util"] }

View File

@@ -0,0 +1,19 @@
use crate::traits::CoverArtFetcher;
/// Fetches cover art URLs from the Cover Art Archive.
///
/// Stateless unit type: the URL is derived from the release ID alone, so no
/// HTTP client or configuration is stored.
#[derive(Default)]
pub struct CoverArtArchiveFetcher;
impl CoverArtArchiveFetcher {
pub fn new() -> Self {
Self
}
}
impl CoverArtFetcher for CoverArtArchiveFetcher {
    /// Build the Cover Art Archive URL of the 250px front cover for
    /// `release_id`. No request is made, so the result is always `Some`.
    fn get_cover_art_url(&self, release_id: &str) -> Option<String> {
        let url = format!("https://coverartarchive.org/release/{release_id}/front-250");
        Some(url)
    }
}

14
shanty-data/src/error.rs Normal file
View File

@@ -0,0 +1,14 @@
/// Error type for data fetching operations.
#[derive(Debug, thiserror::Error)]
pub enum DataError {
/// An error reported by `reqwest`; created automatically through `From`.
#[error("HTTP error: {0}")]
Http(#[from] reqwest::Error),
/// An error reported by `serde_json`; created automatically through `From`.
#[error("JSON error: {0}")]
Json(#[from] serde_json::Error),
/// Any other failure, carried as a preformatted message (for example a
/// non-success API status together with the response body).
#[error("{0}")]
Other(String),
}
/// Result alias used throughout this crate, with [`DataError`] as the error type.
pub type DataResult<T> = Result<T, DataError>;

116
shanty-data/src/fanarttv.rs Normal file
View File

@@ -0,0 +1,116 @@
//! fanart.tv artist image and banner provider.
//!
//! API docs: <https://fanart.tv/get-an-api-key/>
//! Endpoint: `GET https://webservice.fanart.tv/v3/music/{musicbrainz_id}?api_key={key}`
//!
//! Returns artist thumbnails, HD backgrounds (banners), logos, and more.
use crate::error::{DataError, DataResult};
use crate::http::build_client;
use crate::traits::{ArtistBioFetcher, ArtistImageFetcher};
use crate::types::ArtistInfo;
/// fanart.tv image fetcher. Provides artist thumbnails and HD background banners.
pub struct FanartTvFetcher {
/// HTTP client used for every fanart.tv request.
client: reqwest::Client,
/// API key appended to each request URL as the `api_key` query parameter.
api_key: String,
}
impl FanartTvFetcher {
pub fn new(api_key: String) -> DataResult<Self> {
let client = build_client("Shanty/0.1.0 (shanty-music-app)", 15)?;
Ok(Self { client, api_key })
}
/// Fetch the full fanart.tv response for an artist MBID.
async fn fetch(&self, mbid: &str) -> DataResult<Option<FanartResponse>> {
let url = format!(
"https://webservice.fanart.tv/v3/music/{mbid}?api_key={}",
self.api_key
);
let resp = self.client.get(&url).send().await?;
if resp.status() == reqwest::StatusCode::NOT_FOUND {
return Ok(None);
}
if !resp.status().is_success() {
let status = resp.status();
let body = resp.text().await.unwrap_or_default();
return Err(DataError::Other(format!(
"fanart.tv API error {status}: {body}"
)));
}
Ok(Some(resp.json().await?))
}
}
impl ArtistImageFetcher for FanartTvFetcher {
    /// Return the URL of the artist's thumbnail, or of the HD logo when no
    /// thumbnail exists. Yields `Ok(None)` when the artist has no MBID or
    /// fanart.tv has no entry.
    async fn get_artist_image(&self, artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        let mbid = match artist_info.mbid.as_deref() {
            Some(id) => id,
            None => return Ok(None),
        };
        let data = match self.fetch(mbid).await? {
            Some(found) => found,
            None => return Ok(None),
        };

        // Prefer artistthumb, fall back to hdmusiclogo
        let thumb = data.artistthumb.as_deref().and_then(|imgs| imgs.first());
        let chosen = match thumb {
            Some(img) => Some(img),
            None => data.hdmusiclogo.as_deref().and_then(|imgs| imgs.first()),
        };
        Ok(chosen.map(|img| img.url.clone()))
    }

    /// Return the URL of a wide banner image for the artist. Yields
    /// `Ok(None)` when the artist has no MBID or fanart.tv has no entry.
    async fn get_artist_banner(&self, artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        let mbid = match artist_info.mbid.as_deref() {
            Some(id) => id,
            None => return Ok(None),
        };
        let data = match self.fetch(mbid).await? {
            Some(found) => found,
            None => return Ok(None),
        };

        // Prefer artistbackground (1920x1080), fall back to musicbanner (1000x185)
        let background = data
            .artistbackground
            .as_deref()
            .and_then(|imgs| imgs.first());
        let chosen = match background {
            Some(img) => Some(img),
            None => data.musicbanner.as_deref().and_then(|imgs| imgs.first()),
        };
        Ok(chosen.map(|img| img.url.clone()))
    }
}
// fanart.tv has no biography data. This no-op implementation lets the fetcher
// be used as a combined image+bio provider without producing errors.
impl ArtistBioFetcher for FanartTvFetcher {
    /// Always resolves to `Ok(None)`; the artist info is ignored.
    async fn get_artist_bio(&self, _artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        let no_bio: Option<String> = None;
        Ok(no_bio)
    }
}
// --- fanart.tv API response types ---
/// The parts of a fanart.tv artist response that this crate reads. Every
/// image list is optional; a missing key deserializes to `None`.
#[derive(serde::Deserialize)]
struct FanartResponse {
/// Artist thumbnails; first choice for the artist image.
#[serde(default)]
artistthumb: Option<Vec<FanartImage>>,
/// Artist backgrounds; first choice for the artist banner.
#[serde(default)]
artistbackground: Option<Vec<FanartImage>>,
/// Music banners; fallback for the artist banner.
#[serde(default)]
musicbanner: Option<Vec<FanartImage>>,
/// HD music logos; fallback for the artist image.
#[serde(default)]
hdmusiclogo: Option<Vec<FanartImage>>,
}
/// A single image entry in a fanart.tv response; only its URL is read.
#[derive(serde::Deserialize)]
struct FanartImage {
/// Location of the image file.
url: String,
}

61
shanty-data/src/http.rs Normal file
View File

@@ -0,0 +1,61 @@
use std::time::Duration;
use tokio::sync::Mutex;
use tokio::time::Instant;
/// A simple rate limiter that enforces a minimum interval between requests.
pub struct RateLimiter {
/// Time recorded for the most recent request; the async mutex is held while
/// `wait` sleeps, so concurrent callers queue behind one another.
last_request: Mutex<Instant>,
/// Minimum spacing between two consecutive requests.
interval: Duration,
}
impl RateLimiter {
    /// Create a limiter that lets requests through at most once per `interval`.
    ///
    /// The last-request time is back-dated by `interval` so the first call to
    /// [`wait`](Self::wait) returns immediately.
    pub fn new(interval: Duration) -> Self {
        let now = Instant::now();
        // `Instant - Duration` panics when the result cannot be represented
        // (e.g. an interval longer than the clock's age). Fall back to `now`,
        // which only costs one extra interval of waiting on the first request.
        let last = now.checked_sub(interval).unwrap_or(now);
        Self {
            last_request: Mutex::new(last),
            interval,
        }
    }

    /// Wait if needed so we don't exceed the rate limit.
    ///
    /// The lock is held across the sleep on purpose: it serializes concurrent
    /// callers so each one is spaced `interval` after the previous.
    pub async fn wait(&self) {
        let mut last = self.last_request.lock().await;
        let elapsed = last.elapsed();
        if elapsed < self.interval {
            tokio::time::sleep(self.interval - elapsed).await;
        }
        *last = Instant::now();
    }
}
/// Build a reqwest client that sends `user_agent` on every request and gives
/// up on a request after `timeout_secs` seconds.
pub fn build_client(user_agent: &str, timeout_secs: u64) -> reqwest::Result<reqwest::Client> {
    let timeout = Duration::from_secs(timeout_secs);
    let builder = reqwest::Client::builder()
        .user_agent(user_agent)
        .timeout(timeout);
    builder.build()
}
/// URL-encode a value for use as a query parameter.
///
/// ASCII letters, digits and `-`, `_`, `.`, `~` are kept as-is, a space
/// becomes `+`, and every other byte of the UTF-8 encoding is percent-encoded
/// with uppercase hex digits. This covers `+`, `%`, `/`, `?`, `\` and non-ASCII
/// text, which would otherwise be misread by the server.
pub fn urlencoded(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";
    let mut out = String::with_capacity(s.len());
    for &byte in s.as_bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                out.push(char::from(byte));
            }
            b' ' => out.push('+'),
            _ => {
                out.push('%');
                out.push(char::from(HEX[usize::from(byte >> 4)]));
                out.push(char::from(HEX[usize::from(byte & 0x0F)]));
            }
        }
    }
    out
}
/// Backslash-escape every character that has special meaning in the Lucene
/// query syntax used by MusicBrainz search; all other characters are copied
/// through unchanged.
pub fn escape_lucene(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        let needs_escape = matches!(
            ch,
            '+' | '-'
                | '&'
                | '|'
                | '!'
                | '('
                | ')'
                | '{'
                | '}'
                | '['
                | ']'
                | '^'
                | '"'
                | '~'
                | '*'
                | '?'
                | ':'
                | '\\'
                | '/'
        );
        if needs_escape {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}

67
shanty-data/src/lastfm.rs Normal file
View File

@@ -0,0 +1,67 @@
use crate::error::DataResult;
use crate::http::{build_client, urlencoded};
use crate::traits::ArtistBioFetcher;
use crate::types::ArtistInfo;
const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
/// Fetches artist bios from Last.fm.
pub struct LastFmBioFetcher {
/// Last.fm API key; when empty, bio lookups return `None` without a request.
api_key: String,
/// HTTP client used for Last.fm requests.
client: reqwest::Client,
}
impl LastFmBioFetcher {
    /// Create a fetcher that authenticates with `api_key`.
    ///
    /// # Errors
    /// Returns an error if the HTTP client cannot be built.
    pub fn new(api_key: String) -> DataResult<Self> {
        Ok(Self {
            client: build_client(USER_AGENT, 30)?,
            api_key,
        })
    }
}
impl ArtistBioFetcher for LastFmBioFetcher {
    /// Look up the artist's bio summary on Last.fm by name and return it with
    /// HTML tags removed.
    ///
    /// Best-effort: an empty API key, a failed request, a non-success status,
    /// an undecodable body or a missing summary all yield `Ok(None)`.
    async fn get_artist_bio(&self, artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        if self.api_key.is_empty() {
            return Ok(None);
        }

        let url = format!(
            "https://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist={}&api_key={}&format=json",
            urlencoded(&artist_info.name),
            &self.api_key,
        );

        let sent = self.client.get(&url).send().await;
        let Some(resp) = sent.ok().filter(|r| r.status().is_success()) else {
            return Ok(None);
        };
        let Ok(body) = resp.json::<serde_json::Value>().await else {
            return Ok(None);
        };

        // Walk artist -> bio -> summary, giving up as soon as a key is missing.
        let mut node = &body;
        for key in ["artist", "bio", "summary"] {
            node = match node.get(key) {
                Some(child) => child,
                None => return Ok(None),
            };
        }
        Ok(node.as_str().map(strip_html_tags))
    }
}
/// Strip HTML tags from a string with a simple approach.
///
/// Everything from a `<` up to the next `>` is removed and the result is
/// trimmed. A `>` that does not close a tag is ordinary text and is kept.
/// HTML entities are not decoded.
fn strip_html_tags(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut in_tag = false;
    for c in s.chars() {
        match c {
            '<' => in_tag = true,
            // Only a `>` that closes an open tag is markup.
            '>' if in_tag => in_tag = false,
            _ if !in_tag => result.push(c),
            _ => {}
        }
    }
    result.trim().to_string()
}

20
shanty-data/src/lib.rs Normal file
View File

@@ -0,0 +1,20 @@
pub mod coverart;
pub mod error;
pub mod fanarttv;
pub mod http;
pub mod lastfm;
pub mod lrclib;
pub mod musicbrainz;
pub mod traits;
pub mod types;
pub mod wikipedia;
pub use coverart::CoverArtArchiveFetcher;
pub use error::{DataError, DataResult};
pub use fanarttv::FanartTvFetcher;
pub use lastfm::LastFmBioFetcher;
pub use lrclib::LrclibFetcher;
pub use musicbrainz::MusicBrainzFetcher;
pub use traits::*;
pub use types::*;
pub use wikipedia::WikipediaFetcher;

63
shanty-data/src/lrclib.rs Normal file
View File

@@ -0,0 +1,63 @@
use crate::error::DataResult;
use crate::http::{build_client, urlencoded};
use crate::traits::LyricsFetcher;
use crate::types::LyricsResult;
const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
/// Fetches lyrics from LRCLIB.
pub struct LrclibFetcher {
/// HTTP client used for LRCLIB requests.
client: reqwest::Client,
}
impl LrclibFetcher {
    /// Create a fetcher with its own HTTP client.
    ///
    /// # Errors
    /// Returns an error if the HTTP client cannot be built.
    pub fn new() -> DataResult<Self> {
        Ok(Self {
            client: build_client(USER_AGENT, 30)?,
        })
    }
}
impl LyricsFetcher for LrclibFetcher {
    /// Search LRCLIB by artist and track name and return the lyrics of the
    /// first hit.
    ///
    /// A non-success status or an empty result list yields a "not found"
    /// result rather than an error. `found` is true when the first hit has
    /// plain or synced lyrics.
    ///
    /// # Errors
    /// Fails when the request cannot be sent or the body is not a JSON array.
    async fn get_lyrics(&self, artist: &str, title: &str) -> DataResult<LyricsResult> {
        let not_found = LyricsResult {
            found: false,
            lyrics: None,
            synced_lyrics: None,
        };

        let url = format!(
            "https://lrclib.net/api/search?artist_name={}&track_name={}",
            urlencoded(artist),
            urlencoded(title),
        );
        let resp = self.client.get(&url).send().await?;
        if !resp.status().is_success() {
            return Ok(not_found);
        }

        let results: Vec<serde_json::Value> = resp.json().await?;
        let Some(entry) = results.first() else {
            return Ok(not_found);
        };

        // Read an optional string field from the first hit.
        let text_field = |key: &str| entry.get(key).and_then(|v| v.as_str()).map(String::from);
        let lyrics = text_field("plainLyrics");
        let synced_lyrics = text_field("syncedLyrics");

        Ok(LyricsResult {
            found: lyrics.is_some() || synced_lyrics.is_some(),
            lyrics,
            synced_lyrics,
        })
    }
}

View File

@@ -0,0 +1,488 @@
use serde::Deserialize;
use tokio::time::Duration;
use crate::error::{DataError, DataResult};
use crate::http::{RateLimiter, escape_lucene, urlencoded};
use crate::traits::MetadataFetcher;
use crate::types::{
ArtistInfo, ArtistSearchResult, ArtistUrl, DiscographyEntry, RecordingDetails, RecordingMatch,
ReleaseGroupEntry, ReleaseMatch, ReleaseRef, ReleaseTrack,
};
const BASE_URL: &str = "https://musicbrainz.org/ws/2";
const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
const RATE_LIMIT: Duration = Duration::from_millis(1100); // slightly over 1s to be safe
/// MusicBrainz API client with rate limiting.
pub struct MusicBrainzFetcher {
/// HTTP client used for every MusicBrainz request.
client: reqwest::Client,
/// Awaited before each request to keep requests `RATE_LIMIT` apart.
limiter: RateLimiter,
}
impl MusicBrainzFetcher {
pub fn new() -> DataResult<Self> {
let client = reqwest::Client::builder()
.user_agent(USER_AGENT)
.timeout(Duration::from_secs(30))
.build()?;
Ok(Self {
client,
limiter: RateLimiter::new(RATE_LIMIT),
})
}
async fn get_json<T: serde::de::DeserializeOwned>(&self, url: &str) -> DataResult<T> {
self.limiter.wait().await;
tracing::debug!(url = url, "MusicBrainz request");
let resp = self.client.get(url).send().await?;
let status = resp.status();
if !status.is_success() {
let body = resp.text().await.unwrap_or_default();
return Err(DataError::Other(format!(
"MusicBrainz API error {status}: {body}"
)));
}
Ok(resp.json().await?)
}
/// Look up an artist directly by MBID. Returns (name, disambiguation).
pub async fn get_artist_by_mbid(&self, mbid: &str) -> DataResult<(String, Option<String>)> {
let url = format!("{BASE_URL}/artist/{mbid}?fmt=json");
let resp: MbArtistLookup = self.get_json(&url).await?;
Ok((resp.name, resp.disambiguation.filter(|s| !s.is_empty())))
}
/// Look up detailed artist info by MBID, including URLs and metadata.
pub async fn get_artist_info(&self, mbid: &str) -> DataResult<ArtistInfo> {
let url = format!("{BASE_URL}/artist/{mbid}?inc=url-rels&fmt=json");
let resp: MbArtistFull = self.get_json(&url).await?;
let begin_year = resp
.life_span
.and_then(|ls| ls.begin)
.and_then(|d| d.split('-').next().map(String::from));
let urls = resp
.relations
.unwrap_or_default()
.into_iter()
.filter_map(|rel| {
rel.url.map(|u| ArtistUrl {
url: u.resource,
link_type: rel.relation_type,
})
})
.collect();
Ok(ArtistInfo {
name: resp.name,
mbid: Some(mbid.to_string()),
disambiguation: resp.disambiguation.filter(|s| !s.is_empty()),
country: resp.country.filter(|s| !s.is_empty()),
artist_type: resp.artist_type,
begin_year,
urls,
})
}
}
impl MetadataFetcher for MusicBrainzFetcher {
async fn search_recording(&self, artist: &str, title: &str) -> DataResult<Vec<RecordingMatch>> {
let query = if artist.is_empty() {
format!("recording:{}", escape_lucene(title))
} else {
format!(
"artist:{} AND recording:{}",
escape_lucene(artist),
escape_lucene(title)
)
};
let url = format!(
"{BASE_URL}/recording/?query={}&fmt=json&limit=5",
urlencoded(&query)
);
let resp: MbRecordingSearchResponse = self.get_json(&url).await?;
Ok(resp
.recordings
.into_iter()
.map(|r| {
let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
RecordingMatch {
mbid: r.id,
title: r.title,
artist: artist_name,
artist_mbid,
releases: r
.releases
.unwrap_or_default()
.into_iter()
.map(|rel| ReleaseRef {
mbid: rel.id,
title: rel.title,
date: rel.date,
track_number: None,
})
.collect(),
score: r.score.unwrap_or(0),
}
})
.collect())
}
async fn search_release(&self, artist: &str, album: &str) -> DataResult<Vec<ReleaseMatch>> {
let query = if artist.is_empty() {
format!("release:{}", escape_lucene(album))
} else {
format!(
"artist:{} AND release:{}",
escape_lucene(artist),
escape_lucene(album)
)
};
let url = format!(
"{BASE_URL}/release/?query={}&fmt=json&limit=5",
urlencoded(&query)
);
let resp: MbReleaseSearchResponse = self.get_json(&url).await?;
Ok(resp
.releases
.into_iter()
.map(|r| {
let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
ReleaseMatch {
mbid: r.id,
title: r.title,
artist: artist_name,
artist_mbid,
date: r.date,
track_count: r.track_count,
score: r.score.unwrap_or(0),
}
})
.collect())
}
async fn get_recording(&self, mbid: &str) -> DataResult<RecordingDetails> {
let url = format!("{BASE_URL}/recording/{mbid}?inc=artists+releases+genres&fmt=json");
let r: MbRecordingDetail = self.get_json(&url).await?;
let (artist_name, artist_mbid) = extract_artist_credit(&r.artist_credit);
let secondary_artists = extract_secondary_artists(&r.artist_credit);
Ok(RecordingDetails {
mbid: r.id,
title: r.title,
artist: artist_name,
artist_mbid,
releases: r
.releases
.unwrap_or_default()
.into_iter()
.map(|rel| ReleaseRef {
mbid: rel.id,
title: rel.title,
date: rel.date,
track_number: None,
})
.collect(),
duration_ms: r.length,
genres: r
.genres
.unwrap_or_default()
.into_iter()
.map(|g| g.name)
.collect(),
secondary_artists,
})
}
async fn search_artist(&self, query: &str, limit: u32) -> DataResult<Vec<ArtistSearchResult>> {
let url = format!(
"{BASE_URL}/artist/?query={}&fmt=json&limit={limit}",
urlencoded(&escape_lucene(query))
);
let resp: MbArtistSearchResponse = self.get_json(&url).await?;
Ok(resp
.artists
.into_iter()
.map(|a| ArtistSearchResult {
mbid: a.id,
name: a.name,
disambiguation: a.disambiguation.filter(|s| !s.is_empty()),
country: a.country,
artist_type: a.artist_type,
score: a.score.unwrap_or(0),
})
.collect())
}
async fn get_artist_releases(
&self,
artist_mbid: &str,
limit: u32,
) -> DataResult<Vec<DiscographyEntry>> {
let url = format!("{BASE_URL}/release/?artist={artist_mbid}&fmt=json&limit={limit}");
let resp: MbReleaseSearchResponse = self.get_json(&url).await?;
Ok(resp
.releases
.into_iter()
.map(|r| DiscographyEntry {
mbid: r.id,
title: r.title,
date: r.date,
release_type: None, // release-group type not in this response
track_count: r.track_count,
})
.collect())
}
async fn get_release_tracks(&self, release_mbid: &str) -> DataResult<Vec<ReleaseTrack>> {
let url = format!("{BASE_URL}/release/{release_mbid}?inc=recordings&fmt=json");
let resp: MbReleaseDetail = self.get_json(&url).await?;
let mut tracks = Vec::new();
for (disc_idx, medium) in resp.media.unwrap_or_default().into_iter().enumerate() {
for track in medium.tracks.unwrap_or_default() {
tracks.push(ReleaseTrack {
recording_mbid: track.recording.map(|r| r.id).unwrap_or_default(),
title: track.title,
track_number: track.position,
disc_number: Some(disc_idx as i32 + 1),
duration_ms: track.length,
});
}
}
Ok(tracks)
}
async fn get_artist_release_groups(
&self,
artist_mbid: &str,
) -> DataResult<Vec<ReleaseGroupEntry>> {
// Fetch album, single, and EP release groups
let url = format!(
"{BASE_URL}/release-group?artist={artist_mbid}&type=album|single|ep&fmt=json&limit=100"
);
let resp: MbReleaseGroupResponse = self.get_json(&url).await?;
Ok(resp
.release_groups
.unwrap_or_default()
.into_iter()
.map(|rg| ReleaseGroupEntry {
mbid: rg.id,
title: rg.title,
primary_type: rg.primary_type,
secondary_types: rg.secondary_types.unwrap_or_default(),
first_release_date: rg.first_release_date,
first_release_mbid: rg
.releases
.and_then(|r| r.into_iter().next().map(|rel| rel.id)),
})
.collect())
}
}
/// Pick the primary artist out of a MusicBrainz artist-credit list.
///
/// Only the first credit is used; collaborators and featured artists are never
/// merged into a compound name. Returns the artist's name and MBID, or
/// `("Unknown Artist", None)` when the list is absent or empty.
fn extract_artist_credit(credits: &Option<Vec<MbArtistCredit>>) -> (String, Option<String>) {
    let primary = credits.as_deref().and_then(|list| list.first());
    if let Some(credit) = primary {
        let artist = &credit.artist;
        (artist.name.clone(), Some(artist.id.clone()))
    } else {
        ("Unknown Artist".to_string(), None)
    }
}
/// Collect the collaborators that follow the primary artist in a MusicBrainz
/// artist-credit list, as (name, mbid) pairs.
///
/// A credit's joinphrase describes how it connects to the NEXT credit, so the
/// list is read as consecutive pairs. Collection stops at the first pair whose
/// leading joinphrase contains "feat" or "ft." (case-insensitive); that artist
/// and everyone after it are left out.
fn extract_secondary_artists(credits: &Option<Vec<MbArtistCredit>>) -> Vec<(String, String)> {
    let list = match credits {
        Some(list) => list,
        None => return Vec::new(),
    };

    // `windows(2)` yields nothing for zero or one credit, covering the
    // "no secondary artists" cases.
    list.windows(2)
        .take_while(|pair| {
            let join = pair[0].joinphrase.as_deref().unwrap_or("").to_lowercase();
            !(join.contains("feat") || join.contains("ft."))
        })
        .map(|pair| (pair[1].artist.name.clone(), pair[1].artist.id.clone()))
        .collect()
}
// --- MusicBrainz API response types ---
/// Body of an artist search response.
#[derive(Deserialize)]
struct MbArtistSearchResponse {
/// Matching artists; the key must be present for deserialization to succeed.
artists: Vec<MbArtistResult>,
}
/// One artist hit inside an artist search response.
#[derive(Deserialize)]
struct MbArtistResult {
/// Artist MBID.
id: String,
/// Artist name.
name: String,
/// Search score; treated as 0 by the caller when absent.
score: Option<u8>,
/// Disambiguation text; an empty string is treated as absent by the caller.
disambiguation: Option<String>,
/// Country value, passed through unchanged.
country: Option<String>,
/// Artist type, read from the JSON key `type`.
#[serde(rename = "type")]
artist_type: Option<String>,
}
/// The two fields of an artist lookup that `get_artist_by_mbid` reads.
#[derive(Deserialize)]
struct MbArtistLookup {
/// Artist name.
name: String,
/// Disambiguation text; an empty string is treated as absent by the caller.
disambiguation: Option<String>,
}
/// Artist lookup requested with `inc=url-rels`, as read by `get_artist_info`.
#[derive(Deserialize)]
struct MbArtistFull {
/// Artist name.
name: String,
/// Disambiguation text; an empty string is treated as absent by the caller.
disambiguation: Option<String>,
/// Country value; an empty string is treated as absent by the caller.
country: Option<String>,
/// Artist type, read from the JSON key `type`.
#[serde(rename = "type")]
artist_type: Option<String>,
/// Life-span object, read from the JSON key `life-span`.
#[serde(rename = "life-span")]
life_span: Option<MbLifeSpan>,
/// Relations of the artist; only those carrying a URL are used.
relations: Option<Vec<MbRelation>>,
}
/// The `life-span` object of an artist.
#[derive(Deserialize)]
struct MbLifeSpan {
/// Begin date string; the caller keeps the part before the first `-` as the year.
begin: Option<String>,
}
/// One relation entry of an artist.
#[derive(Deserialize)]
struct MbRelation {
/// Relation type, read from the JSON key `type`; becomes `ArtistUrl::link_type`.
#[serde(rename = "type")]
relation_type: String,
/// Target URL; relations without one are skipped by the caller.
url: Option<MbRelationUrl>,
}
/// The `url` object of a relation.
#[derive(Deserialize)]
struct MbRelationUrl {
/// The URL itself; becomes `ArtistUrl::url`.
resource: String,
}
/// Body of a recording search response.
#[derive(Deserialize)]
struct MbRecordingSearchResponse {
/// Matching recordings; the key must be present for deserialization to succeed.
recordings: Vec<MbRecordingResult>,
}
/// One recording hit inside a recording search response.
#[derive(Deserialize)]
struct MbRecordingResult {
/// Recording MBID.
id: String,
/// Recording title.
title: String,
/// Search score; treated as 0 by the caller when absent.
score: Option<u8>,
/// Credited artists, read from the JSON key `artist-credit`.
#[serde(rename = "artist-credit")]
artist_credit: Option<Vec<MbArtistCredit>>,
/// Releases the recording appears on; absent is treated as empty.
releases: Option<Vec<MbReleaseResult>>,
}
/// Body of a release list response; used both for release search and for
/// listing releases by artist.
#[derive(Deserialize)]
struct MbReleaseSearchResponse {
/// Returned releases; the key must be present for deserialization to succeed.
releases: Vec<MbReleaseResult>,
}
/// One release entry; also used for the releases nested inside recordings.
#[derive(Deserialize)]
struct MbReleaseResult {
/// Release MBID.
id: String,
/// Release title.
title: String,
/// Search score; treated as 0 by `search_release` when absent.
score: Option<u8>,
/// Credited artists, read from the JSON key `artist-credit`.
#[serde(rename = "artist-credit")]
artist_credit: Option<Vec<MbArtistCredit>>,
/// Release date string, passed through unchanged.
date: Option<String>,
/// Number of tracks, read from the JSON key `track-count`.
#[serde(rename = "track-count")]
track_count: Option<i32>,
}
/// Recording lookup requested with `inc=artists+releases+genres`.
#[derive(Deserialize)]
struct MbRecordingDetail {
/// Recording MBID.
id: String,
/// Recording title.
title: String,
/// Credited artists, read from the JSON key `artist-credit`.
#[serde(rename = "artist-credit")]
artist_credit: Option<Vec<MbArtistCredit>>,
/// Releases the recording appears on; absent is treated as empty.
releases: Option<Vec<MbReleaseResult>>,
/// Recording length; exposed to callers as `duration_ms`.
length: Option<u64>,
/// Genres of the recording; only their names are kept.
genres: Option<Vec<MbGenre>>,
}
/// One entry of an artist-credit list.
#[derive(Deserialize)]
struct MbArtistCredit {
/// The credited artist.
artist: MbArtist,
/// Text joining this credit to the NEXT one; checked for "feat"/"ft." when
/// collecting secondary artists.
joinphrase: Option<String>,
}
/// The artist referenced by an artist credit.
#[derive(Deserialize)]
struct MbArtist {
/// Artist MBID.
id: String,
/// Artist name.
name: String,
}
/// One genre entry of a recording.
#[derive(Deserialize)]
struct MbGenre {
/// Genre name.
name: String,
}
/// Release lookup requested with `inc=recordings`.
#[derive(Deserialize)]
struct MbReleaseDetail {
/// Media of the release; absent is treated as empty.
media: Option<Vec<MbMedia>>,
}
/// One medium of a release.
#[derive(Deserialize)]
struct MbMedia {
/// Tracks on this medium; absent is treated as empty.
tracks: Option<Vec<MbTrackEntry>>,
}
/// One track on a medium.
#[derive(Deserialize)]
struct MbTrackEntry {
/// Track title.
title: String,
/// Track position; exposed to callers as `track_number`.
position: Option<i32>,
/// Track length; exposed to callers as `duration_ms`.
length: Option<u64>,
/// Recording behind the track; when absent the caller uses an empty MBID.
recording: Option<MbTrackRecording>,
}
/// The recording referenced by a track.
#[derive(Deserialize)]
struct MbTrackRecording {
/// Recording MBID.
id: String,
}
/// Body of a release-group list response for an artist.
#[derive(Deserialize)]
struct MbReleaseGroupResponse {
/// Release groups, read from the JSON key `release-groups`; absent is
/// treated as empty.
#[serde(rename = "release-groups")]
release_groups: Option<Vec<MbReleaseGroup>>,
}
/// One release group of an artist.
#[derive(Deserialize)]
struct MbReleaseGroup {
/// Release-group MBID.
id: String,
/// Release-group title.
title: String,
/// Primary type, read from the JSON key `primary-type`.
#[serde(rename = "primary-type")]
primary_type: Option<String>,
/// Secondary types, read from the JSON key `secondary-types`; absent is
/// treated as empty.
#[serde(rename = "secondary-types", default)]
secondary_types: Option<Vec<String>>,
/// First release date, read from the JSON key `first-release-date`.
#[serde(rename = "first-release-date")]
first_release_date: Option<String>,
/// Releases in the group; the first one supplies `first_release_mbid`.
releases: Option<Vec<MbReleaseGroupRelease>>,
}
/// A release listed inside a release group; only its ID is read.
#[derive(Deserialize)]
struct MbReleaseGroupRelease {
/// Release MBID.
id: String,
}

92
shanty-data/src/traits.rs Normal file
View File

@@ -0,0 +1,92 @@
use std::future::Future;
use crate::error::DataResult;
use crate::types::{
ArtistInfo, ArtistSearchResult, DiscographyEntry, LyricsResult, RecordingDetails,
RecordingMatch, ReleaseGroupEntry, ReleaseMatch, ReleaseTrack,
};
/// Trait for metadata lookup backends. MusicBrainz is the default implementation;
/// others (Last.fm, Discogs, etc.) can be added later.
///
/// Implementors must be `Send + Sync`, and every method returns a `Send` future.
pub trait MetadataFetcher: Send + Sync {
/// Search for recordings by artist name and track title.
fn search_recording(
&self,
artist: &str,
title: &str,
) -> impl Future<Output = DataResult<Vec<RecordingMatch>>> + Send;
/// Search for releases by artist name and album title.
fn search_release(
&self,
artist: &str,
album: &str,
) -> impl Future<Output = DataResult<Vec<ReleaseMatch>>> + Send;
/// Fetch full details for one recording by its MBID.
fn get_recording(
&self,
mbid: &str,
) -> impl Future<Output = DataResult<RecordingDetails>> + Send;
/// Search for artists by free-text query, returning at most `limit` results.
fn search_artist(
&self,
query: &str,
limit: u32,
) -> impl Future<Output = DataResult<Vec<ArtistSearchResult>>> + Send;
/// List up to `limit` releases for the artist with the given MBID.
fn get_artist_releases(
&self,
artist_mbid: &str,
limit: u32,
) -> impl Future<Output = DataResult<Vec<DiscographyEntry>>> + Send;
/// List the tracks of the release with the given MBID.
fn get_release_tracks(
&self,
release_mbid: &str,
) -> impl Future<Output = DataResult<Vec<ReleaseTrack>>> + Send;
/// Get deduplicated release groups (albums, EPs, singles) for an artist.
fn get_artist_release_groups(
&self,
artist_mbid: &str,
) -> impl Future<Output = DataResult<Vec<ReleaseGroupEntry>>> + Send;
}
/// Fetches artist image URLs from an external source.
///
/// Both methods resolve to `Ok(None)` when no image is available.
pub trait ArtistImageFetcher: Send + Sync {
/// Thumbnail/profile image for the artist.
fn get_artist_image(
&self,
artist_info: &ArtistInfo,
) -> impl Future<Output = DataResult<Option<String>>> + Send;
/// Wide banner/background image for the artist (e.g., 1920x1080).
/// Returns None by default — providers that don't support banners need not implement this.
fn get_artist_banner(
&self,
artist_info: &ArtistInfo,
) -> impl Future<Output = DataResult<Option<String>>> + Send {
// The default ignores its argument; the binding only marks it as used.
let _ = artist_info;
async { Ok(None) }
}
}
/// Fetches an artist biography from an external source.
pub trait ArtistBioFetcher: Send + Sync {
/// Biography text for the artist, or `Ok(None)` when the provider has none.
fn get_artist_bio(
&self,
artist_info: &ArtistInfo,
) -> impl Future<Output = DataResult<Option<String>>> + Send;
}
/// Fetches song lyrics from an external source.
pub trait LyricsFetcher: Send + Sync {
/// Look up lyrics by artist name and track title. A miss is reported through
/// `LyricsResult::found` rather than as an error.
fn get_lyrics(
&self,
artist: &str,
title: &str,
) -> impl Future<Output = DataResult<LyricsResult>> + Send;
}
/// Fetches cover art URLs for releases.
pub trait CoverArtFetcher: Send + Sync {
/// Cover art URL for the release with the given ID, or `None` when the
/// provider cannot supply one. Synchronous, unlike the other fetcher traits.
fn get_cover_art_url(&self, release_id: &str) -> Option<String>;
}

120
shanty-data/src/types.rs Normal file
View File

@@ -0,0 +1,120 @@
use serde::{Deserialize, Serialize};
/// A reference to a release (album) that a recording appears on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReleaseRef {
/// MusicBrainz ID of the release.
pub mbid: String,
/// Release title.
pub title: String,
/// Release date string as reported by the provider.
pub date: Option<String>,
/// Position of the recording on the release, when known.
pub track_number: Option<i32>,
}
/// A recording match from a search query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecordingMatch {
/// MusicBrainz ID of the recording.
pub mbid: String,
/// Recording title.
pub title: String,
/// Name of the primary credited artist.
pub artist: String,
/// MusicBrainz ID of the primary credited artist, when known.
pub artist_mbid: Option<String>,
/// Releases the recording appears on.
pub releases: Vec<ReleaseRef>,
/// MusicBrainz API score (0-100).
pub score: u8,
}
/// A release (album) match from a search query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReleaseMatch {
/// MusicBrainz ID of the release.
pub mbid: String,
/// Release title.
pub title: String,
/// Name of the primary credited artist.
pub artist: String,
/// MusicBrainz ID of the primary credited artist, when known.
pub artist_mbid: Option<String>,
/// Release date string as reported by the provider.
pub date: Option<String>,
/// Number of tracks on the release, when reported.
pub track_count: Option<i32>,
/// Search score reported by the provider.
pub score: u8,
}
/// Full details for a recording, retrieved by MBID.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RecordingDetails {
/// MusicBrainz ID of the recording.
pub mbid: String,
/// Recording title.
pub title: String,
/// Name of the primary credited artist.
pub artist: String,
/// MusicBrainz ID of the primary credited artist, when known.
pub artist_mbid: Option<String>,
/// Releases the recording appears on.
pub releases: Vec<ReleaseRef>,
/// Length of the recording, when reported.
pub duration_ms: Option<u64>,
/// Genre names attached to the recording.
pub genres: Vec<String>,
/// Non-featuring collaborators beyond the primary artist.
/// Each entry is a (name, mbid) pair; defaults to empty when absent from
/// serialized data.
#[serde(default)]
pub secondary_artists: Vec<(String, String)>,
}
/// Detailed artist info from a direct MBID lookup.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArtistInfo {
/// Artist name.
pub name: String,
/// MusicBrainz ID of the artist; defaults to `None` when absent from
/// serialized data.
#[serde(default)]
pub mbid: Option<String>,
/// Text distinguishing this artist from others with the same name.
pub disambiguation: Option<String>,
/// Country value as reported by the provider.
pub country: Option<String>,
/// Artist type as reported by the provider.
pub artist_type: Option<String>,
/// Year component of the artist's begin date.
pub begin_year: Option<String>,
/// External links for the artist; image and bio providers search these by
/// `link_type`.
pub urls: Vec<ArtistUrl>,
}
/// An external URL linked to an artist on MusicBrainz.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArtistUrl {
/// The linked URL.
pub url: String,
/// Relation type of the link, e.g. "wikipedia" or "wikidata".
pub link_type: String,
}
/// An artist match from a search query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ArtistSearchResult {
/// MusicBrainz ID of the artist.
pub mbid: String,
/// Artist name.
pub name: String,
/// Text distinguishing this artist from others with the same name.
pub disambiguation: Option<String>,
/// Country value as reported by the provider.
pub country: Option<String>,
/// Artist type as reported by the provider.
pub artist_type: Option<String>,
/// Search score reported by the provider.
pub score: u8,
}
/// A release entry in an artist's discography.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiscographyEntry {
/// MusicBrainz ID of the release.
pub mbid: String,
/// Release title.
pub title: String,
/// Release date string as reported by the provider.
pub date: Option<String>,
/// Release type, when the provider reports one.
pub release_type: Option<String>,
/// Number of tracks on the release, when reported.
pub track_count: Option<i32>,
}
/// A release group (deduplicated album/EP/single concept).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReleaseGroupEntry {
/// MusicBrainz ID of the release group.
pub mbid: String,
/// Release-group title.
pub title: String,
/// Primary type of the group, when reported.
pub primary_type: Option<String>,
/// Secondary types of the group; empty when none are reported.
pub secondary_types: Vec<String>,
/// Date string of the group's first release, when reported.
pub first_release_date: Option<String>,
/// MBID of the first release in this group (for fetching tracks).
pub first_release_mbid: Option<String>,
}
/// A track within a release.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReleaseTrack {
/// Identifier of the recording this track presents.
pub recording_mbid: String,
/// Track title.
pub title: String,
/// Track number, if known. NOTE(review): presumably the position within its disc — confirm.
pub track_number: Option<i32>,
/// Disc number, if known.
pub disc_number: Option<i32>,
/// Duration in milliseconds (per the field name), if known.
pub duration_ms: Option<u64>,
}
/// Result from a lyrics lookup.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LyricsResult {
/// Whether the lookup found lyrics. NOTE(review): how this relates to the two optional fields below is decided by the producer — confirm.
pub found: bool,
/// Lyrics text, if any.
pub lyrics: Option<String>,
/// Synced lyrics text, if any. NOTE(review): presumably time-stamped (e.g. LRC format) — confirm.
pub synced_lyrics: Option<String>,
}

View File

@@ -0,0 +1,122 @@
use crate::error::DataResult;
use crate::http::build_client;
use crate::traits::{ArtistBioFetcher, ArtistImageFetcher};
use crate::types::ArtistInfo;
/// Identifier string handed to `build_client` in `WikipediaFetcher::new`
/// (presumably sent as the HTTP `User-Agent` header — confirm in `crate::http`).
const USER_AGENT: &str = "Shanty/0.1.0 (shanty-music-app)";
/// Fetches artist images and bios from Wikipedia/Wikidata.
///
/// Implements both `ArtistImageFetcher` and `ArtistBioFetcher`; each lookup
/// starts from the URLs attached to an `ArtistInfo`.
pub struct WikipediaFetcher {
/// HTTP client used for every Wikidata and Wikipedia request; created by `build_client` in `new`.
client: reqwest::Client,
}
impl WikipediaFetcher {
    /// Create a fetcher whose HTTP client comes from `build_client`, called with
    /// `USER_AGENT` and `30` (presumably a timeout in seconds — confirm in
    /// `crate::http`).
    ///
    /// # Errors
    /// Propagates whatever error `build_client` returns.
    pub fn new() -> DataResult<Self> {
        let client = build_client(USER_AGENT, 30)?;
        Ok(Self { client })
    }

    /// Find the Wikipedia URL for an artist from their MusicBrainz URLs.
    ///
    /// Tries a direct `"wikipedia"` link first, then resolves a `"wikidata"`
    /// link via the Wikidata API. Returns `None` when neither link exists or
    /// the Wikidata lookup yields nothing.
    async fn resolve_wiki_url(&self, artist_info: &ArtistInfo) -> Option<String> {
        // Direct Wikipedia link
        if let Some(u) = artist_info.urls.iter().find(|u| u.link_type == "wikipedia") {
            return Some(u.url.clone());
        }
        // Resolve via Wikidata
        let wd = artist_info.urls.iter().find(|u| u.link_type == "wikidata")?;
        // The entity id is the last path segment; tolerate a trailing slash so
        // ".../wiki/Q42/" still yields "Q42" instead of an empty id.
        let entity_id = wd
            .url
            .trim_end_matches('/')
            .rsplit('/')
            .next()
            .unwrap_or("");
        self.resolve_wikidata_to_wikipedia(entity_id).await
    }

    /// Resolve a Wikidata entity ID to an English Wikipedia URL.
    ///
    /// Returns `None` if the id is empty or not purely ASCII alphanumeric, if
    /// the request or JSON decoding fails, or if the entity has no `enwiki`
    /// sitelink. The page title in the returned URL is percent-encoded.
    async fn resolve_wikidata_to_wikipedia(&self, entity_id: &str) -> Option<String> {
        // The id is interpolated into a query string below; rejecting anything
        // that is not alphanumeric keeps characters such as '&' or '#' from
        // altering the request.
        if entity_id.is_empty() || !entity_id.bytes().all(|b| b.is_ascii_alphanumeric()) {
            return None;
        }
        let url = format!(
            "https://www.wikidata.org/w/api.php?action=wbgetentities&ids={entity_id}&props=sitelinks&sitefilter=enwiki&format=json"
        );
        let resp: serde_json::Value = self.client.get(&url).send().await.ok()?.json().await.ok()?;
        // Indexing a `serde_json::Value` yields `Null` for missing keys, so a
        // missing level anywhere in the path ends up as `None` here.
        let title = resp["entities"][entity_id]["sitelinks"]["enwiki"]["title"].as_str()?;
        // Sitelink titles are raw text: encode them so characters like '?',
        // '#' or '%' stay part of the title rather than changing the URL.
        let encoded = Self::percent_encode(title.replace(' ', "_").as_bytes());
        Some(format!("https://en.wikipedia.org/wiki/{encoded}"))
    }

    /// Fetch the Wikipedia summary for a given URL, returning (photo_url, bio).
    ///
    /// Both elements are `None` when the URL has no page title, the request
    /// does not succeed, or the response body is not valid JSON. Otherwise
    /// `photo_url` is read from `thumbnail.source` and `bio` from `extract`.
    async fn fetch_summary(&self, wiki_url: &str) -> (Option<String>, Option<String>) {
        let Some(title) = Self::summary_title(wiki_url) else {
            return (None, None);
        };
        let lang = Self::wiki_lang(wiki_url);
        let api_url = format!("https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title}");
        let resp = match self.client.get(&api_url).send().await {
            Ok(r) if r.status().is_success() => r,
            _ => return (None, None),
        };
        let body: serde_json::Value = match resp.json().await {
            Ok(v) => v,
            Err(_) => return (None, None),
        };
        let photo_url = body
            .get("thumbnail")
            .and_then(|t| t.get("source"))
            .and_then(|s| s.as_str())
            .map(String::from);
        let bio = body
            .get("extract")
            .and_then(|e| e.as_str())
            .map(String::from);
        (photo_url, bio)
    }

    /// Extract the page title from a `.../wiki/<title>` URL as a single,
    /// percent-encoded path segment; `None` when there is no title.
    fn summary_title(wiki_url: &str) -> Option<String> {
        let (_, after_wiki) = wiki_url.split_once("/wiki/")?;
        // Anything from '?' or '#' onwards is query/fragment, not title.
        let raw = after_wiki.split(['?', '#']).next().unwrap_or("");
        if raw.is_empty() {
            return None;
        }
        // Decode first so already-encoded titles are not double-encoded, then
        // encode everything outside the unreserved set (notably '/', which
        // would otherwise split the title across path segments).
        Some(Self::percent_encode(&Self::percent_decode(raw)))
    }

    /// Pick the language subdomain (first host label) from a Wikipedia URL,
    /// falling back to `"en"` when it is missing or not shaped like one.
    fn wiki_lang(wiki_url: &str) -> &str {
        let label = wiki_url
            .split_once("://")
            .and_then(|(_, rest)| rest.split(['/', '.']).next())
            .unwrap_or("en");
        // The label becomes part of the request host, so only accept plain
        // letters/digits/hyphens; this keeps a malformed URL from steering the
        // request to a different host.
        let well_formed = !label.is_empty()
            && label
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'-');
        if well_formed && !matches!(label, "www" | "wikipedia") {
            label
        } else {
            "en"
        }
    }

    /// Decode `%XX` escapes into raw bytes; malformed escapes are kept as-is.
    fn percent_decode(input: &str) -> Vec<u8> {
        let bytes = input.as_bytes();
        let mut out = Vec::with_capacity(bytes.len());
        let mut i = 0;
        while i < bytes.len() {
            let hi = Self::hex_value(bytes.get(i + 1));
            let lo = Self::hex_value(bytes.get(i + 2));
            match (bytes[i], hi, lo) {
                (b'%', Some(hi), Some(lo)) => {
                    out.push((hi << 4) | lo);
                    i += 3;
                }
                (other, _, _) => {
                    out.push(other);
                    i += 1;
                }
            }
        }
        out
    }

    /// Percent-encode every byte outside the RFC 3986 unreserved set
    /// (`A-Z a-z 0-9 - . _ ~`).
    fn percent_encode(bytes: &[u8]) -> String {
        const HEX: &[u8; 16] = b"0123456789ABCDEF";
        let mut out = String::with_capacity(bytes.len());
        for &b in bytes {
            if b.is_ascii_alphanumeric() || matches!(b, b'-' | b'.' | b'_' | b'~') {
                out.push(char::from(b));
            } else {
                out.push('%');
                out.push(char::from(HEX[usize::from(b >> 4)]));
                out.push(char::from(HEX[usize::from(b & 0x0F)]));
            }
        }
        out
    }

    /// Numeric value of an ASCII hex digit, or `None` for anything else.
    fn hex_value(byte: Option<&u8>) -> Option<u8> {
        let digit = char::from(*byte?).to_digit(16)?;
        u8::try_from(digit).ok()
    }
}
impl ArtistImageFetcher for WikipediaFetcher {
    /// Look up an image URL for the artist via their Wikipedia page summary.
    ///
    /// Yields `Ok(None)` when no Wikipedia page can be resolved for the artist
    /// or the summary carries no thumbnail. The current implementation never
    /// returns `Err`; lookup failures are reported as `Ok(None)`.
    async fn get_artist_image(&self, artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        let Some(page_url) = self.resolve_wiki_url(artist_info).await else {
            return Ok(None);
        };
        // The summary call returns (image, bio); only the image is wanted here.
        let (image, _bio) = self.fetch_summary(&page_url).await;
        Ok(image)
    }
}
impl ArtistBioFetcher for WikipediaFetcher {
    /// Look up a short biography for the artist via their Wikipedia page summary.
    ///
    /// Yields `Ok(None)` when no Wikipedia page can be resolved for the artist
    /// or the summary carries no extract text. The current implementation never
    /// returns `Err`; lookup failures are reported as `Ok(None)`.
    async fn get_artist_bio(&self, artist_info: &ArtistInfo) -> DataResult<Option<String>> {
        let Some(page_url) = self.resolve_wiki_url(artist_info).await else {
            return Ok(None);
        };
        // The summary call returns (image, bio); only the bio is wanted here.
        let (_image, summary_text) = self.fetch_summary(&page_url).await;
        Ok(summary_text)
    }
}

View File

@@ -6,9 +6,10 @@ use tracing_actix_web::TracingLogger;
use tracing_subscriber::EnvFilter; use tracing_subscriber::EnvFilter;
use shanty_config::AppConfig; use shanty_config::AppConfig;
use shanty_data::MusicBrainzFetcher;
use shanty_data::WikipediaFetcher;
use shanty_db::Database; use shanty_db::Database;
use shanty_search::MusicBrainzSearch; use shanty_search::MusicBrainzSearch;
use shanty_tag::MusicBrainzClient;
use shanty_web::routes; use shanty_web::routes;
use shanty_web::state::AppState; use shanty_web::state::AppState;
@@ -32,6 +33,9 @@ struct Cli {
#[actix_web::main] #[actix_web::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
// Load .env file if present (before anything reads env vars)
dotenvy::dotenv().ok();
let cli = Cli::parse(); let cli = Cli::parse();
let filter = match cli.verbose { let filter = match cli.verbose {
@@ -53,8 +57,9 @@ async fn main() -> anyhow::Result<()> {
tracing::info!(url = %config.database_url, "connecting to database"); tracing::info!(url = %config.database_url, "connecting to database");
let db = Database::new(&config.database_url).await?; let db = Database::new(&config.database_url).await?;
let mb_client = MusicBrainzClient::new()?; let mb_client = MusicBrainzFetcher::new()?;
let search = MusicBrainzSearch::new()?; let search = MusicBrainzSearch::new()?;
let wiki_fetcher = WikipediaFetcher::new()?;
let bind = format!("{}:{}", config.web.bind, config.web.port); let bind = format!("{}:{}", config.web.bind, config.web.port);
tracing::info!(bind = %bind, "starting server"); tracing::info!(bind = %bind, "starting server");
@@ -64,6 +69,7 @@ async fn main() -> anyhow::Result<()> {
db, db,
mb_client, mb_client,
search, search,
wiki_fetcher,
config: std::sync::Arc::new(tokio::sync::RwLock::new(config)), config: std::sync::Arc::new(tokio::sync::RwLock::new(config)),
config_path, config_path,
tasks: TaskManager::new(), tasks: TaskManager::new(),