diff --git a/Cargo.lock b/Cargo.lock index 44c9300..d5e7e4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -293,6 +293,19 @@ dependencies = [ "version_check", ] +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -472,6 +485,12 @@ version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5" +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" @@ -486,16 +505,13 @@ checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06" [[package]] name = "bigdecimal" -version = "0.4.10" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d6867f1565b3aad85681f1015055b087fcfd840d6aeee6eee7f2da317603695" +checksum = "a6773ddc0eafc0e509fb60e48dff7f450f8e674a0686ae8605e8d9901bd5eefa" dependencies = [ - "autocfg", - "libm", "num-bigint", "num-integer", "num-traits", - "serde", ] [[package]] @@ -724,15 +740,6 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" -[[package]] -name = "concurrent-queue" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "const-oid" version = "0.9.6" @@ -1015,6 +1022,18 @@ version = "0.15.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +[[package]] +name = "educe" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4bd92664bf78c4d3dba9b7cdafce6fa15b13ed3ed16175218196942e99168a8" +dependencies = [ + "enum-ordinalize", + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "either" version = "1.15.0" @@ -1033,6 +1052,26 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-ordinalize" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" +dependencies = [ + "enum-ordinalize-derive", +] + +[[package]] +name = "enum-ordinalize-derive" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -1062,14 +1101,21 @@ dependencies = [ [[package]] name = "event-listener" -version = "5.4.1" +version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" -dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", -] +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" [[package]] name = "fastrand" @@ -1077,6 +1123,17 @@ version 
= "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "filetime" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +dependencies = [ + "cfg-if", + "libc", + "libredox", +] + [[package]] name = "find-msvc-tools" version = "0.1.9" @@ -1204,6 +1261,17 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" +[[package]] +name = "futures-macro" +version = "0.3.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + [[package]] name = "futures-sink" version = "0.3.32" @@ -1222,8 +1290,10 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ + "futures-channel", "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -1337,7 +1407,17 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" dependencies = [ - "ahash", + "ahash 0.7.8", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash 0.8.12", + "allocator-api2", ] [[package]] @@ -1346,8 +1426,6 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - 
"allocator-api2", - "equivalent", "foldhash", ] @@ -1359,11 +1437,11 @@ checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" [[package]] name = "hashlink" -version = "0.10.0" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7" dependencies = [ - "hashbrown 0.15.5", + "hashbrown 0.14.5", ] [[package]] @@ -1371,6 +1449,9 @@ name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +dependencies = [ + "unicode-segmentation", +] [[package]] name = "heck" @@ -1823,9 +1904,9 @@ dependencies = [ [[package]] name = "libsqlite3-sys" -version = "0.30.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +checksum = "afc22eff61b133b115c6e8c74e818c628d6d5e7a502afea6f64dee076dd94326" dependencies = [ "cc", "pkg-config", @@ -1902,6 +1983,17 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "matchers" version = "0.2.0" @@ -1943,6 +2035,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -1988,6 +2086,16 @@ dependencies = [ 
"tempfile", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -2138,18 +2246,18 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "ordered-float" -version = "4.6.0" +version = "3.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" +checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc" dependencies = [ "num-traits", ] [[package]] name = "ouroboros" -version = "0.18.5" +version = "0.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0f050db9c44b97a94723127e6be766ac5c340c48f2c4bb3ffa11713744be59" +checksum = "e2ba07320d39dfea882faa70554b4bd342a5f273ed59ba7c1c6b4c840492c954" dependencies = [ "aliasable", "ouroboros_macro", @@ -2158,23 +2266,17 @@ dependencies = [ [[package]] name = "ouroboros_macro" -version = "0.18.5" +version = "0.17.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c7028bdd3d43083f6d8d4d5187680d0d3560d54df4cc9d752005268b41e64d0" +checksum = "ec4c6225c69b4ca778c0aea097321a64c421cf4577b331c61b229267edabb6f8" dependencies = [ "heck 0.4.1", + "proc-macro-error", "proc-macro2", - "proc-macro2-diagnostics", "quote", "syn 2.0.117", ] -[[package]] -name = "parking" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" - [[package]] name = "parking_lot" version = "0.12.5" @@ -2230,15 +2332,6 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" -[[package]] 
-name = "pgvector" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc58e2d255979a31caa7cabfa7aac654af0354220719ab7a68520ae7a91e8c0b" -dependencies = [ - "serde", -] - [[package]] name = "pin-project" version = "1.1.11" @@ -2359,6 +2452,30 @@ dependencies = [ "toml_edit", ] +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -2390,19 +2507,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "proc-macro2-diagnostics" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", - "version_check", - "yansi", -] - [[package]] name = "ptr_meta" version = "0.1.4" @@ -2545,7 +2649,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.17", "libredox", - "thiserror", + "thiserror 2.0.18", ] [[package]] @@ -2602,6 +2706,7 @@ dependencies = [ "bytes", "encoding_rs", "futures-core", + "futures-util", "h2 0.4.13", "http 1.4.0", "http-body", @@ -2623,12 +2728,14 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-native-tls", + "tokio-util", "tower", "tower-http", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", ] @@ -2695,6 +2802,20 @@ dependencies = [ 
"zeroize", ] +[[package]] +name = "rusqlite" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "549b9d036d571d42e6e85d1c1425e2ac83491075078ca9a15be021c56b1641f2" +dependencies = [ + "bitflags", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "smallvec", +] + [[package]] name = "rust_decimal" version = "1.40.0" @@ -2817,19 +2938,17 @@ dependencies = [ [[package]] name = "sea-orm" -version = "1.1.19" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d945f62558fac19e5988680d2fdf747b734c2dbc6ce2cb81ba33ed8dde5b103" +checksum = "ea1fee0cf8528dbe6eda29d5798afc522a63b75e44c5b15721e6e64af9c7cc4b" dependencies = [ "async-stream", "async-trait", "bigdecimal", "chrono", - "derive_more 2.1.1", - "futures-util", + "futures", "log", "ouroboros", - "pgvector", "rust_decimal", "sea-orm-macros", "sea-query", @@ -2838,7 +2957,7 @@ dependencies = [ "serde_json", "sqlx", "strum", - "thiserror", + "thiserror 1.0.69", "time", "tracing", "url", @@ -2847,9 +2966,9 @@ dependencies = [ [[package]] name = "sea-orm-cli" -version = "1.1.19" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c94492e2ab6c045b4cc38013809ce255d14c3d352c9f0d11e6b920e2adc948ad" +checksum = "5f0b8869c75cf3fbb1bd860abb025033cd2e514c5f4fa43e792697cb1fe6c882" dependencies = [ "chrono", "clap", @@ -2857,8 +2976,6 @@ dependencies = [ "glob", "regex", "sea-schema", - "sqlx", - "tokio", "tracing", "tracing-subscriber", "url", @@ -2866,11 +2983,11 @@ dependencies = [ [[package]] name = "sea-orm-macros" -version = "1.1.19" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84c2e64a50a9cc8339f10a27577e10062c7f995488e469f2c95762c5ee847832" +checksum = "8737b566799ed0444f278d13c300c4c6f1a91782f60ff5825a591852d5502030" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "sea-bae", 
@@ -2880,13 +2997,14 @@ dependencies = [ [[package]] name = "sea-orm-migration" -version = "1.1.19" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7315c0cadb7e60fb17ee2bb282aa27d01911fc2a7e5836ec1d4ac37d19250bb4" +checksum = "216643749e26ce27ab6c51d3475f2692981d4a902d34455bcd322f412900df5c" dependencies = [ "async-trait", "clap", "dotenvy", + "futures", "sea-orm", "sea-orm-cli", "sea-schema", @@ -2896,12 +3014,13 @@ dependencies = [ [[package]] name = "sea-query" -version = "0.32.7" +version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a5d1c518eaf5eda38e5773f902b26ab6d5e9e9e2bb2349ca6c64cf96f80448c" +checksum = "b4fd043b8117af233e221f73e3ea8dfbc8e8c3c928017c474296db45c649105c" dependencies = [ "bigdecimal", "chrono", + "educe", "inherent", "ordered-float", "rust_decimal", @@ -2913,9 +3032,9 @@ dependencies = [ [[package]] name = "sea-query-binder" -version = "0.7.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0019f47430f7995af63deda77e238c17323359af241233ec768aba1faea7608" +checksum = "754965d4aee6145bec25d0898e5c931e6c22859789ce62fd85a42a15ed5a8ce3" dependencies = [ "bigdecimal", "chrono", @@ -2938,20 +3057,18 @@ dependencies = [ "proc-macro2", "quote", "syn 2.0.117", - "thiserror", + "thiserror 2.0.18", ] [[package]] name = "sea-schema" -version = "0.16.2" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2239ff574c04858ca77485f112afea1a15e53135d3097d0c86509cef1def1338" +checksum = "ad52149fc81836ea7424c3425d8f6ed8ad448dd16d2e4f6a3907ba46f3f2fd78" dependencies = [ "futures", "sea-query", - "sea-query-binder", "sea-schema-derive", - "sqlx", ] [[package]] @@ -3132,12 +3249,17 @@ dependencies = [ name = "shanty-data" version = "0.1.0" dependencies = [ + "chrono", + "futures-util", "reqwest", + "rusqlite", "serde", "serde_json", - "thiserror", + "tar", + "thiserror 2.0.18", 
"tokio", "tracing", + "xz2", ] [[package]] @@ -3149,7 +3271,7 @@ dependencies = [ "sea-orm-migration", "serde", "serde_json", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", ] @@ -3167,7 +3289,7 @@ dependencies = [ "serde_json", "shanty-db", "tempfile", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", "tracing-subscriber", @@ -3186,7 +3308,7 @@ dependencies = [ "serde", "shanty-db", "tempfile", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", "tracing-subscriber", @@ -3200,7 +3322,7 @@ dependencies = [ "clap", "serde", "shanty-db", - "thiserror", + "thiserror 2.0.18", "tracing", ] @@ -3217,7 +3339,7 @@ dependencies = [ "serde", "shanty-db", "tempfile", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", "tracing-subscriber", @@ -3231,7 +3353,7 @@ dependencies = [ "clap", "serde", "shanty-db", - "thiserror", + "thiserror 2.0.18", "tracing", ] @@ -3246,7 +3368,7 @@ dependencies = [ "serde_json", "shanty-data", "shanty-db", - "thiserror", + "thiserror 2.0.18", "tracing", ] @@ -3264,7 +3386,7 @@ dependencies = [ "shanty-data", "shanty-db", "shanty-tag", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", "tracing-subscriber", @@ -3277,7 +3399,7 @@ dependencies = [ "clap", "serde", "shanty-db", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", ] @@ -3300,7 +3422,7 @@ dependencies = [ "shanty-db", "strsim", "tempfile", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", "tracing-subscriber", @@ -3322,7 +3444,7 @@ dependencies = [ "shanty-db", "shanty-tag", "strsim", - "thiserror", + "thiserror 2.0.18", "tokio", "tracing", "tracing-subscriber", @@ -3363,7 +3485,7 @@ dependencies = [ "shanty-search", "shanty-tag", "shanty-watch", - "thiserror", + "thiserror 2.0.18", "tokio", "tokio-util", "tracing", @@ -3430,9 +3552,6 @@ name = "smallvec" version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" -dependencies = [ - "serde", 
-] [[package]] name = "socket2" @@ -3474,10 +3593,20 @@ dependencies = [ ] [[package]] -name = "sqlx" -version = "0.8.6" +name = "sqlformat" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fefb893899429669dcdd979aff487bd78f4064e5e7907e4269081e0ef7d97dc" +checksum = "7bba3a93db0cc4f7bdece8bb09e77e2e785c20bfebf79eb8340ed80708048790" +dependencies = [ + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlx" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e50c216e3624ec8e7ecd14c6a6a6370aad6ee5d8cfc3ab30b5162eeeef2ed33" dependencies = [ "sqlx-core", "sqlx-macros", @@ -3488,36 +3617,42 @@ dependencies = [ [[package]] name = "sqlx-core" -version = "0.8.6" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee6798b1838b6a0f69c007c133b8df5866302197e404e8b6ee8ed3e3a5e68dc6" +checksum = "8d6753e460c998bbd4cd8c6f0ed9a64346fcca0723d6e75e52fdc351c5d2169d" dependencies = [ - "base64 0.22.1", + "ahash 0.8.12", + "atoi", "bigdecimal", + "byteorder", "bytes", "chrono", "crc", "crossbeam-queue", + "dotenvy", "either", "event-listener", + "futures-channel", "futures-core", "futures-intrusive", "futures-io", "futures-util", - "hashbrown 0.15.5", "hashlink", + "hex", "indexmap", "log", "memchr", "native-tls", "once_cell", + "paste", "percent-encoding", "rust_decimal", "serde", "serde_json", "sha2", "smallvec", - "thiserror", + "sqlformat", + "thiserror 1.0.69", "time", "tokio", "tokio-stream", @@ -3528,26 +3663,26 @@ dependencies = [ [[package]] name = "sqlx-macros" -version = "0.8.6" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2d452988ccaacfbf5e0bdbc348fb91d7c8af5bee192173ac3636b5fb6e6715d" +checksum = "9a793bb3ba331ec8359c1853bd39eed32cdd7baaf22c35ccf5c92a7e8d1189ec" dependencies = [ "proc-macro2", "quote", "sqlx-core", "sqlx-macros-core", - "syn 2.0.117", + "syn 1.0.109", ] 
[[package]] name = "sqlx-macros-core" -version = "0.8.6" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19a9c1841124ac5a61741f96e1d9e2ec77424bf323962dd894bdb93f37d5219b" +checksum = "0a4ee1e104e00dedb6aa5ffdd1343107b0a4702e862a84320ee7cc74782d96fc" dependencies = [ "dotenvy", "either", - "heck 0.5.0", + "heck 0.4.1", "hex", "once_cell", "proc-macro2", @@ -3559,19 +3694,20 @@ dependencies = [ "sqlx-mysql", "sqlx-postgres", "sqlx-sqlite", - "syn 2.0.117", + "syn 1.0.109", + "tempfile", "tokio", "url", ] [[package]] name = "sqlx-mysql" -version = "0.8.6" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" +checksum = "864b869fdf56263f4c95c45483191ea0af340f9f3e3e7b4d57a61c7c87a970db" dependencies = [ "atoi", - "base64 0.22.1", + "base64 0.21.7", "bigdecimal", "bitflags", "byteorder", @@ -3604,7 +3740,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 1.0.69", "time", "tracing", "uuid", @@ -3613,12 +3749,12 @@ dependencies = [ [[package]] name = "sqlx-postgres" -version = "0.8.6" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" +checksum = "eb7ae0e6a97fb3ba33b23ac2671a5ce6e3cabe003f451abd5a56e7951d975624" dependencies = [ "atoi", - "base64 0.22.1", + "base64 0.21.7", "bigdecimal", "bitflags", "byteorder", @@ -3628,6 +3764,7 @@ dependencies = [ "etcetera", "futures-channel", "futures-core", + "futures-io", "futures-util", "hex", "hkdf", @@ -3643,11 +3780,12 @@ dependencies = [ "rust_decimal", "serde", "serde_json", + "sha1", "sha2", "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 1.0.69", "time", "tracing", "uuid", @@ -3656,9 +3794,9 @@ dependencies = [ [[package]] name = "sqlx-sqlite" -version = "0.8.6" +version = "0.7.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2d12fe70b2c1b4401038055f90f151b78208de1f9f89a7dbfd41587a10c3eea" +checksum = "d59dc83cf45d89c555a577694534fcd1b55c545a816c816ce51f20bbe56a4f3f" dependencies = [ "atoi", "chrono", @@ -3672,9 +3810,7 @@ dependencies = [ "log", "percent-encoding", "serde", - "serde_urlencoded", "sqlx-core", - "thiserror", "time", "tracing", "url", @@ -3791,6 +3927,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tar" +version = "0.4.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22692a6476a21fa75fdfc11d452fda482af402c008cdbaf3476414e122040973" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" version = "3.27.0" @@ -3804,13 +3951,33 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + [[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -4168,6 +4335,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "universal-hash" version = "0.5.1" @@ -4380,6 +4553,19 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasmparser" version = "0.244.0" @@ -4752,10 +4938,23 @@ dependencies = [ ] [[package]] -name = "yansi" -version = "1.0.1" +name = "xattr" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix", +] + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] [[package]] name = "yoke" diff --git a/shanty-config/src/lib.rs b/shanty-config/src/lib.rs index 16a51b0..f84f1e7 100644 --- a/shanty-config/src/lib.rs +++ b/shanty-config/src/lib.rs @@ -41,6 +41,9 @@ pub struct AppConfig { #[serde(default)] pub subsonic: SubsonicConfig, + + #[serde(default)] + pub musicbrainz: MusicBrainzConfig, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -166,6 +169,18 @@ pub struct SubsonicConfig { pub transcoding_enabled: bool, } +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct MusicBrainzConfig { + /// Path to the local MusicBrainz SQLite database (shanty-mb.db). + /// If None, only the remote API is used. 
+ #[serde(default)] + pub local_db_path: Option, + + /// Automatically re-download and re-import dumps weekly. + #[serde(default)] + pub auto_update: bool, +} + impl Default for SubsonicConfig { fn default() -> Self { Self { @@ -203,6 +218,7 @@ impl Default for AppConfig { metadata: MetadataConfig::default(), scheduling: SchedulingConfig::default(), subsonic: SubsonicConfig::default(), + musicbrainz: MusicBrainzConfig::default(), } } } diff --git a/shanty-data/Cargo.toml b/shanty-data/Cargo.toml index 9297a81..3697c1d 100644 --- a/shanty-data/Cargo.toml +++ b/shanty-data/Cargo.toml @@ -11,7 +11,16 @@ serde_json = "1" thiserror = "2" tracing = "0.1" tokio = { version = "1", features = ["full"] } -reqwest = { version = "0.12", features = ["json"] } +reqwest = { version = "0.12", features = ["json", "stream"] } +futures-util = "0.3" +rusqlite = { version = "0.29", optional = true } +xz2 = { version = "0.1", optional = true } +tar = { version = "0.4", optional = true } +chrono = { version = "0.4", optional = true } + +[features] +default = ["local-mb"] +local-mb = ["rusqlite", "xz2", "tar", "chrono"] [dev-dependencies] tokio = { version = "1", features = ["full", "test-util"] } diff --git a/shanty-data/src/lib.rs b/shanty-data/src/lib.rs index 7f4c72c..5279c89 100644 --- a/shanty-data/src/lib.rs +++ b/shanty-data/src/lib.rs @@ -4,6 +4,12 @@ pub mod fanarttv; pub mod http; pub mod lastfm; pub mod lrclib; +#[cfg(feature = "local-mb")] +pub mod mb_hybrid; +#[cfg(feature = "local-mb")] +pub mod mb_import; +#[cfg(feature = "local-mb")] +pub mod mb_local; pub mod musicbrainz; pub mod traits; pub mod types; @@ -14,6 +20,10 @@ pub use error::{DataError, DataResult}; pub use fanarttv::FanartTvFetcher; pub use lastfm::{LastFmBioFetcher, LastFmSimilarFetcher}; pub use lrclib::LrclibFetcher; +#[cfg(feature = "local-mb")] +pub use mb_hybrid::HybridMusicBrainzFetcher; +#[cfg(feature = "local-mb")] +pub use mb_local::LocalMusicBrainzFetcher; pub use musicbrainz::MusicBrainzFetcher; 
pub use traits::*; pub use types::*; diff --git a/shanty-data/src/mb_hybrid.rs b/shanty-data/src/mb_hybrid.rs new file mode 100644 index 0000000..ff25e2c --- /dev/null +++ b/shanty-data/src/mb_hybrid.rs @@ -0,0 +1,171 @@ +//! Hybrid MusicBrainz fetcher: local DB first, API fallback. +//! +//! Tries the local SQLite database for instant lookups. If the local DB is not +//! configured, not available, or doesn't have the requested entity, falls back +//! to the rate-limited MusicBrainz API. + +use crate::error::DataResult; +use crate::mb_local::{LocalMbStats, LocalMusicBrainzFetcher}; +use crate::musicbrainz::MusicBrainzFetcher; +use crate::traits::MetadataFetcher; +use crate::types::{ + ArtistInfo, ArtistSearchResult, DiscographyEntry, RecordingDetails, RecordingMatch, + ReleaseGroupEntry, ReleaseMatch, ReleaseTrack, +}; + +/// A [`MetadataFetcher`] that tries a local MusicBrainz SQLite database first, +/// then falls back to the remote MusicBrainz API. +pub struct HybridMusicBrainzFetcher { + local: Option, + remote: MusicBrainzFetcher, +} + +impl HybridMusicBrainzFetcher { + /// Create a hybrid fetcher. If `local` is `None`, all queries go to the API. + pub fn new(local: Option, remote: MusicBrainzFetcher) -> Self { + Self { local, remote } + } + + /// Whether a local database is configured and has data. + pub fn has_local_db(&self) -> bool { + self.local.as_ref().is_some_and(|l| l.is_available()) + } + + /// Get stats from the local database (if available). + pub fn local_stats(&self) -> Option { + self.local + .as_ref() + .filter(|l| l.is_available()) + .map(|l| l.stats()) + } + + /// Get a reference to the underlying remote fetcher (for methods not on the trait). + pub fn remote(&self) -> &MusicBrainzFetcher { + &self.remote + } + + /// Returns a reference to the local fetcher if available and populated. + fn local_if_available(&self) -> Option<&LocalMusicBrainzFetcher> { + self.local.as_ref().filter(|l| l.is_available()) + } + + /// Look up an artist by MBID. 
Tries local first, then remote. + pub async fn get_artist_by_mbid(&self, mbid: &str) -> DataResult<(String, Option)> { + if let Some(local) = self.local_if_available() + && let Ok(result) = local.get_artist_by_mbid_sync(mbid) + { + return Ok(result); + } + self.remote.get_artist_by_mbid(mbid).await + } + + /// Get detailed artist info by MBID. Tries local first, then remote. + pub async fn get_artist_info(&self, mbid: &str) -> DataResult { + if let Some(local) = self.local_if_available() + && let Ok(result) = local.get_artist_info_sync(mbid) + { + return Ok(result); + } + self.remote.get_artist_info(mbid).await + } + + /// Get a clone of the rate limiter for sharing with other MB clients. + pub fn limiter(&self) -> crate::http::RateLimiter { + self.remote.limiter() + } +} + +/// Try a local search; returns `Some(results)` if non-empty, `None` to fall through. +async fn try_local_vec>>>( + f: F, +) -> Option>> { + let results = f.await; + match results { + Ok(ref r) if !r.is_empty() => Some(results), + _ => None, + } +} + +impl MetadataFetcher for HybridMusicBrainzFetcher { + async fn search_recording(&self, artist: &str, title: &str) -> DataResult> { + if let Some(local) = self.local_if_available() + && let Some(results) = try_local_vec(local.search_recording(artist, title)).await + { + return results; + } + self.remote.search_recording(artist, title).await + } + + async fn search_release(&self, artist: &str, album: &str) -> DataResult> { + if let Some(local) = self.local_if_available() + && let Some(results) = try_local_vec(local.search_release(artist, album)).await + { + return results; + } + self.remote.search_release(artist, album).await + } + + async fn get_recording(&self, mbid: &str) -> DataResult { + if let Some(local) = self.local_if_available() + && let Ok(result) = local.get_recording(mbid).await + { + return Ok(result); + } + self.remote.get_recording(mbid).await + } + + async fn search_artist(&self, query: &str, limit: u32) -> DataResult> { + if let 
Some(local) = self.local_if_available() + && let Some(results) = try_local_vec(local.search_artist(query, limit)).await + { + return results; + } + self.remote.search_artist(query, limit).await + } + + async fn get_artist_releases( + &self, + artist_mbid: &str, + limit: u32, + ) -> DataResult> { + if let Some(local) = self.local_if_available() + && let Some(results) = + try_local_vec(local.get_artist_releases(artist_mbid, limit)).await + { + return results; + } + self.remote.get_artist_releases(artist_mbid, limit).await + } + + async fn get_release_tracks(&self, release_mbid: &str) -> DataResult> { + if let Some(local) = self.local_if_available() + && let Ok(tracks) = local.get_release_tracks(release_mbid).await + { + return Ok(tracks); + } + self.remote.get_release_tracks(release_mbid).await + } + + async fn get_artist_release_groups( + &self, + artist_mbid: &str, + ) -> DataResult> { + if let Some(local) = self.local_if_available() + && let Some(results) = try_local_vec(local.get_artist_release_groups(artist_mbid)).await + { + return results; + } + self.remote.get_artist_release_groups(artist_mbid).await + } + + async fn resolve_release_from_group(&self, release_group_mbid: &str) -> DataResult { + if let Some(local) = self.local_if_available() + && let Ok(result) = local.resolve_release_from_group(release_group_mbid).await + { + return Ok(result); + } + self.remote + .resolve_release_from_group(release_group_mbid) + .await + } +} diff --git a/shanty-data/src/mb_import.rs b/shanty-data/src/mb_import.rs new file mode 100644 index 0000000..b48bfcc --- /dev/null +++ b/shanty-data/src/mb_import.rs @@ -0,0 +1,913 @@ +//! MusicBrainz JSON dump importer. +//! +//! Downloads and parses MusicBrainz JSON data dumps (`.tar.xz` files) into a +//! local SQLite database (`shanty-mb.db`) for instant, rate-limit-free lookups. +//! +//! Each dump file contains one JSON object per line. We stream-decompress the +//! 
tar archive, read entries line-by-line, extract the fields we need, and batch +//! INSERT into SQLite with periodic transaction commits. + +use std::io::BufRead; +use std::path::{Path, PathBuf}; + +use rusqlite::Connection; +use serde::Deserialize; +use tracing; + +/// Batch size for transaction commits during import. +const BATCH_SIZE: u64 = 10_000; + +/// Base URL for MusicBrainz JSON data dumps. +const DUMP_BASE_URL: &str = "https://data.metabrainz.org/pub/musicbrainz/data/json-dumps/"; + +/// Statistics from an import run. +#[derive(Debug, Clone, Default)] +pub struct ImportStats { + pub artists: u64, + pub artist_urls: u64, + pub release_groups: u64, + pub releases: u64, + pub tracks: u64, + pub recordings: u64, +} + +impl std::fmt::Display for ImportStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "Imported: {} artists, {} artist URLs, {} release groups, {} releases, {} tracks, {} recordings", + self.artists, + self.artist_urls, + self.release_groups, + self.releases, + self.tracks, + self.recordings + ) + } +} + +/// Create the SQLite schema for the local MusicBrainz database. 
+pub fn create_schema(conn: &Connection) -> Result<(), Box> { + conn.execute_batch( + " + CREATE TABLE IF NOT EXISTS mb_artists ( + mbid TEXT PRIMARY KEY, + name TEXT NOT NULL, + sort_name TEXT, + disambiguation TEXT, + artist_type TEXT, + country TEXT, + begin_year INTEGER + ); + CREATE INDEX IF NOT EXISTS idx_mb_artists_name ON mb_artists(name COLLATE NOCASE); + + CREATE TABLE IF NOT EXISTS mb_artist_urls ( + artist_mbid TEXT NOT NULL, + url TEXT NOT NULL, + link_type TEXT NOT NULL + ); + CREATE INDEX IF NOT EXISTS idx_mb_artist_urls_artist ON mb_artist_urls(artist_mbid); + + CREATE TABLE IF NOT EXISTS mb_release_groups ( + mbid TEXT PRIMARY KEY, + title TEXT NOT NULL, + artist_mbid TEXT, + primary_type TEXT, + secondary_types TEXT, + first_release_date TEXT + ); + CREATE INDEX IF NOT EXISTS idx_mb_rg_artist ON mb_release_groups(artist_mbid); + + CREATE TABLE IF NOT EXISTS mb_releases ( + mbid TEXT PRIMARY KEY, + title TEXT NOT NULL, + release_group_mbid TEXT, + artist_mbid TEXT, + date TEXT, + country TEXT, + status TEXT + ); + CREATE INDEX IF NOT EXISTS idx_mb_releases_rg ON mb_releases(release_group_mbid); + CREATE INDEX IF NOT EXISTS idx_mb_releases_artist ON mb_releases(artist_mbid); + + CREATE TABLE IF NOT EXISTS mb_tracks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + release_mbid TEXT NOT NULL, + recording_mbid TEXT NOT NULL, + title TEXT NOT NULL, + track_number INTEGER, + disc_number INTEGER, + duration_ms INTEGER, + position INTEGER + ); + CREATE INDEX IF NOT EXISTS idx_mb_tracks_release ON mb_tracks(release_mbid); + CREATE INDEX IF NOT EXISTS idx_mb_tracks_recording ON mb_tracks(recording_mbid); + + CREATE TABLE IF NOT EXISTS mb_recordings ( + mbid TEXT PRIMARY KEY, + title TEXT NOT NULL, + artist_mbid TEXT, + duration_ms INTEGER + ); + CREATE INDEX IF NOT EXISTS idx_mb_recordings_artist ON mb_recordings(artist_mbid); + CREATE INDEX IF NOT EXISTS idx_mb_recordings_title ON mb_recordings(title COLLATE NOCASE); + + CREATE TABLE IF NOT EXISTS mb_import_meta 
( + key TEXT PRIMARY KEY, + value TEXT NOT NULL + ); + ", + )?; + Ok(()) +} + +// --- JSON structures matching MusicBrainz dump format --- + +#[derive(Deserialize)] +struct DumpArtist { + id: String, + name: String, + #[serde(rename = "sort-name")] + sort_name: Option, + disambiguation: Option, + #[serde(rename = "type")] + artist_type: Option, + country: Option, + #[serde(rename = "life-span")] + life_span: Option, + relations: Option>, +} + +#[derive(Deserialize)] +struct DumpLifeSpan { + begin: Option, +} + +#[derive(Deserialize)] +struct DumpRelation { + #[serde(rename = "type")] + relation_type: String, + url: Option, +} + +#[derive(Deserialize)] +struct DumpRelationUrl { + resource: String, +} + +#[derive(Deserialize)] +struct DumpReleaseGroup { + id: String, + title: String, + #[serde(rename = "primary-type")] + primary_type: Option, + #[serde(rename = "secondary-types", default)] + secondary_types: Option>, + #[serde(rename = "first-release-date")] + first_release_date: Option, + #[serde(rename = "artist-credit")] + artist_credit: Option>, +} + +#[derive(Deserialize)] +struct DumpRelease { + id: String, + title: String, + #[serde(rename = "release-group")] + release_group: Option, + date: Option, + country: Option, + status: Option, + #[serde(rename = "artist-credit")] + artist_credit: Option>, + media: Option>, +} + +#[derive(Deserialize)] +struct DumpReleaseGroupRef { + id: String, +} + +#[derive(Deserialize)] +struct DumpMedia { + position: Option, + tracks: Option>, +} + +#[derive(Deserialize)] +struct DumpTrack { + position: Option, + title: String, + length: Option, + recording: Option, +} + +#[derive(Deserialize)] +struct DumpTrackRecording { + id: String, +} + +#[derive(Deserialize)] +struct DumpRecording { + id: String, + title: String, + length: Option, + #[serde(rename = "artist-credit")] + artist_credit: Option>, +} + +#[derive(Deserialize)] +struct DumpArtistCredit { + artist: DumpArtistRef, +} + +#[derive(Deserialize)] +struct DumpArtistRef { 
+ id: String, +} + +/// Extract the primary artist MBID from artist credits. +fn primary_artist_mbid(credits: &Option>) -> Option { + credits + .as_ref() + .and_then(|c| c.first().map(|ac| ac.artist.id.clone())) +} + +/// Extract begin year from a life-span date string like "1990" or "1990-05-14". +fn extract_begin_year(life_span: &Option) -> Option { + life_span + .as_ref() + .and_then(|ls| ls.begin.as_ref()) + .and_then(|d| d.split('-').next()) + .and_then(|y| y.parse::().ok()) +} + +/// Import artists from a line-delimited JSON reader. +/// +/// Each line should be a complete JSON object matching the MusicBrainz artist dump format. +/// Returns the number of artists imported. +pub fn import_artists( + conn: &Connection, + reader: impl BufRead, + progress: impl Fn(u64), +) -> Result> { + // Clear existing data for clean re-import + conn.execute("DELETE FROM mb_artist_urls", [])?; + conn.execute("DELETE FROM mb_artists", [])?; + + let mut count: u64 = 0; + let mut url_count: u64 = 0; + let tx = conn.unchecked_transaction()?; + + for line_result in reader.lines() { + let line = match line_result { + Ok(l) => l, + Err(e) => { + tracing::warn!(error = %e, "skipping unreadable line"); + continue; + } + }; + let line = line.trim(); + if line.is_empty() { + continue; + } + + let artist: DumpArtist = match serde_json::from_str(line) { + Ok(a) => a, + Err(e) => { + tracing::trace!(error = %e, "skipping malformed artist JSON line"); + continue; + } + }; + + let begin_year = extract_begin_year(&artist.life_span); + + tx.execute( + "INSERT OR REPLACE INTO mb_artists (mbid, name, sort_name, disambiguation, artist_type, country, begin_year) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", + rusqlite::params![ + artist.id, + artist.name, + artist.sort_name, + artist.disambiguation, + artist.artist_type, + artist.country, + begin_year, + ], + )?; + + // Insert URL relations + if let Some(relations) = artist.relations { + for rel in relations { + if let Some(url) = rel.url { + tx.execute( + 
"INSERT INTO mb_artist_urls (artist_mbid, url, link_type) VALUES (?1, ?2, ?3)", + rusqlite::params![artist.id, url.resource, rel.relation_type], + )?; + url_count += 1; + } + } + } + + count += 1; + if count.is_multiple_of(BATCH_SIZE) { + progress(count); + } + } + + tx.commit()?; + progress(count); + + // Store URL count in import meta + conn.execute( + "INSERT OR REPLACE INTO mb_import_meta (key, value) VALUES ('artist_url_count', ?1)", + rusqlite::params![url_count.to_string()], + )?; + + Ok(count) +} + +/// Import release groups from a line-delimited JSON reader. +pub fn import_release_groups( + conn: &Connection, + reader: impl BufRead, + progress: impl Fn(u64), +) -> Result> { + conn.execute("DELETE FROM mb_release_groups", [])?; + + let mut count: u64 = 0; + let tx = conn.unchecked_transaction()?; + + for line_result in reader.lines() { + let line = match line_result { + Ok(l) => l, + Err(e) => { + tracing::warn!(error = %e, "skipping unreadable line"); + continue; + } + }; + let line = line.trim(); + if line.is_empty() { + continue; + } + + let rg: DumpReleaseGroup = match serde_json::from_str(line) { + Ok(r) => r, + Err(e) => { + tracing::trace!(error = %e, "skipping malformed release-group JSON line"); + continue; + } + }; + + let artist_mbid = primary_artist_mbid(&rg.artist_credit); + let secondary_types = rg + .secondary_types + .as_ref() + .map(|st| serde_json::to_string(st).unwrap_or_default()); + + tx.execute( + "INSERT OR REPLACE INTO mb_release_groups (mbid, title, artist_mbid, primary_type, secondary_types, first_release_date) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + rusqlite::params![ + rg.id, + rg.title, + artist_mbid, + rg.primary_type, + secondary_types, + rg.first_release_date, + ], + )?; + + count += 1; + if count.is_multiple_of(BATCH_SIZE) { + progress(count); + } + } + + tx.commit()?; + progress(count); + Ok(count) +} + +/// Import releases (and their tracks) from a line-delimited JSON reader. 
+pub fn import_releases( + conn: &Connection, + reader: impl BufRead, + progress: impl Fn(u64), +) -> Result> { + conn.execute("DELETE FROM mb_tracks", [])?; + conn.execute("DELETE FROM mb_releases", [])?; + + let mut count: u64 = 0; + let mut track_count: u64 = 0; + let tx = conn.unchecked_transaction()?; + + for line_result in reader.lines() { + let line = match line_result { + Ok(l) => l, + Err(e) => { + tracing::warn!(error = %e, "skipping unreadable line"); + continue; + } + }; + let line = line.trim(); + if line.is_empty() { + continue; + } + + let release: DumpRelease = match serde_json::from_str(line) { + Ok(r) => r, + Err(e) => { + tracing::trace!(error = %e, "skipping malformed release JSON line"); + continue; + } + }; + + let rg_mbid = release.release_group.as_ref().map(|rg| &rg.id); + let artist_mbid = primary_artist_mbid(&release.artist_credit); + + tx.execute( + "INSERT OR REPLACE INTO mb_releases (mbid, title, release_group_mbid, artist_mbid, date, country, status) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", + rusqlite::params![ + release.id, + release.title, + rg_mbid, + artist_mbid, + release.date, + release.country, + release.status, + ], + )?; + + // Insert tracks from media + if let Some(media) = release.media { + for medium in media { + let disc_number = medium.position; + if let Some(tracks) = medium.tracks { + for track in tracks { + let recording_mbid = track + .recording + .as_ref() + .map(|r| r.id.as_str()) + .unwrap_or(""); + if recording_mbid.is_empty() { + continue; + } + tx.execute( + "INSERT INTO mb_tracks (release_mbid, recording_mbid, title, track_number, disc_number, duration_ms, position) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", + rusqlite::params![ + release.id, + recording_mbid, + track.title, + track.position, + disc_number, + track.length, + track.position, + ], + )?; + track_count += 1; + } + } + } + } + + count += 1; + if count.is_multiple_of(BATCH_SIZE) { + progress(count); + } + } + + tx.commit()?; + progress(count); + + // Store 
track count in import meta + conn.execute( + "INSERT OR REPLACE INTO mb_import_meta (key, value) VALUES ('track_count', ?1)", + rusqlite::params![track_count.to_string()], + )?; + + Ok(count) +} + +/// Import recordings from a line-delimited JSON reader. +pub fn import_recordings( + conn: &Connection, + reader: impl BufRead, + progress: impl Fn(u64), +) -> Result> { + conn.execute("DELETE FROM mb_recordings", [])?; + + let mut count: u64 = 0; + let tx = conn.unchecked_transaction()?; + + for line_result in reader.lines() { + let line = match line_result { + Ok(l) => l, + Err(e) => { + tracing::warn!(error = %e, "skipping unreadable line"); + continue; + } + }; + let line = line.trim(); + if line.is_empty() { + continue; + } + + let recording: DumpRecording = match serde_json::from_str(line) { + Ok(r) => r, + Err(e) => { + tracing::trace!(error = %e, "skipping malformed recording JSON line"); + continue; + } + }; + + let artist_mbid = primary_artist_mbid(&recording.artist_credit); + + tx.execute( + "INSERT OR REPLACE INTO mb_recordings (mbid, title, artist_mbid, duration_ms) VALUES (?1, ?2, ?3, ?4)", + rusqlite::params![ + recording.id, + recording.title, + artist_mbid, + recording.length, + ], + )?; + + count += 1; + if count.is_multiple_of(BATCH_SIZE) { + progress(count); + } + } + + tx.commit()?; + progress(count); + Ok(count) +} + +/// Discover the latest dump folder timestamp from the MB server. +/// +/// Fetches the directory listing and finds the `latest-is-{TIMESTAMP}` file. 
+pub async fn discover_latest_dump_folder() +-> Result> { + let client = reqwest::Client::builder() + .user_agent("Shanty/0.1.0 (shanty-music-app)") + .timeout(std::time::Duration::from_secs(30)) + .build()?; + + let resp = client.get(DUMP_BASE_URL).send().await?; + if !resp.status().is_success() { + return Err(format!("HTTP {} fetching dump listing", resp.status()).into()); + } + + let body = resp.text().await?; + + // Parse the HTML directory listing to find "latest-is-YYYYMMDD-HHMMSS" + // The listing contains links like: + let latest = body + .split("latest-is-") + .nth(1) + .and_then(|s| s.split('"').next()) + .map(|s| s.trim_end_matches('/').to_string()); + + match latest { + Some(timestamp) => { + tracing::info!(timestamp = %timestamp, "discovered latest MB dump folder"); + Ok(timestamp) + } + None => Err("could not find latest dump folder in directory listing".into()), + } +} + +/// Download a MusicBrainz JSON dump file and return the path it was saved to. +/// +/// Downloads from `https://data.metabrainz.org/pub/musicbrainz/data/json-dumps/{timestamp}/{filename}`. +/// The `timestamp` is the dated folder name (e.g., "20260321-001002"). 
+pub async fn download_dump( + filename: &str, + timestamp: &str, + target_dir: &Path, + progress: impl Fn(&str), +) -> Result> { + let url = format!("{DUMP_BASE_URL}{timestamp}/{filename}"); + let target_path = target_dir.join(filename); + + progress(&format!("Downloading {filename}...")); + tracing::info!(url = %url, target = %target_path.display(), "downloading MB dump"); + + let client = reqwest::Client::builder() + .user_agent("Shanty/0.1.0 (shanty-music-app)") + .timeout(std::time::Duration::from_secs(3600)) // 1 hour timeout for large files + .build()?; + + let resp = client.get(&url).send().await?; + if !resp.status().is_success() { + return Err(format!("HTTP {} downloading {url}", resp.status()).into()); + } + + // Stream to disk — don't buffer the whole file in memory + std::fs::create_dir_all(target_dir)?; + let mut file = tokio::fs::File::create(&target_path).await?; + let mut stream = resp.bytes_stream(); + let mut downloaded: u64 = 0; + let mut last_report: u64 = 0; + + use futures_util::StreamExt; + use tokio::io::AsyncWriteExt; + + while let Some(chunk) = stream.next().await { + let chunk = chunk?; + file.write_all(&chunk).await?; + downloaded += chunk.len() as u64; + + // Report progress every ~50 MB + if downloaded - last_report > 50 * 1_048_576 { + let mb = downloaded / 1_048_576; + progress(&format!("Downloading {filename}... {mb} MB")); + last_report = downloaded; + } + } + file.flush().await?; + + let size_mb = downloaded / 1_048_576; + progress(&format!("Downloaded {filename} ({size_mb} MB)")); + tracing::info!( + file = %target_path.display(), + size_mb = size_mb, + "download complete" + ); + + Ok(target_path) +} + +/// Open a `.tar.xz` file and return a buffered reader for the data entry. +/// +/// MusicBrainz dump archives contain metadata files (TIMESTAMP, COPYING, etc.) +/// followed by the actual data at `mbdump/{entity_name}`. This skips to the +/// `mbdump/` entry and returns a streaming reader — no buffering into memory. 
+/// +/// IMPORTANT: The returned reader borrows from the archive internals. Because +/// tar::Entry borrows the archive, we can't return it directly. Instead we use +/// a helper that owns the archive and provides line-by-line iteration. +pub fn import_from_tar_xz( + path: &Path, + conn: &Connection, + entity_type: &str, + progress: impl Fn(u64), +) -> Result> { + let file = std::fs::File::open(path)?; + let xz_reader = xz2::read::XzDecoder::new(std::io::BufReader::with_capacity(64 * 1024, file)); + let mut archive = tar::Archive::new(xz_reader); + + // Find the mbdump/* entry (skip TIMESTAMP, COPYING, README, etc.) + for entry_result in archive.entries()? { + let entry = entry_result?; + let entry_path = entry.path()?.to_string_lossy().to_string(); + + if entry_path.starts_with("mbdump/") { + tracing::info!(entry = %entry_path, "found data entry in tar archive"); + let reader = std::io::BufReader::with_capacity(256 * 1024, entry); + + // Dispatch to the right importer based on entity type + return match entity_type { + "artist" => import_artists(conn, reader, progress), + "release-group" => import_release_groups(conn, reader, progress), + "release" => import_releases(conn, reader, progress), + "recording" => import_recordings(conn, reader, progress), + _ => Err(format!("unknown entity type: {entity_type}").into()), + }; + } + } + + Err(format!("no mbdump/ entry found in {}", path.display()).into()) +} + +/// Run a full import of all dump files from a directory. +/// +/// Expects `artist.tar.xz`, `release-group.tar.xz`, `release.tar.xz`, and +/// `recording.tar.xz` to exist in `dump_dir`. 
+pub fn run_import( + conn: &Connection, + dump_dir: &Path, + progress: impl Fn(&str), +) -> Result> { + create_schema(conn)?; + + // Optimize for bulk import + conn.execute_batch( + "PRAGMA journal_mode = WAL; + PRAGMA synchronous = NORMAL; + PRAGMA cache_size = -64000; + PRAGMA temp_store = MEMORY; + PRAGMA foreign_keys = OFF;", + )?; + + let mut stats = ImportStats::default(); + + // Import artists + let artist_path = dump_dir.join("artist.tar.xz"); + if artist_path.exists() { + progress("Importing artists..."); + stats.artists = import_from_tar_xz(&artist_path, conn, "artist", |n| { + if n % 100_000 == 0 { + progress(&format!("Artists: {n}...")); + } + })?; + progress(&format!("Artists: {} done", stats.artists)); + } else { + tracing::warn!(path = %artist_path.display(), "artist dump not found, skipping"); + } + + // Import release groups + let rg_path = dump_dir.join("release-group.tar.xz"); + if rg_path.exists() { + progress("Importing release groups..."); + stats.release_groups = import_from_tar_xz(&rg_path, conn, "release-group", |n| { + if n % 100_000 == 0 { + progress(&format!("Release groups: {n}...")); + } + })?; + progress(&format!("Release groups: {} done", stats.release_groups)); + } else { + tracing::warn!(path = %rg_path.display(), "release-group dump not found, skipping"); + } + + // Import releases (and tracks) + let release_path = dump_dir.join("release.tar.xz"); + if release_path.exists() { + progress("Importing releases..."); + stats.releases = import_from_tar_xz(&release_path, conn, "release", |n| { + if n % 100_000 == 0 { + progress(&format!("Releases: {n}...")); + } + })?; + // Read track count from meta + if let Ok(tc) = conn.query_row( + "SELECT value FROM mb_import_meta WHERE key = 'track_count'", + [], + |row| row.get::<_, String>(0), + ) { + stats.tracks = tc.parse().unwrap_or(0); + } + progress(&format!( + "Releases: {} done ({} tracks)", + stats.releases, stats.tracks + )); + } else { + tracing::warn!(path = %release_path.display(), 
"release dump not found, skipping"); + } + + // Import recordings + let recording_path = dump_dir.join("recording.tar.xz"); + if recording_path.exists() { + progress("Importing recordings..."); + stats.recordings = import_from_tar_xz(&recording_path, conn, "recording", |n| { + if n % 100_000 == 0 { + progress(&format!("Recordings: {n}...")); + } + })?; + progress(&format!("Recordings: {} done", stats.recordings)); + } else { + tracing::warn!(path = %recording_path.display(), "recording dump not found, skipping"); + } + + // Read artist URL count from meta + if let Ok(uc) = conn.query_row( + "SELECT value FROM mb_import_meta WHERE key = 'artist_url_count'", + [], + |row| row.get::<_, String>(0), + ) { + stats.artist_urls = uc.parse().unwrap_or(0); + } + + // Record import timestamp + let now = chrono::Utc::now().to_rfc3339(); + conn.execute( + "INSERT OR REPLACE INTO mb_import_meta (key, value) VALUES ('last_import_date', ?1)", + rusqlite::params![now], + )?; + + // Record entity counts + conn.execute( + "INSERT OR REPLACE INTO mb_import_meta (key, value) VALUES ('artist_count', ?1)", + rusqlite::params![stats.artists.to_string()], + )?; + conn.execute( + "INSERT OR REPLACE INTO mb_import_meta (key, value) VALUES ('release_group_count', ?1)", + rusqlite::params![stats.release_groups.to_string()], + )?; + conn.execute( + "INSERT OR REPLACE INTO mb_import_meta (key, value) VALUES ('release_count', ?1)", + rusqlite::params![stats.releases.to_string()], + )?; + conn.execute( + "INSERT OR REPLACE INTO mb_import_meta (key, value) VALUES ('recording_count', ?1)", + rusqlite::params![stats.recordings.to_string()], + )?; + + progress(&format!("Import complete: {stats}")); + Ok(stats) +} + +/// The dump filenames to download. +pub const DUMP_FILES: &[&str] = &[ + "artist.tar.xz", + "release-group.tar.xz", + "release.tar.xz", + "recording.tar.xz", +]; + +/// High-level import function: opens the database, runs import, closes it. 
+/// +/// This is the main entry point for external callers that don't want to manage +/// a `rusqlite::Connection` directly. +pub fn run_import_at_path( + db_path: &Path, + dump_dir: &Path, + progress: impl Fn(&str), +) -> Result> { + if let Some(parent) = db_path.parent() { + std::fs::create_dir_all(parent)?; + } + let conn = Connection::open(db_path)?; + run_import(&conn, dump_dir, progress) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_schema() { + let conn = Connection::open_in_memory().unwrap(); + create_schema(&conn).unwrap(); + // Verify tables exist + let count: i32 = conn + .query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name LIKE 'mb_%'", + [], + |row| row.get(0), + ) + .unwrap(); + assert!(count >= 6, "Expected at least 6 mb_ tables, got {count}"); + } + + #[test] + fn test_import_artists_empty() { + let conn = Connection::open_in_memory().unwrap(); + create_schema(&conn).unwrap(); + let reader = std::io::BufReader::new(std::io::Cursor::new(b"")); + let count = import_artists(&conn, reader, |_| {}).unwrap(); + assert_eq!(count, 0); + } + + #[test] + fn test_import_single_artist() { + let conn = Connection::open_in_memory().unwrap(); + create_schema(&conn).unwrap(); + + let json = r#"{"id":"some-uuid","name":"Test Artist","sort-name":"Artist, Test","disambiguation":"test","type":"Person","country":"US","life-span":{"begin":"1990-05-01"},"relations":[{"type":"wikipedia","url":{"resource":"https://en.wikipedia.org/wiki/Test"}}]}"#; + let reader = std::io::BufReader::new(std::io::Cursor::new(json.as_bytes())); + let count = import_artists(&conn, reader, |_| {}).unwrap(); + assert_eq!(count, 1); + + // Verify artist was inserted + let name: String = conn + .query_row( + "SELECT name FROM mb_artists WHERE mbid = 'some-uuid'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(name, "Test Artist"); + + // Verify URL was inserted + let url_count: i32 = conn + .query_row( + "SELECT COUNT(*) FROM 
mb_artist_urls WHERE artist_mbid = 'some-uuid'", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(url_count, 1); + } + + #[test] + fn test_import_release_groups() { + let conn = Connection::open_in_memory().unwrap(); + create_schema(&conn).unwrap(); + + let json = r#"{"id":"rg-uuid","title":"Test Album","primary-type":"Album","secondary-types":["Compilation"],"first-release-date":"2020-01-15","artist-credit":[{"artist":{"id":"artist-uuid","name":"Test Artist"}}]}"#; + let reader = std::io::BufReader::new(std::io::Cursor::new(json.as_bytes())); + let count = import_release_groups(&conn, reader, |_| {}).unwrap(); + assert_eq!(count, 1); + } + + #[test] + fn test_import_recordings() { + let conn = Connection::open_in_memory().unwrap(); + create_schema(&conn).unwrap(); + + let json = r#"{"id":"rec-uuid","title":"Test Recording","length":240000,"artist-credit":[{"artist":{"id":"artist-uuid","name":"Test Artist"}}]}"#; + let reader = std::io::BufReader::new(std::io::Cursor::new(json.as_bytes())); + let count = import_recordings(&conn, reader, |_| {}).unwrap(); + assert_eq!(count, 1); + } +} diff --git a/shanty-data/src/mb_local.rs b/shanty-data/src/mb_local.rs new file mode 100644 index 0000000..389db47 --- /dev/null +++ b/shanty-data/src/mb_local.rs @@ -0,0 +1,583 @@ +//! Local MusicBrainz database fetcher. +//! +//! Implements [`MetadataFetcher`] backed by a local SQLite database (populated +//! via [`crate::mb_import`]). All queries are instant local lookups — no rate +//! limiting needed. + +use std::sync::Mutex; + +use rusqlite::Connection; + +use crate::error::{DataError, DataResult}; +use crate::traits::MetadataFetcher; +use crate::types::{ + ArtistInfo, ArtistSearchResult, ArtistUrl, DiscographyEntry, RecordingDetails, RecordingMatch, + ReleaseGroupEntry, ReleaseMatch, ReleaseRef, ReleaseTrack, +}; + +/// Statistics about the local MusicBrainz database. 
+#[derive(Debug, Clone, Default, serde::Serialize)] +pub struct LocalMbStats { + pub artists: u64, + pub release_groups: u64, + pub releases: u64, + pub recordings: u64, + pub tracks: u64, + pub last_import_date: Option, +} + +/// A [`MetadataFetcher`] backed by a local SQLite database. +pub struct LocalMusicBrainzFetcher { + conn: Mutex, +} + +impl LocalMusicBrainzFetcher { + /// Open (or create) a local MusicBrainz SQLite database. + pub fn new(db_path: &str) -> Result> { + let conn = Connection::open(db_path)?; + conn.execute_batch("PRAGMA journal_mode = WAL; PRAGMA cache_size = -16000;")?; + Ok(Self { + conn: Mutex::new(conn), + }) + } + + /// Check whether the database has been populated with data. + pub fn is_available(&self) -> bool { + let conn = self.conn.lock().unwrap(); + // Check if the mb_artists table exists and has rows + conn.query_row( + "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='mb_artists'", + [], + |row| row.get::<_, i32>(0), + ) + .map(|c| c > 0) + .unwrap_or(false) + && conn + .query_row("SELECT COUNT(*) FROM mb_artists LIMIT 1", [], |row| { + row.get::<_, i32>(0) + }) + .unwrap_or(0) + > 0 + } + + /// Get statistics about the imported data. 
+ pub fn stats(&self) -> LocalMbStats { + let conn = self.conn.lock().unwrap(); + let get_meta = |key: &str| -> Option { + conn.query_row( + "SELECT value FROM mb_import_meta WHERE key = ?1", + rusqlite::params![key], + |row| row.get(0), + ) + .ok() + }; + + LocalMbStats { + artists: get_meta("artist_count") + .and_then(|s| s.parse().ok()) + .unwrap_or(0), + release_groups: get_meta("release_group_count") + .and_then(|s| s.parse().ok()) + .unwrap_or(0), + releases: get_meta("release_count") + .and_then(|s| s.parse().ok()) + .unwrap_or(0), + recordings: get_meta("recording_count") + .and_then(|s| s.parse().ok()) + .unwrap_or(0), + tracks: get_meta("track_count") + .and_then(|s| s.parse().ok()) + .unwrap_or(0), + last_import_date: get_meta("last_import_date"), + } + } + + /// Look up an artist by MBID (returns name and disambiguation). + pub fn get_artist_by_mbid_sync(&self, mbid: &str) -> DataResult<(String, Option)> { + let conn = self.conn.lock().unwrap(); + let result = conn.query_row( + "SELECT name, disambiguation FROM mb_artists WHERE mbid = ?1", + rusqlite::params![mbid], + |row| { + let name: String = row.get(0)?; + let disambiguation: Option = row.get(1)?; + Ok((name, disambiguation.filter(|s| !s.is_empty()))) + }, + ); + match result { + Ok(r) => Ok(r), + Err(rusqlite::Error::QueryReturnedNoRows) => { + Err(DataError::Other(format!("artist {mbid} not found locally"))) + } + Err(e) => Err(DataError::Other(e.to_string())), + } + } + + /// Look up detailed artist info by MBID, including URLs. 
+ pub fn get_artist_info_sync(&self, mbid: &str) -> DataResult { + let conn = self.conn.lock().unwrap(); + let artist = conn.query_row( + "SELECT name, disambiguation, country, artist_type, begin_year FROM mb_artists WHERE mbid = ?1", + rusqlite::params![mbid], + |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, Option>(1)?, + row.get::<_, Option>(2)?, + row.get::<_, Option>(3)?, + row.get::<_, Option>(4)?, + )) + }, + ); + + let (name, disambiguation, country, artist_type, begin_year) = match artist { + Ok(a) => a, + Err(rusqlite::Error::QueryReturnedNoRows) => { + return Err(DataError::Other(format!("artist {mbid} not found locally"))); + } + Err(e) => return Err(DataError::Other(e.to_string())), + }; + + // Fetch URLs + let mut url_stmt = conn + .prepare("SELECT url, link_type FROM mb_artist_urls WHERE artist_mbid = ?1") + .map_err(|e| DataError::Other(e.to_string()))?; + let urls: Vec = url_stmt + .query_map(rusqlite::params![mbid], |row| { + Ok(ArtistUrl { + url: row.get(0)?, + link_type: row.get(1)?, + }) + }) + .map_err(|e| DataError::Other(e.to_string()))? 
+ .filter_map(|r| r.ok()) + .collect(); + + Ok(ArtistInfo { + name, + mbid: Some(mbid.to_string()), + disambiguation: disambiguation.filter(|s| !s.is_empty()), + country: country.filter(|s| !s.is_empty()), + artist_type, + begin_year: begin_year.map(|y| y.to_string()), + urls, + }) + } +} + +impl MetadataFetcher for LocalMusicBrainzFetcher { + async fn search_recording(&self, artist: &str, title: &str) -> DataResult> { + let conn = self.conn.lock().unwrap(); + + let query = if artist.is_empty() { + let pattern = format!("%{title}%"); + let mut stmt = conn + .prepare( + "SELECT r.mbid, r.title, r.artist_mbid, a.name + FROM mb_recordings r + LEFT JOIN mb_artists a ON r.artist_mbid = a.mbid + WHERE r.title LIKE ?1 COLLATE NOCASE + LIMIT 10", + ) + .map_err(|e| DataError::Other(e.to_string()))?; + stmt.query_map(rusqlite::params![pattern], |row| { + Ok(RecordingMatch { + mbid: row.get(0)?, + title: row.get(1)?, + artist_mbid: row.get(2)?, + artist: row + .get::<_, Option>(3)? + .unwrap_or_else(|| "Unknown Artist".into()), + releases: vec![], + score: 100, + }) + }) + .map_err(|e| DataError::Other(e.to_string()))? + .filter_map(|r| r.ok()) + .collect() + } else { + let artist_pattern = format!("%{artist}%"); + let title_pattern = format!("%{title}%"); + let mut stmt = conn + .prepare( + "SELECT r.mbid, r.title, r.artist_mbid, a.name + FROM mb_recordings r + LEFT JOIN mb_artists a ON r.artist_mbid = a.mbid + WHERE r.title LIKE ?1 COLLATE NOCASE + AND a.name LIKE ?2 COLLATE NOCASE + LIMIT 10", + ) + .map_err(|e| DataError::Other(e.to_string()))?; + stmt.query_map(rusqlite::params![title_pattern, artist_pattern], |row| { + Ok(RecordingMatch { + mbid: row.get(0)?, + title: row.get(1)?, + artist_mbid: row.get(2)?, + artist: row + .get::<_, Option>(3)? + .unwrap_or_else(|| "Unknown Artist".into()), + releases: vec![], + score: 100, + }) + }) + .map_err(|e| DataError::Other(e.to_string()))? 
+ .filter_map(|r| r.ok()) + .collect() + }; + + Ok(query) + } + + async fn search_release(&self, artist: &str, album: &str) -> DataResult> { + let conn = self.conn.lock().unwrap(); + + let results = if artist.is_empty() { + let pattern = format!("%{album}%"); + let mut stmt = conn + .prepare( + "SELECT r.mbid, r.title, r.artist_mbid, a.name, r.date + FROM mb_releases r + LEFT JOIN mb_artists a ON r.artist_mbid = a.mbid + WHERE r.title LIKE ?1 COLLATE NOCASE + LIMIT 10", + ) + .map_err(|e| DataError::Other(e.to_string()))?; + stmt.query_map(rusqlite::params![pattern], |row| { + Ok(ReleaseMatch { + mbid: row.get(0)?, + title: row.get(1)?, + artist_mbid: row.get(2)?, + artist: row + .get::<_, Option>(3)? + .unwrap_or_else(|| "Unknown Artist".into()), + date: row.get(4)?, + track_count: None, + score: 100, + }) + }) + .map_err(|e| DataError::Other(e.to_string()))? + .filter_map(|r| r.ok()) + .collect() + } else { + let artist_pattern = format!("%{artist}%"); + let album_pattern = format!("%{album}%"); + let mut stmt = conn + .prepare( + "SELECT r.mbid, r.title, r.artist_mbid, a.name, r.date + FROM mb_releases r + LEFT JOIN mb_artists a ON r.artist_mbid = a.mbid + WHERE r.title LIKE ?1 COLLATE NOCASE + AND a.name LIKE ?2 COLLATE NOCASE + LIMIT 10", + ) + .map_err(|e| DataError::Other(e.to_string()))?; + stmt.query_map(rusqlite::params![album_pattern, artist_pattern], |row| { + Ok(ReleaseMatch { + mbid: row.get(0)?, + title: row.get(1)?, + artist_mbid: row.get(2)?, + artist: row + .get::<_, Option>(3)? + .unwrap_or_else(|| "Unknown Artist".into()), + date: row.get(4)?, + track_count: None, + score: 100, + }) + }) + .map_err(|e| DataError::Other(e.to_string()))? 
+ .filter_map(|r| r.ok()) + .collect() + }; + + Ok(results) + } + + async fn get_recording(&self, mbid: &str) -> DataResult { + let conn = self.conn.lock().unwrap(); + + let recording = conn.query_row( + "SELECT r.mbid, r.title, r.artist_mbid, r.duration_ms, a.name + FROM mb_recordings r + LEFT JOIN mb_artists a ON r.artist_mbid = a.mbid + WHERE r.mbid = ?1", + rusqlite::params![mbid], + |row| { + Ok(RecordingDetails { + mbid: row.get(0)?, + title: row.get(1)?, + artist_mbid: row.get(2)?, + duration_ms: row.get(3)?, + artist: row + .get::<_, Option>(4)? + .unwrap_or_else(|| "Unknown Artist".into()), + releases: vec![], + genres: vec![], + secondary_artists: vec![], + }) + }, + ); + + match recording { + Ok(mut r) => { + // Fetch releases that contain this recording + let mut stmt = conn + .prepare( + "SELECT DISTINCT rel.mbid, rel.title, rel.date + FROM mb_tracks t + JOIN mb_releases rel ON t.release_mbid = rel.mbid + WHERE t.recording_mbid = ?1 + LIMIT 10", + ) + .map_err(|e| DataError::Other(e.to_string()))?; + r.releases = stmt + .query_map(rusqlite::params![mbid], |row| { + Ok(ReleaseRef { + mbid: row.get(0)?, + title: row.get(1)?, + date: row.get(2)?, + track_number: None, + }) + }) + .map_err(|e| DataError::Other(e.to_string()))? 
+ .filter_map(|r| r.ok()) + .collect(); + Ok(r) + } + Err(rusqlite::Error::QueryReturnedNoRows) => Err(DataError::Other(format!( + "recording {mbid} not found locally" + ))), + Err(e) => Err(DataError::Other(e.to_string())), + } + } + + async fn search_artist(&self, query: &str, limit: u32) -> DataResult> { + let conn = self.conn.lock().unwrap(); + let pattern = format!("%{query}%"); + let mut stmt = conn + .prepare( + "SELECT mbid, name, disambiguation, country, artist_type + FROM mb_artists + WHERE name LIKE ?1 COLLATE NOCASE + LIMIT ?2", + ) + .map_err(|e| DataError::Other(e.to_string()))?; + + let results: Vec = stmt + .query_map(rusqlite::params![pattern, limit], |row| { + Ok(ArtistSearchResult { + mbid: row.get(0)?, + name: row.get(1)?, + disambiguation: row.get::<_, Option>(2)?.filter(|s| !s.is_empty()), + country: row.get(3)?, + artist_type: row.get(4)?, + score: 100, + }) + }) + .map_err(|e| DataError::Other(e.to_string()))? + .filter_map(|r| r.ok()) + .collect(); + + Ok(results) + } + + async fn get_artist_releases( + &self, + artist_mbid: &str, + limit: u32, + ) -> DataResult> { + let conn = self.conn.lock().unwrap(); + let mut stmt = conn + .prepare( + "SELECT mbid, title, date, status + FROM mb_releases + WHERE artist_mbid = ?1 + LIMIT ?2", + ) + .map_err(|e| DataError::Other(e.to_string()))?; + + let results: Vec = stmt + .query_map(rusqlite::params![artist_mbid, limit], |row| { + Ok(DiscographyEntry { + mbid: row.get(0)?, + title: row.get(1)?, + date: row.get(2)?, + release_type: row.get(3)?, + track_count: None, + }) + }) + .map_err(|e| DataError::Other(e.to_string()))? 
+ .filter_map(|r| r.ok()) + .collect(); + + Ok(results) + } + + async fn get_release_tracks(&self, release_mbid: &str) -> DataResult> { + let conn = self.conn.lock().unwrap(); + let mut stmt = conn + .prepare( + "SELECT recording_mbid, title, track_number, disc_number, duration_ms + FROM mb_tracks + WHERE release_mbid = ?1 + ORDER BY disc_number, track_number", + ) + .map_err(|e| DataError::Other(e.to_string()))?; + + let tracks: Vec = stmt + .query_map(rusqlite::params![release_mbid], |row| { + Ok(ReleaseTrack { + recording_mbid: row.get(0)?, + title: row.get(1)?, + track_number: row.get(2)?, + disc_number: row.get(3)?, + duration_ms: row.get(4)?, + }) + }) + .map_err(|e| DataError::Other(e.to_string()))? + .filter_map(|r| r.ok()) + .collect(); + + if tracks.is_empty() { + Err(DataError::Other(format!( + "no tracks found for release {release_mbid}" + ))) + } else { + Ok(tracks) + } + } + + async fn get_artist_release_groups( + &self, + artist_mbid: &str, + ) -> DataResult> { + let conn = self.conn.lock().unwrap(); + let mut stmt = conn + .prepare( + "SELECT rg.mbid, rg.title, rg.primary_type, rg.secondary_types, rg.first_release_date, + (SELECT r.mbid FROM mb_releases r WHERE r.release_group_mbid = rg.mbid LIMIT 1) as first_release_mbid + FROM mb_release_groups rg + WHERE rg.artist_mbid = ?1 + ORDER BY rg.first_release_date", + ) + .map_err(|e| DataError::Other(e.to_string()))?; + + let results: Vec = stmt + .query_map(rusqlite::params![artist_mbid], |row| { + let secondary_types_json: Option = row.get(3)?; + let secondary_types: Vec = secondary_types_json + .and_then(|s| serde_json::from_str(&s).ok()) + .unwrap_or_default(); + + Ok(ReleaseGroupEntry { + mbid: row.get(0)?, + title: row.get(1)?, + primary_type: row.get(2)?, + secondary_types, + first_release_date: row.get(4)?, + first_release_mbid: row.get(5)?, + }) + }) + .map_err(|e| DataError::Other(e.to_string()))? 
+ .filter_map(|r| r.ok()) + .collect(); + + Ok(results) + } + + async fn resolve_release_from_group(&self, release_group_mbid: &str) -> DataResult { + let conn = self.conn.lock().unwrap(); + let result = conn.query_row( + "SELECT mbid FROM mb_releases WHERE release_group_mbid = ?1 LIMIT 1", + rusqlite::params![release_group_mbid], + |row| row.get::<_, String>(0), + ); + + match result { + Ok(mbid) => Ok(mbid), + Err(rusqlite::Error::QueryReturnedNoRows) => Err(DataError::Other(format!( + "no releases for release-group {release_group_mbid}" + ))), + Err(e) => Err(DataError::Other(e.to_string())), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::mb_import; + + fn setup_test_db() -> Connection { + let conn = Connection::open_in_memory().unwrap(); + mb_import::create_schema(&conn).unwrap(); + + // Insert test data + conn.execute( + "INSERT INTO mb_artists (mbid, name, sort_name, disambiguation, artist_type, country, begin_year) VALUES ('a-1', 'Test Artist', 'Artist, Test', 'test', 'Person', 'US', 1990)", + [], + ).unwrap(); + + conn.execute( + "INSERT INTO mb_artist_urls (artist_mbid, url, link_type) VALUES ('a-1', 'https://en.wikipedia.org/wiki/Test', 'wikipedia')", + [], + ).unwrap(); + + conn.execute( + "INSERT INTO mb_release_groups (mbid, title, artist_mbid, primary_type, secondary_types, first_release_date) VALUES ('rg-1', 'Test Album', 'a-1', 'Album', NULL, '2020-01-15')", + [], + ).unwrap(); + + conn.execute( + "INSERT INTO mb_releases (mbid, title, release_group_mbid, artist_mbid, date, country, status) VALUES ('r-1', 'Test Album', 'rg-1', 'a-1', '2020-01-15', 'US', 'Official')", + [], + ).unwrap(); + + conn.execute( + "INSERT INTO mb_tracks (release_mbid, recording_mbid, title, track_number, disc_number, duration_ms, position) VALUES ('r-1', 'rec-1', 'Track One', 1, 1, 240000, 1)", + [], + ).unwrap(); + + conn.execute( + "INSERT INTO mb_recordings (mbid, title, artist_mbid, duration_ms) VALUES ('rec-1', 'Track One', 'a-1', 240000)", + [], 
+ ).unwrap(); + + // Insert import metadata + conn.execute( + "INSERT INTO mb_import_meta (key, value) VALUES ('artist_count', '1')", + [], + ) + .unwrap(); + + conn + } + + #[test] + fn test_get_artist_info_sync() { + let conn = setup_test_db(); + // We can't easily test the struct directly since it wraps a Mutex, + // but we can test the SQL works + let (name, disambig): (String, Option) = conn + .query_row( + "SELECT name, disambiguation FROM mb_artists WHERE mbid = 'a-1'", + [], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .unwrap(); + assert_eq!(name, "Test Artist"); + assert_eq!(disambig, Some("test".to_string())); + } + + #[test] + fn test_resolve_release_from_group() { + let conn = setup_test_db(); + let mbid: String = conn + .query_row( + "SELECT mbid FROM mb_releases WHERE release_group_mbid = 'rg-1' LIMIT 1", + [], + |row| row.get(0), + ) + .unwrap(); + assert_eq!(mbid, "r-1"); + } +} diff --git a/shanty-web b/shanty-web index 75f3b4f..3dba620 160000 --- a/shanty-web +++ b/shanty-web @@ -1 +1 @@ -Subproject commit 75f3b4f7045af8435d7cc35ecccfc61b2adcd5a7 +Subproject commit 3dba620c9b76f2881f3d90c7dd0cdee0626bc1da diff --git a/src/main.rs b/src/main.rs index ecba8b4..0125d7f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,13 @@ use actix_cors::Cors; use actix_session::{SessionMiddleware, storage::CookieSessionStore}; use actix_web::{App, HttpServer, cookie::Key, web}; -use clap::Parser; +use clap::{Parser, Subcommand}; use tracing_actix_web::TracingLogger; use tracing_subscriber::EnvFilter; use shanty_config::AppConfig; -use shanty_data::MusicBrainzFetcher; use shanty_data::WikipediaFetcher; +use shanty_data::{HybridMusicBrainzFetcher, LocalMusicBrainzFetcher, MusicBrainzFetcher}; use shanty_db::Database; use shanty_search::MusicBrainzSearch; @@ -29,6 +29,24 @@ struct Cli { /// Increase verbosity (-v info, -vv debug, -vvv trace). 
#[arg(short, long, action = clap::ArgAction::Count)] verbose: u8, + + #[command(subcommand)] + command: Option, +} + +#[derive(Subcommand)] +enum Commands { + /// Import MusicBrainz JSON data dumps into local SQLite database. + MbImport { + /// Download fresh dump files from metabrainz.org before importing. + #[arg(long)] + download: bool, + + /// Directory containing (or to download) dump files. + /// Defaults to the application data directory. + #[arg(long)] + data_dir: Option, + }, } #[actix_web::main] @@ -54,11 +72,36 @@ async fn main() -> anyhow::Result<()> { config.web.port = port; } + // Handle subcommands + if let Some(Commands::MbImport { download, data_dir }) = cli.command { + return run_mb_import(&config, download, data_dir.as_deref()).await; + } + tracing::info!(url = %config.database_url, "connecting to database"); let db = Database::new(&config.database_url).await?; - let mb_client = MusicBrainzFetcher::new()?; - let search = MusicBrainzSearch::with_limiter(mb_client.limiter())?; + let mb_remote = MusicBrainzFetcher::new()?; + let search = MusicBrainzSearch::with_limiter(mb_remote.limiter())?; + + // Set up local MB database if configured + let local_mb = create_local_mb_fetcher(&config); + let mb_client = HybridMusicBrainzFetcher::new(local_mb, mb_remote); + + if mb_client.has_local_db() + && let Some(stats) = mb_client.local_stats() + { + tracing::info!( + artists = stats.artists, + release_groups = stats.release_groups, + releases = stats.releases, + recordings = stats.recordings, + last_import = ?stats.last_import_date, + "local MusicBrainz database loaded" + ); + } else if !mb_client.has_local_db() { + tracing::info!("no local MusicBrainz database — using API only"); + } + let wiki_fetcher = WikipediaFetcher::new()?; let bind = format!("{}:{}", config.web.bind, config.web.port); @@ -88,6 +131,7 @@ async fn main() -> anyhow::Result<()> { // Start pipeline and monitor schedulers shanty_web::pipeline_scheduler::spawn(state.clone()); 
shanty_web::monitor::spawn(state.clone()); + shanty_web::mb_update::spawn(state.clone()); // Resolve static files directory let static_dir = std::env::current_exe() @@ -168,3 +212,97 @@ async fn main() -> anyhow::Result<()> { tracing::info!("server stopped"); Ok(()) } + +/// Create a LocalMusicBrainzFetcher from config if available. +fn create_local_mb_fetcher(config: &AppConfig) -> Option { + let db_path = config + .musicbrainz + .local_db_path + .as_ref() + .map(|p| p.to_string_lossy().to_string()) + .or_else(|| { + // Default location: data_dir/shanty-mb.db (only if it exists) + let default_path = shanty_config::data_dir().join("shanty-mb.db"); + if default_path.exists() { + Some(default_path.to_string_lossy().to_string()) + } else { + None + } + })?; + + match LocalMusicBrainzFetcher::new(&db_path) { + Ok(fetcher) => { + if fetcher.is_available() { + tracing::info!(path = %db_path, "opened local MusicBrainz database"); + Some(fetcher) + } else { + tracing::debug!(path = %db_path, "local MB database exists but has no data"); + None + } + } + Err(e) => { + tracing::warn!(path = %db_path, error = %e, "failed to open local MB database"); + None + } + } +} + +/// Run the `mb-import` subcommand. 
+///
+/// Resolves two locations, then optionally downloads the dumps and imports them:
+///
+/// * dump directory: `data_dir_override` when given, otherwise `mb-dumps` inside
+///   `shanty_config::data_dir()`;
+/// * SQLite database: `config.musicbrainz.local_db_path` when set, otherwise
+///   `shanty-mb.db` inside `shanty_config::data_dir()`.
+///
+/// When `download` is true the dump directory is created if needed and every
+/// entry of `shanty_data::mb_import::DUMP_FILES` is fetched from the latest
+/// dump folder before the import starts.
+///
+/// On success the import statistics and the database path are logged and
+/// printed to stdout.
+///
+/// # Errors
+///
+/// Returns an error if the dump directory cannot be created, the latest dump
+/// folder cannot be discovered, a download fails, the dump directory does not
+/// exist, or the import itself fails.
+async fn run_mb_import(
+    config: &AppConfig,
+    download: bool,
+    data_dir_override: Option<&str>,
+) -> anyhow::Result<()> {
+    let data_dir = data_dir_override
+        .map(std::path::PathBuf::from)
+        .unwrap_or_else(|| shanty_config::data_dir().join("mb-dumps"));
+
+    let db_path = config
+        .musicbrainz
+        .local_db_path
+        .clone()
+        .unwrap_or_else(|| shanty_config::data_dir().join("shanty-mb.db"));
+
+    tracing::info!(
+        dump_dir = %data_dir.display(),
+        db_path = %db_path.display(),
+        download = download,
+        "starting MusicBrainz import"
+    );
+
+    // Download dumps if requested
+    if download {
+        std::fs::create_dir_all(&data_dir)?;
+        let timestamp = shanty_data::mb_import::discover_latest_dump_folder()
+            .await
+            .map_err(|e| anyhow::anyhow!("failed to discover latest dump: {e}"))?;
+        tracing::info!(timestamp = %timestamp, "using dump folder");
+        for filename in shanty_data::mb_import::DUMP_FILES {
+            // The dump folder name is passed by reference so it can be reused
+            // for every file in the loop.
+            shanty_data::mb_import::download_dump(filename, &timestamp, &data_dir, |msg| {
+                tracing::info!("{msg}");
+            })
+            .await
+            .map_err(|e| anyhow::anyhow!("{e}"))?;
+        }
+    }
+
+    // Ensure the dump directory exists. Only existence is checked here; whether
+    // it actually contains dump files is left to the importer.
+    if !data_dir.exists() {
+        anyhow::bail!(
+            "dump directory {} does not exist. Use --download to fetch dumps, or provide --data-dir pointing to existing dump files.",
+            data_dir.display()
+        );
+    }
+
+    // Run import (handles opening the database internally). The call is not
+    // awaited, so it runs synchronously on this task until the import is done.
+    let stats = shanty_data::mb_import::run_import_at_path(&db_path, &data_dir, |msg| {
+        tracing::info!("{msg}");
+    })
+    .map_err(|e| anyhow::anyhow!("{e}"))?;
+
+    tracing::info!(%stats, db_path = %db_path.display(), "MusicBrainz import complete");
+    println!("{stats}");
+    println!("Database: {}", db_path.display());
+
+    Ok(())
+}