diff --git a/Cargo.lock b/Cargo.lock index 91821e35..7df7802d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -220,17 +220,6 @@ dependencies = [ "syn", ] -[[package]] -name = "bindgen_cuda" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be55fb326843bb67cccceeeaf21c961ef303f60018f9a2ab69494dad8eaf9" -dependencies = [ - "glob", - "num_cpus", - "rayon", -] - [[package]] name = "bit-set" version = "0.5.3" @@ -310,67 +299,6 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" -[[package]] -name = "candle-core" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c15b675b80d994b2eadb20a4bbe434eabeb454eac3ee5e2b4cf6f147ee9be091" -dependencies = [ - "byteorder", - "candle-kernels", - "candle-ug", - "cudarc 0.19.4", - "float8 0.6.1", - "gemm 0.19.0", - "half", - "libm", - "memmap2", - "num-traits", - "num_cpus", - "rand 0.9.2", - "rand_distr 0.5.1", - "rayon", - "safetensors 0.7.0", - "thiserror 2.0.18", - "yoke 0.8.2", - "zip", -] - -[[package]] -name = "candle-kernels" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8455f84bd810047c7c41216683c1020c915a9f8a740b3b0eabdd4fb2fbaa660" -dependencies = [ - "bindgen_cuda", -] - -[[package]] -name = "candle-nn" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3045fa9e7aef8567d209a27d56b692f60b96f4d0569f4c3011f8ca6715c65e03" -dependencies = [ - "candle-core", - "half", - "libc", - "num-traits", - "rayon", - "safetensors 0.7.0", - "serde", - "thiserror 2.0.18", -] - -[[package]] -name = "candle-ug" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c22d62be69068bf58987a45f690612739d8d2ea1bf508c1b87dc6815a019575d" -dependencies = [ - "ug", - "ug-cuda", -] - [[package]] name = 
"cassowary" version = "0.3.0" @@ -668,23 +596,12 @@ dependencies = [ "hybrid-array", ] -[[package]] -name = "cudarc" -version = "0.17.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf99ab37ee7072d64d906aa2dada9a3422f1d975cdf8c8055a573bc84897ed8" -dependencies = [ - "half", - "libloading 0.8.9", -] - [[package]] name = "cudarc" version = "0.19.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f071cd6a7b5d51607df76aa2d426aaabc7a74bc6bdb885b8afa63a880572ad9b" dependencies = [ - "float8 0.7.0", "half", "libloading 0.9.0", ] @@ -871,22 +788,6 @@ dependencies = [ "syn", ] -[[package]] -name = "dyn-stack" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c4713e43e2886ba72b8271aa66c93d722116acf7a75555cce11dcde84388fe8" -dependencies = [ - "bytemuck", - "dyn-stack-macros", -] - -[[package]] -name = "dyn-stack-macros" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1d926b4d407d372f141f93bb444696142c29d32962ccbd3531117cf3aa0bfa9" - [[package]] name = "either" version = "1.15.0" @@ -908,18 +809,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "enum-as-inner" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "enumflags2" version = "0.7.12" @@ -1031,28 +920,6 @@ dependencies = [ "miniz_oxide", ] -[[package]] -name = "float8" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719a903cc23e4a89e87962c2a80fdb45cdaad0983a89bd150bb57b4c8571a7d5" -dependencies = [ - "cudarc 0.19.4", - "half", - "num-traits", - "rand 0.9.2", - "rand_distr 0.5.1", -] - -[[package]] -name = "float8" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "c2d1f04709a8ac06e8e8042875a3c466cc4832d3c1a18dbcb9dba3c6e83046bc" -dependencies = [ - "half", -] - [[package]] name = "fnv" version = "1.0.7" @@ -1183,244 +1050,6 @@ dependencies = [ "slab", ] -[[package]] -name = "gemm" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab96b703d31950f1aeddded248bc95543c9efc7ac9c4a21fda8703a83ee35451" -dependencies = [ - "dyn-stack", - "gemm-c32 0.18.2", - "gemm-c64 0.18.2", - "gemm-common 0.18.2", - "gemm-f16 0.18.2", - "gemm-f32 0.18.2", - "gemm-f64 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "seq-macro", -] - -[[package]] -name = "gemm" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa0673db364b12263d103b68337a68fbecc541d6f6b61ba72fe438654709eacb" -dependencies = [ - "dyn-stack", - "gemm-c32 0.19.0", - "gemm-c64 0.19.0", - "gemm-common 0.19.0", - "gemm-f16 0.19.0", - "gemm-f32 0.19.0", - "gemm-f64 0.19.0", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "seq-macro", -] - -[[package]] -name = "gemm-c32" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6db9fd9f40421d00eea9dd0770045a5603b8d684654816637732463f4073847" -dependencies = [ - "dyn-stack", - "gemm-common 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "seq-macro", -] - -[[package]] -name = "gemm-c32" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "086936dbdcb99e37aad81d320f98f670e53c1e55a98bee70573e83f95beb128c" -dependencies = [ - "dyn-stack", - "gemm-common 0.19.0", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "seq-macro", -] - -[[package]] -name = "gemm-c64" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfcad8a3d35a43758330b635d02edad980c1e143dc2f21e6fd25f9e4eada8edf" -dependencies = [ - "dyn-stack", - "gemm-common 0.18.2", - 
"num-complex", - "num-traits", - "paste", - "raw-cpuid", - "seq-macro", -] - -[[package]] -name = "gemm-c64" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20c8aeeeec425959bda4d9827664029ba1501a90a0d1e6228e48bef741db3a3f" -dependencies = [ - "dyn-stack", - "gemm-common 0.19.0", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "seq-macro", -] - -[[package]] -name = "gemm-common" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a352d4a69cbe938b9e2a9cb7a3a63b7e72f9349174a2752a558a8a563510d0f3" -dependencies = [ - "bytemuck", - "dyn-stack", - "half", - "libm", - "num-complex", - "num-traits", - "once_cell", - "paste", - "pulp 0.21.5", - "raw-cpuid", - "rayon", - "seq-macro", - "sysctl", -] - -[[package]] -name = "gemm-common" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88027625910cc9b1085aaaa1c4bc46bb3a36aad323452b33c25b5e4e7c8e2a3e" -dependencies = [ - "bytemuck", - "dyn-stack", - "half", - "libm", - "num-complex", - "num-traits", - "once_cell", - "paste", - "pulp 0.22.2", - "raw-cpuid", - "rayon", - "seq-macro", - "sysctl", -] - -[[package]] -name = "gemm-f16" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cff95ae3259432f3c3410eaa919033cd03791d81cebd18018393dc147952e109" -dependencies = [ - "dyn-stack", - "gemm-common 0.18.2", - "gemm-f32 0.18.2", - "half", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "rayon", - "seq-macro", -] - -[[package]] -name = "gemm-f16" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3df7a55202e6cd6739d82ae3399c8e0c7e1402859b30e4cb780e61525d9486e" -dependencies = [ - "dyn-stack", - "gemm-common 0.19.0", - "gemm-f32 0.19.0", - "half", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "rayon", - "seq-macro", -] - -[[package]] -name = "gemm-f32" 
-version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc8d3d4385393304f407392f754cd2dc4b315d05063f62cf09f47b58de276864" -dependencies = [ - "dyn-stack", - "gemm-common 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "seq-macro", -] - -[[package]] -name = "gemm-f32" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e0b8c9da1fbec6e3e3ab2ce6bc259ef18eb5f6f0d3e4edf54b75f9fd41a81c" -dependencies = [ - "dyn-stack", - "gemm-common 0.19.0", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "seq-macro", -] - -[[package]] -name = "gemm-f64" -version = "0.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35b2a4f76ce4b8b16eadc11ccf2e083252d8237c1b589558a49b0183545015bd" -dependencies = [ - "dyn-stack", - "gemm-common 0.18.2", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "seq-macro", -] - -[[package]] -name = "gemm-f64" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "056131e8f2a521bfab322f804ccd652520c79700d81209e9d9275bbdecaadc6a" -dependencies = [ - "dyn-stack", - "gemm-common 0.19.0", - "num-complex", - "num-traits", - "paste", - "raw-cpuid", - "seq-macro", -] - [[package]] name = "getopts" version = "0.2.24" @@ -1510,7 +1139,6 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ - "bytemuck", "cfg-if", "crunchy", "num-traits", @@ -1589,7 +1217,7 @@ dependencies = [ "reqwest", "serde", "serde_json", - "thiserror 2.0.18", + "thiserror", "tokio", "ureq", "windows-sys 0.60.2", @@ -1789,7 +1417,7 @@ dependencies = [ "displaydoc", "potential_utf", "utf8_iter", - "yoke 0.8.2", + "yoke", "zerofrom", "zerovec", ] @@ -1856,7 +1484,7 @@ dependencies = [ "displaydoc", "icu_locale_core", "writeable", - "yoke 0.8.2", + "yoke", 
"zerofrom", "zerotrie", "zerovec", @@ -2117,7 +1745,7 @@ dependencies = [ "encoding_rs", "enumflags2", "llama-cpp-sys-2", - "thiserror 2.0.18", + "thiserror", "tracing", "tracing-core", ] @@ -2265,16 +1893,6 @@ version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" -[[package]] -name = "memmap2" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" -dependencies = [ - "libc", - "stable_deref_trait", -] - [[package]] name = "mime" version = "0.3.17" @@ -2410,20 +2028,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.6" @@ -2440,7 +2044,6 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" dependencies = [ - "bytemuck", "num-traits", ] @@ -2459,17 +2062,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - [[package]] name = "num-rational" version = "0.4.2" @@ -2813,43 +2405,6 @@ dependencies = [ "prost", ] -[[package]] -name = "pulp" -version = "0.21.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907" -dependencies = [ - "bytemuck", - "cfg-if", - "libm", - 
"num-complex", - "reborrow", - "version_check", -] - -[[package]] -name = "pulp" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e205bb30d5b916c55e584c22201771bcf2bad9aabd5d4127f38387140c38632" -dependencies = [ - "bytemuck", - "cfg-if", - "libm", - "num-complex", - "paste", - "pulp-wasm-simd-flag", - "raw-cpuid", - "reborrow", - "version_check", -] - -[[package]] -name = "pulp-wasm-simd-flag" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0" - [[package]] name = "quote" version = "1.0.45" @@ -2971,15 +2526,6 @@ dependencies = [ "unicode-width 0.2.0", ] -[[package]] -name = "raw-cpuid" -version = "11.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" -dependencies = [ - "bitflags", -] - [[package]] name = "rawpointer" version = "0.2.1" @@ -3017,12 +2563,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "reborrow" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430" - [[package]] name = "redox_syscall" version = "0.5.18" @@ -3040,7 +2580,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.17", "libredox", - "thiserror 2.0.18", + "thiserror", ] [[package]] @@ -3280,16 +2820,6 @@ dependencies = [ "bytemuck", ] -[[package]] -name = "safetensors" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "safetensors" version = "0.7.0" @@ -3354,12 +2884,6 @@ version = "1.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" -[[package]] -name = "seq-macro" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" - [[package]] name = "serde" version = "1.0.228" @@ -3669,20 +3193,6 @@ dependencies = [ "syn", ] -[[package]] -name = "sysctl" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" -dependencies = [ - "bitflags", - "byteorder", - "enum-as-inner", - "libc", - "thiserror 1.0.69", - "walkdir", -] - [[package]] name = "system-configuration" version = "0.7.0" @@ -3723,33 +3233,13 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "thiserror" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" -dependencies = [ - "thiserror-impl 1.0.69", -] - [[package]] name = "thiserror" version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.18", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "thiserror-impl", ] [[package]] @@ -3864,7 +3354,7 @@ dependencies = [ "serde", "serde_json", "spm_precompiled", - "thiserror 2.0.18", + "thiserror", "unicode-normalization-alignments", "unicode-segmentation", "unicode_categories", @@ -4094,7 +3584,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf" dependencies = [ "crossbeam-channel", - "thiserror 
2.0.18", + "thiserror", "time", "tracing-subscriber", ] @@ -4168,52 +3658,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "typed-path" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e" - [[package]] name = "typenum" version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" -[[package]] -name = "ug" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76b761acf8af3494640d826a8609e2265e19778fb43306c7f15379c78c9b05b0" -dependencies = [ - "gemm 0.18.2", - "half", - "libloading 0.8.9", - "memmap2", - "num", - "num-traits", - "num_cpus", - "rayon", - "safetensors 0.4.5", - "serde", - "thiserror 1.0.69", - "tracing", - "yoke 0.7.5", -] - -[[package]] -name = "ug-cuda" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f0a1fa748f26166778c33b8498255ebb7c6bffb472bcc0a72839e07ebb1d9b5" -dependencies = [ - "cudarc 0.17.8", - "half", - "serde", - "thiserror 1.0.69", - "ug", -] - [[package]] name = "unic-char-property" version = "0.9.0" @@ -4596,7 +4046,7 @@ dependencies = [ "sha2", "statrs", "tempfile", - "thiserror 2.0.18", + "thiserror", "tracing", "tracing-subscriber", "weaver-trace", @@ -4619,7 +4069,7 @@ dependencies = [ "serde_yaml", "similar", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-stream", "toml", @@ -4658,7 +4108,7 @@ dependencies = [ "sha2", "syn", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-stream", "tokio-util", @@ -4683,7 +4133,7 @@ dependencies = [ "serde_yaml", "sha2", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tracing", "tracing-subscriber", @@ -4700,28 
+4150,8 @@ dependencies = [ name = "weaver-embedding" version = "0.1.0" dependencies = [ - "anyhow", - "async-trait", - "candle-core", - "candle-nn", - "chrono", - "hyper-util", - "llama-cpp-2", - "prost", - "prost-types", - "serde", - "serde_json", - "tempfile", - "thiserror 2.0.18", - "tokenizers", - "tokio", - "tokio-stream", - "tonic", - "tonic-build", - "tower", - "tracing", "weaver-core", - "weaver-inference", + "weaver-spu", ] [[package]] @@ -4747,7 +4177,7 @@ dependencies = [ "serde_json", "serde_yaml", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "tokio-stream", "toml", @@ -4768,27 +4198,37 @@ name = "weaver-spu" version = "0.1.0" dependencies = [ "anyhow", + "async-trait", "base64 0.22.1", "bytemuck", "cc", + "chrono", "clap", - "cudarc 0.19.4", + "cudarc", "encoding_rs", "half", "hf-hub", + "hyper-util", "llama-cpp-2", "parking_lot", + "prost", + "prost-types", "reqwest", "rustc-hash 1.1.0", - "safetensors 0.7.0", + "safetensors", "serde", "serde_json", + "sha2", "tempfile", - "thiserror 2.0.18", + "thiserror", "tiktoken-rs", "tokenizers", "tokio", + "tokio-stream", "toml", + "tonic", + "tonic-build", + "tower", "tracing", "tracing-subscriber", "weaver-core", @@ -4804,7 +4244,7 @@ dependencies = [ "serde", "serde_json", "tempfile", - "thiserror 2.0.18", + "thiserror", "tokio", "toml", "tracing", @@ -5228,18 +4668,6 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" -[[package]] -name = "yoke" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" -dependencies = [ - "serde", - "stable_deref_trait", - "yoke-derive 0.7.5", - "zerofrom", -] - [[package]] name = "yoke" version = "0.8.2" @@ -5247,22 +4675,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ "stable_deref_trait", - "yoke-derive 0.8.2", + "yoke-derive", "zerofrom", ] -[[package]] -name = "yoke-derive" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - [[package]] name = "yoke-derive" version = "0.8.2" @@ -5329,7 +4745,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", - "yoke 0.8.2", + "yoke", "zerofrom", ] @@ -5339,7 +4755,7 @@ version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ - "yoke 0.8.2", + "yoke", "zerofrom", "zerovec-derive", ] @@ -5355,18 +4771,6 @@ dependencies = [ "syn", ] -[[package]] -name = "zip" -version = "7.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42e33efc22a0650c311c2ef19115ce232583abbe80850bc8b66509ebef02de0" -dependencies = [ - "crc32fast", - "indexmap", - "memchr", - "typed-path", -] - [[package]] name = "zmij" version = "1.0.21" diff --git a/crates/weaver-embedding/Cargo.toml b/crates/weaver-embedding/Cargo.toml index ca200ea3..094b1dbf 100644 --- a/crates/weaver-embedding/Cargo.toml +++ b/crates/weaver-embedding/Cargo.toml @@ -4,131 +4,21 @@ version.workspace = true edition.workspace = true rust-version.workspace = true license.workspace = true -description = "Embedding pipelines (chunking, prompt-prefix routing, pooling, late-chunking, gRPC client, native llama.cpp backend) for WeaverTools." - -# Crate skeleton landed in PR #248. 
Subsequent move PRs in -# Block A of the unified-SPU sprint pull existing pieces in: -# -# - `late_chunk.rs` from `weaver-database::chunking::late` (PR #249). -# - `gguf_backend.rs` from `weaver-inference::gguf_embed` (this PR). -# - `grpc_client.rs` from `weaver-database::persephone::embedding` (next). -# - `pin.rs` from `weaver-interface::embedder_pin` (next). -# - The `Embedder` trait + the new in-process Rust backend -# (`embedder_client.rs`) implementing it (Phase 1). +description = "Deprecated re-export shell — content folded into weaver-spu (encoder side) + weaver-core (Embedder trait) per PR-0.5.B + PR-0.5.D; removed in PR-0.5.E." [features] +# Forward feature flags to weaver-spu so consumers that opt into +# `gguf` (legacy GGUF encoder backend + Persephone gRPC client) via +# this crate still get the corresponding weaver-spu feature enabled. +# Removed in PR-0.5.E. default = [] -# Native llama.cpp GGUF embedding backend. Behind a feature flag -# so the gRPC-only consumer path (Python embedder during the -# migration window) doesn't pull `llama-cpp-2` and its CUDA -# build dependencies. Mirrors the same-named feature in -# `weaver-inference` — both crates have an independent `gguf` -# feature for their respective decoder / encoder use of llama.cpp. -gguf = ["dep:llama-cpp-2"] - -# candle-based encoder backend (Block C of the unified-SPU -# sprint, per `embedder-oxidization-Spec.md` §6). Pulls candle -# for safetensors loading + FP16 forward pass and the HF -# `tokenizers` crate. CPU-only by default; the operator opts into -# CUDA via the `candle-cuda` feature on top. Gated separately -# from `gguf` so the migration-window builds (gRPC-only -# consumers) don't pull candle. -candle = ["dep:candle-core", "dep:candle-nn", "dep:tokenizers"] -# CUDA acceleration for the candle backend. Implies `candle`. 
-# Pulls `nvcc` + `cudnn` build deps from candle-{core,nn}; the -# default `candle` feature stays CPU so workspace `cargo check` -# without CUDA toolchain still passes. -candle-cuda = ["candle", "candle-core/cuda", "candle-nn/cuda"] +gguf = ["weaver-spu/gguf"] [dependencies] -# `weaver-core` carries the `Embedder` trait + associated types -# post-PR-0.5.B. `grpc_client::EmbeddingClient`'s `Embedder` impl -# is in this crate (allowed by orphan rules: trait in weaver-core, -# concrete type in this crate). `embedder.rs` re-exports the trait -# during the transition window so existing consumers compile -# unchanged. +# Sole runtime deps: weaver-core (for the relocated `Embedder` trait +# + types) and weaver-spu (for the relocated late_chunk / pin / +# legacy gRPC + GGUF backends + proto module). +# `default-features = false` on weaver-spu to avoid feature leakage — +# this shell crate forwards features explicitly via the `gguf` flag. weaver-core = { workspace = true } - -# `tracing` and `anyhow` are unconditional — `late_chunk` is a -# pure-stdlib algorithm but we want module-level instrumentation -# hooks and ergonomic error returns available for any future -# addition without re-gating. The `gguf_backend` (feature-gated) -# uses `anyhow::Result` for its public surface today; future -# native-backend code in this crate will share the same error -# convention. -tracing = { workspace = true } -anyhow = { workspace = true } -thiserror = { workspace = true } - -# `async-trait` for the [`Embedder`] trait's `async fn` methods. -# Object-safe-via-`#[async_trait]` so consumers can hold -# `Arc`. -async-trait = { workspace = true } - -# `pin` (the cohort-identity lock-file module) needs serde for -# the `EmbedderPin` struct, serde_json for the JSON read/write -# path, and chrono for stamping `pinned_at` rfc3339. 
-serde = { workspace = true } -serde_json = { workspace = true } -chrono = { workspace = true } - -# `grpc_client` (the Persephone embedding gRPC client) — same -# transport stack `weaver-database`'s removed `persephone` -# module used. The Unix-socket transport uses `hyper-util` + -# `tower::service_fn` to wrap a UDS connection into a tonic -# `Channel`. `prost` carries the generated proto messages. -# `tokio` (UnixStream) handles the actual UDS connect. -tonic = { workspace = true } -prost = { workspace = true } -prost-types = { workspace = true } -hyper-util = { workspace = true } -tower = { workspace = true } -tokio = { workspace = true } - -# Native llama.cpp backend. Same version + features as -# `weaver-inference`'s `llama-cpp-2` declaration; both crates -# load `llama-cpp-2` independently but the singleton `LlamaBackend` -# they share is still process-global (per -# `embedder-oxidization-Spec.md` §4.4). -llama-cpp-2 = { version = "0.1.143", optional = true, default-features = false, features = ["cuda", "dynamic-link", "mtmd"] } - -# candle FP16 safetensors backend (gated behind the `candle` -# feature). `candle-core` carries Tensor / Device / DType; -# `candle-nn` carries the layer primitives (linear, layer-norm, -# embedding) used by the XLM-RoBERTa encoder + LoRA adapter -# wiring per `embedder-oxidization-Spec.md` §6.2 / §6.3. The -# `candle-cuda` feature on this crate enables `cuda` on both -# candle crates simultaneously; default `candle` stays CPU so a -# bare `cargo check --features candle` works without nvcc. -candle-core = { workspace = true, optional = true } -candle-nn = { workspace = true, optional = true } - -# HF `tokenizers` crate — production reads the same -# `tokenizer.json` the Python reference uses (Spec §7.1). -tokenizers = { workspace = true, optional = true } - -[build-dependencies] -# Generates `crate::proto::embedding::*` from the embedding.proto -# at workspace `proto/persephone/embedding/embedding.proto`. 
See -# build.rs for the rationale on duplicate generation alongside -# `weaver-database`'s build.rs during the migration window. -tonic-build = { workspace = true } - -[dev-dependencies] -# Tests in `gguf_backend.rs` load a real GGUF via -# `weaver-inference::gguf::{init_backend, load_model, -# GgufModelParams}` — those primitives belong to the model-runtime -# layer (`weaver-inference`), not the embedding-pipeline layer -# (this crate). The dev-dep keeps tests passing in their new home. -# Phase 1's `EmbedderClient` impl will make this a regular -# dependency. -weaver-inference = { workspace = true, features = ["gguf"] } - -# `pin` tests use `tempfile::TempDir` for hermetic file-system -# fixtures. -tempfile = { workspace = true } - -# `grpc_client` integration tests stand up a mock tonic server -# over a `TcpListener`-derived stream. `tokio-stream` provides -# the wrapper. -tokio-stream = { workspace = true } +weaver-spu = { workspace = true, default-features = false } diff --git a/crates/weaver-embedding/build.rs b/crates/weaver-embedding/build.rs deleted file mode 100644 index cd2b062c..00000000 --- a/crates/weaver-embedding/build.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! Compile the Persephone embedding service's `.proto` definitions -//! for the `weaver-embedding` crate. -//! -//! Per `embedder-oxidization-Spec.md` §2-§5, embedding-service types -//! belong in `weaver-embedding`, not `weaver-database`. This -//! build script compiles the same `embedding.proto` source that -//! `weaver-database`'s build.rs also compiles — both crates generate -//! independent Rust types from the same wire format. Wire-compatible -//! by construction; the duplication is intentional during the -//! migration window so external proto consumers in -//! `weaver-database`'s test suite (e.g. `tests/proto_types.rs`) -//! continue to work without churn. -//! -//! A future cleanup PR (post-Phase-1) removes the embedding proto -//! 
generation from `weaver-database` entirely once all consumers -//! migrate to `weaver_embedding::proto::embedding::*`. - -use std::path::PathBuf; - -fn main() -> Result<(), Box> { - let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"); - let proto_root = PathBuf::from(manifest_dir) - .join("../../proto") - .canonicalize() - .expect("proto/ directory not found — expected at workspace root"); - - let protos: Vec = [ - // Embedding only. weaver-database keeps generating common / - // extraction / training for its own consumers. - "persephone/embedding/embedding.proto", - ] - .iter() - .map(|p| proto_root.join(p)) - .collect(); - - tonic_build::configure() - .build_server(true) - .build_client(true) - .compile_protos(&protos, &[&proto_root])?; - - println!("cargo:rerun-if-changed={}", proto_root.display()); - - Ok(()) -} diff --git a/crates/weaver-embedding/src/embedder.rs b/crates/weaver-embedding/src/embedder.rs deleted file mode 100644 index 926e0f0d..00000000 --- a/crates/weaver-embedding/src/embedder.rs +++ /dev/null @@ -1,30 +0,0 @@ -//! Deprecated re-export — the `Embedder` trait + types relocated to -//! `weaver-core::embedder` per `docs/specs/weaver-spu-Spec.md` PR-0.5.B. -//! -//! New code should import from `weaver_core::embedder::*` directly. -//! This module remains during the transition window so existing -//! consumers compile unchanged. Removed in PR-0.5.E along with the -//! whole `weaver-embedding` crate (folded into `weaver-spu`). - -// `Embedder` is a trait — Rust has no stable trait-alias mechanism, so -// the only available form is `pub use` with a `#[deprecated]` attribute. -// Caveat: `#[deprecated]` on `pub use` does not reliably emit a warning -// at consumer call sites (see grpc_client.rs's note on this). Consumers -// migrating to `weaver_core::embedder::Embedder` should be informed via -// the spec + commit history rather than relying solely on rustc to -// flag it. Removed in PR-0.5.E. 
-#[deprecated( - since = "0.1.0", - note = "moved to weaver_core::embedder::Embedder; this re-export goes away in PR-0.5.E" -)] -pub use weaver_core::embedder::Embedder; - -// Type aliases (not `pub use`) so deprecation warnings fire reliably at -// consumer call sites. Same underlying type — any consumer that has -// already migrated to `weaver_core::embedder::EmbedderInfo` keeps -// working unchanged. Removed in PR-0.5.E. -#[deprecated( - since = "0.1.0", - note = "moved to weaver_core::embedder::EmbedderInfo; this alias goes away in PR-0.5.E" -)] -pub type EmbedderInfo = weaver_core::embedder::EmbedderInfo; diff --git a/crates/weaver-embedding/src/lib.rs b/crates/weaver-embedding/src/lib.rs index 1127a13c..9b80de17 100644 --- a/crates/weaver-embedding/src/lib.rs +++ b/crates/weaver-embedding/src/lib.rs @@ -1,107 +1,104 @@ -//! Embedding pipelines for WeaverTools. +//! Deprecated re-export shell — `weaver-embedding`'s encoder-side +//! modules folded into `weaver-spu` per `docs/specs/weaver-spu-Spec.md` +//! PR-0.5.D. //! -//! This crate is the home for everything between the -//! `weaver-inference` model-runtime layer and the consumers of -//! embedding vectors (`weaver-database` for substrate writes, -//! `weaver-core` for the surfacing engine and Notepad). Per the -//! ontology in issue #166 and `docs/specs/embedder-oxidization-Spec.md` -//! §2: +//! New code should import: +//! - `Embedder` trait + types: `weaver_core::embedder::*` +//! (relocated in PR-0.5.B). +//! - `late_chunk` + `LateChunkConfig` / `LateChunkResult` / +//! `late_chunk_embeddings`: `weaver_spu::encoder::*`. +//! - gRPC client (`EmbeddingClient`, `EmbeddingClientConfig`, +//! `EmbeddingEndpoint`): `weaver_spu::encoder::grpc_client_legacy` +//! (always available — production embedder backend during the +//! migration window; retires in PR-3.A). +//! - GGUF embedder backend: `weaver_spu::encoder::gguf_backend` +//! (gated by `gguf` feature; retires in PR-3.B). +//! 
- Cohort-pin guard: `weaver_spu::core::pin` (relocated in +//! PR-0.5.D). +//! - Persephone proto: `weaver_spu::proto::embedding::*` (always +//! available; retires in PR-3.A). //! -//! ```text -//! weaver-embedding ──→ weaver-core, weaver-inference -//! (uses LlamaModel + llama.cpp singleton) -//! ``` -//! -//! ## Status — skeleton only -//! -//! This is the first PR of the Phase 0 refactor (issue #166 / sprint -//! Block A per `docs/specs/unified-spu-sprint-sequence.md` §4). The -//! crate exists as a workspace member that builds clean; all -//! functional code lives in subsequent move PRs that target one -//! source file at a time. -//! -//! Move targets, in dependency order: -//! -//! - `late_chunk` ← `weaver-database::chunking::late` -//! - `gguf_backend` ← `weaver-inference::gguf_embed` -//! - `grpc_client` ← `weaver-database::persephone::embedding` -//! - `pin` ← `weaver-interface::embedder_pin` -//! - `embedder` (the `Embedder` trait + Rust in-process impl) — new code -//! -//! Once all moves land and the trait + impl are in place, the -//! Python embedder service migrates behind a feature flag per -//! Phase 1 of `embedder-oxidization-Spec.md` §10. +//! This crate stays in the workspace through the migration window so +//! existing consumers compile unchanged. Removed in PR-0.5.E along +//! with `weaver-inference` and the rest of the legacy crates. +// Module-scoped allow — this entire crate is a deprecated shell. +// Every re-export below points at a stable weaver-spu or weaver-core +// module. External consumers see deprecation warnings on their own +// `use` lines. +#![allow(deprecated)] #![doc(html_root_url = "https://docs.rs/weaver-embedding/0.1.0")] -#![deny(rust_2018_idioms)] -pub mod late_chunk; +// `Embedder` trait + types (relocated in PR-0.5.B). 
+#[deprecated( + since = "0.1.0", + note = "moved to weaver_core::embedder::Embedder; this re-export goes away in PR-0.5.E" +)] +pub use weaver_core::embedder::Embedder; -/// Persephone gRPC/protobuf definitions — embedding service. -/// -/// Generated by `build.rs` from the workspace -/// `proto/persephone/embedding/embedding.proto`. Consumers that -/// previously imported from `weaver_database::proto::embedding::*` -/// migrate to `weaver_embedding::proto::embedding::*` post-A.5. -/// `weaver-database` still generates its own copy of these types -/// during the migration window for backward-compat with -/// `weaver-database/tests/proto_types.rs`; a future cleanup PR -/// removes the duplication once that test migrates too. -pub mod proto { - /// Embedding service — vector embedding generation. - pub mod embedding { - tonic::include_proto!("persephone.embedding"); - } -} +#[deprecated( + since = "0.1.0", + note = "moved to weaver_core::embedder::EmbedderInfo; this alias goes away in PR-0.5.E" +)] +pub type EmbedderInfo = weaver_core::embedder::EmbedderInfo; -/// Typed gRPC client for the Persephone embedding service over -/// a Unix domain socket or TCP. Moved from -/// `weaver-database::persephone::embedding`. Phase 1's -/// `EmbedderClient` (the in-process Rust backend) joins this -/// module behind a shared `Embedder` trait per spec §3. -pub mod grpc_client; +// Late-chunking surface (relocated in PR-0.5.D). +#[deprecated( + since = "0.1.0", + note = "moved to weaver_spu::encoder::late_chunk::LateChunkConfig; this alias goes away in PR-0.5.E" +)] +pub type LateChunkConfig = weaver_spu::encoder::LateChunkConfig; -/// The backend-agnostic [`Embedder`] trait — implemented by the -/// gRPC client today and by the Rust in-process backend post-Phase 1. -/// Consumers (surfacing engine, Notepad, Pen, sleep stage A, -/// preseed materialization) hold `Arc<dyn Embedder>` and stay -/// backend-agnostic.
-pub mod embedder; +#[deprecated( + since = "0.1.0", + note = "moved to weaver_spu::encoder::late_chunk::LateChunkResult; this alias goes away in PR-0.5.E" +)] +pub type LateChunkResult = weaver_spu::encoder::LateChunkResult; -// `Embedder` and `EmbedderInfo` re-exports — the trait + types relocated -// to `weaver-core::embedder` per `docs/specs/weaver-spu-Spec.md` PR-0.5.B. -// New code should import from `weaver_core::embedder::*`. These re-exports -// keep existing consumers compiling during the migration; removed in -// PR-0.5.E with the rest of this crate. -// -// `#[allow(deprecated)]` is required here because `embedder::Embedder` -// and `embedder::EmbedderInfo` are themselves marked deprecated in the -// `embedder` module — re-exporting them at the crate root would trigger -// `-D warnings` without this. Narrower allow than a crate-level one; -// scoped to just this re-export line per the reviewer's "narrower -// allow" guidance. -#[allow(deprecated)] -pub use embedder::{Embedder, EmbedderInfo}; +#[deprecated( + since = "0.1.0", + note = "moved to weaver_spu::encoder::late_chunk::late_chunk_embeddings; consumers should call the function via its new path" +)] +pub use weaver_spu::encoder::late_chunk_embeddings; -// Native llama.cpp GGUF embedding backend. Behind the `gguf` -// cargo feature so consumers that only need the gRPC client -// path (Python embedder migration window) don't pull in -// `llama-cpp-2`. Phase 1's `EmbedderClient` impl wraps this -// module's `embed_pooled` against a `LlamaContext` constructed -// per `embedder-oxidization-Spec.md` §6. -#[cfg(feature = "gguf")] -pub mod gguf_backend; +#[deprecated( + since = "0.1.0", + note = "moved to weaver_spu::encoder::late_chunk; this re-export goes away in PR-0.5.E" +)] +pub use weaver_spu::encoder::late_chunk; + +// Cohort-pin guard (relocated in PR-0.5.D). 
+#[deprecated( + since = "0.1.0", + note = "moved to weaver_spu::core::pin; this re-export goes away in PR-0.5.E" +)] +pub use weaver_spu::core::pin; + +// gRPC client + proto (always-available; the gRPC client is the +// production embedder backend during the migration window). +// Retires in PR-3.A. +#[deprecated( + since = "0.1.0", + note = "moved to weaver_spu::encoder::grpc_client_legacy; this re-export goes away in PR-0.5.E" +)] +pub use weaver_spu::encoder::grpc_client_legacy as grpc_client; -// Cohort-identity pin — SG5-C harness-lifecycle lock file. Refuses -// to start the daemon if the live embedder identity has drifted -// from the recorded pin. Lives in `weaver-embedding` so it sits -// alongside the loader paths it guards (the cohort-pin guard is -// embedding-substrate concern, not a CLI concern). -pub mod pin; +#[deprecated( + since = "0.1.0", + note = "moved to weaver_spu::proto; this re-export goes away in PR-0.5.E" +)] +pub use weaver_spu::proto; + +// GGUF backend (feature-gated; legacy in-process llama.cpp embedder +// path; retires in PR-3.B). +#[cfg(feature = "gguf")] +#[deprecated( + since = "0.1.0", + note = "moved to weaver_spu::encoder::gguf_backend; this re-export goes away in PR-0.5.E" +)] +pub use weaver_spu::encoder::gguf_backend; -// Re-export the late-chunking surface at the crate root so -// consumers can write `use weaver_embedding::{LateChunkConfig, -// LateChunkResult, late_chunk_embeddings};` — matches the shape -// the (now-removed) `weaver_database::chunking` re-exports -// presented at the previous home. -pub use late_chunk::{LateChunkConfig, LateChunkResult, late_chunk_embeddings}; +// Convenience top-level re-exports preserving the prior shape: +// `use weaver_embedding::{LateChunkConfig, LateChunkResult, +// late_chunk_embeddings};` continues to compile, but each name is +// individually deprecated above so consumers see warnings. 
diff --git a/crates/weaver-spu/Cargo.toml b/crates/weaver-spu/Cargo.toml index 34a5dcd2..304118cd 100644 --- a/crates/weaver-spu/Cargo.toml +++ b/crates/weaver-spu/Cargo.toml @@ -44,6 +44,22 @@ llama-cpp-legacy = ["gguf"] # / GPU primitives the legacy decoder backend depends on. weaver-core = { workspace = true } +# Encoder-side gRPC stack (folded in from weaver-embedding in +# PR-0.5.D). Used by `encoder::grpc_client_legacy` to talk to the +# Python `weaver-embedder.service` during the migration window. +# Always compiled — `grpc_client_legacy` is the **production +# embedder backend** today, not a feature-gated path. Retires in +# PR-3.A alongside the gRPC client. +tonic = { workspace = true } +prost = { workspace = true } +prost-types = { workspace = true } +hyper-util = { workspace = true } +tower = { workspace = true } +async-trait = { workspace = true } +serde_json = { workspace = true } +chrono = { workspace = true } +sha2 = { workspace = true } + # HuggingFace ecosystem (decoder side: download, safetensors, tokenizer) hf-hub = "0.4" safetensors = { workspace = true } @@ -70,7 +86,7 @@ reqwest = { workspace = true } # Serialization serde = { workspace = true } -serde_json = { workspace = true } +# serde_json declared in the encoder gRPC stack block above. # Logging tracing = { workspace = true } @@ -97,9 +113,17 @@ tracing-subscriber = { workspace = true } # `weaver-inference` had a `cc` build-dep for compiling its custom # CUDA kernels via build.rs. Folds in unchanged via PR-0.5.C. cc = "1" +# `weaver-embedding` had a `tonic-build` build-dep for compiling the +# Persephone embedding-service proto. Folded in via PR-0.5.D; retires +# alongside the gRPC client in PR-3.A. +tonic-build = { workspace = true } [dev-dependencies] tempfile = { workspace = true } +# `tokio-stream` for the legacy grpc_client integration tests folded +# in from weaver-embedding in PR-0.5.D (the test spins up an +# in-process tonic server backed by a TcpListener-derived stream). 
+tokio-stream = { workspace = true } [[bin]] name = "weaver-infer" diff --git a/crates/weaver-spu/build.rs b/crates/weaver-spu/build.rs index f51369bc..da55046c 100644 --- a/crates/weaver-spu/build.rs +++ b/crates/weaver-spu/build.rs @@ -1,8 +1,32 @@ -fn main() { - // Only compile CUDA kernels when the cuda feature is enabled +//! `weaver-spu` build script — combines: +//! +//! 1. The legacy CUDA-kernel compile step (folded in from +//! `weaver-inference` in PR-0.5.C; gated by `cuda` feature, since +//! the kernels only get linked when the cudarc-backed decoder +//! path is in scope). +//! 2. The Persephone embedding-service proto compile step (folded +//! in from `weaver-embedding` in PR-0.5.D; **always compiled** +//! because `encoder::grpc_client_legacy` — the production +//! embedder backend during the migration window — depends on +//! the generated proto types unconditionally). +//! +//! The CUDA step is skipped when `cuda` is not set. The proto step +//! always runs. Default `cargo build` (no features) does the proto +//! compile only. + +use std::path::PathBuf; + +fn main() -> Result<(), Box<dyn std::error::Error>> { if std::env::var("CARGO_FEATURE_CUDA").is_ok() { compile_cuda_kernels(); } + // Persephone proto is always compiled — `encoder::grpc_client_legacy` + // is the production embedder backend during the migration window + // (talks to the Python `weaver-embedder.service`), not a + // feature-gated path. Retires alongside the gRPC client in + // PR-3.A.
+ compile_persephone_proto()?; + Ok(()) } fn compile_cuda_kernels() { @@ -38,3 +62,35 @@ fn compile_cuda_kernels() { println!("cargo:rustc-link-lib=dylib=cudart"); println!("cargo:rustc-link-lib=dylib=cublas"); } + +fn compile_persephone_proto() -> Result<(), Box<dyn std::error::Error>> { + let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR not set"); + let proto_root = PathBuf::from(manifest_dir) + .join("../../proto") + .canonicalize() + .expect("proto/ directory not found — expected at workspace root"); + + // Embedding-only. `weaver-database` keeps generating common / + // extraction / training for its own consumers; this proto step + // retires alongside the gRPC client in PR-3.A. + let protos: Vec<PathBuf> = ["persephone/embedding/embedding.proto"] + .iter() + .map(|p| proto_root.join(p)) + .collect(); + + tonic_build::configure() + .build_server(true) + .build_client(true) + .compile_protos(&protos, &[&proto_root])?; + + // Per-proto rerun-if-changed so editing any individual `.proto` + // forces a rebuild — `proto_root` alone catches new files added + // to the directory but not edits to existing ones reliably + // across cargo versions.
+ for proto in &protos { + println!("cargo:rerun-if-changed={}", proto.display()); + } + println!("cargo:rerun-if-changed={}", proto_root.display()); + + Ok(()) +} diff --git a/crates/weaver-spu/src/core/mod.rs b/crates/weaver-spu/src/core/mod.rs index 49d915ea..6b7633ea 100644 --- a/crates/weaver-spu/src/core/mod.rs +++ b/crates/weaver-spu/src/core/mod.rs @@ -24,6 +24,7 @@ pub mod gpu_orchestrator; pub mod nvlink; +pub mod pin; pub mod probe; pub mod vram; diff --git a/crates/weaver-embedding/src/pin.rs b/crates/weaver-spu/src/core/pin.rs similarity index 100% rename from crates/weaver-embedding/src/pin.rs rename to crates/weaver-spu/src/core/pin.rs diff --git a/crates/weaver-embedding/src/gguf_backend.rs b/crates/weaver-spu/src/encoder/gguf_backend.rs similarity index 100% rename from crates/weaver-embedding/src/gguf_backend.rs rename to crates/weaver-spu/src/encoder/gguf_backend.rs diff --git a/crates/weaver-embedding/src/grpc_client.rs b/crates/weaver-spu/src/encoder/grpc_client_legacy.rs similarity index 98% rename from crates/weaver-embedding/src/grpc_client.rs rename to crates/weaver-spu/src/encoder/grpc_client_legacy.rs index 0d2cc722..0ccb6723 100644 --- a/crates/weaver-embedding/src/grpc_client.rs +++ b/crates/weaver-spu/src/encoder/grpc_client_legacy.rs @@ -511,11 +511,11 @@ impl std::fmt::Debug for EmbeddingClient { // in-process impl post-Phase 1 cutover, with no consumer-side changes. // // The `info()` impl converts the proto `InfoResponse` to the -// backend-agnostic [`crate::embedder::EmbedderInfo`] so the trait +// backend-agnostic [`weaver_core::embedder::EmbedderInfo`] so the trait // surface stays clean of proto types. 
#[async_trait::async_trait] -impl crate::embedder::Embedder for EmbeddingClient { +impl weaver_core::embedder::Embedder for EmbeddingClient { async fn embed( &self, texts: &[String], @@ -535,9 +535,9 @@ impl crate::embedder::Embedder for EmbeddingClient { EmbeddingClient::embed_late_chunked(self, text, task).await } - async fn info(&self) -> Result { + async fn info(&self) -> Result { let resp = EmbeddingClient::info(self).await?; - Ok(crate::embedder::EmbedderInfo { + Ok(weaver_core::embedder::EmbedderInfo { model_name: resp.model_name, model_loaded: resp.model_loaded, dimension: resp.dimension, diff --git a/crates/weaver-embedding/src/late_chunk.rs b/crates/weaver-spu/src/encoder/late_chunk.rs similarity index 100% rename from crates/weaver-embedding/src/late_chunk.rs rename to crates/weaver-spu/src/encoder/late_chunk.rs diff --git a/crates/weaver-spu/src/encoder/mod.rs b/crates/weaver-spu/src/encoder/mod.rs index db5770e7..303287f2 100644 --- a/crates/weaver-spu/src/encoder/mod.rs +++ b/crates/weaver-spu/src/encoder/mod.rs @@ -1,32 +1,59 @@ //! Encoder half of the SPU — Jina V4 embedder runtime. //! -//! Per `docs/specs/weaver-spu-Spec.md` §2.3 + §3, this module hosts: +//! Per `docs/specs/weaver-spu-Spec.md` §2.3 + §3. PR-0.5.D folded +//! in the existing `weaver-embedding` modules: //! -//! - `lora_dispatch` — `MultiAdapterLinear` analogue, a -//! PEFT-compatible multi-adapter LoRA dispatcher. Holds 3 -//! task-keyed `(lora_A, lora_B)` pairs simultaneously and -//! dispatches per-call on `task_label`. Phase 1 PR-1.C. -//! Companion upstream contribution to `candle-nn` / -//! `candle-lora`. -//! - `pooling` — attention-mask weighted mean + L2 norm. Produces -//! the 2048-dim `single_vec_emb` from per-token hidden states. -//! Phase 1 PR-1.D. -//! - `matryoshka` — truncate-and-renormalize for matryoshka -//! dimensions `{128, 256, 512, 1024}`. Phase 1 PR-1.D. -//! - `late_chunk` — late-chunking algorithm. Folds in from -//! 
`weaver-embedding::late_chunk` during PR-0.5.D (algorithm -//! unchanged; just relocates). -//! - `jina_v4` — Jina V4 wrapper that loads base shards + adapter -//! shard, attaches `MultiAdapterLinear` wrappers per §3.2, -//! implements the 4-string API contract via internal 4→3 -//! adapter mapping + prefix-prepending. Phase 1 PR-1.D. -//! - `client` — `EmbedderClient`, the in-process -//! `weaver_core::embedder::Embedder` impl backed by candle. -//! Phase 1 PR-1.F. -//! - `grpc_client_legacy` — legacy gRPC client to the Python -//! `weaver-embedder.service` (rollback target during the -//! migration window). Folds in from `weaver-embedding::grpc_client` -//! during PR-0.5.D; deleted in Phase 3 PR-3.A. +//! - `late_chunk` — late-chunking algorithm (mean-pool token +//! embeddings into per-chunk vectors). Algorithm unchanged from +//! the prior `weaver-embedding::late_chunk`. +//! - `grpc_client_legacy` — gRPC client to the Python +//! `weaver-embedder.service`. **Production embedder backend +//! during the migration window**, always compiled (not +//! feature-gated). Renamed during the move (was +//! `weaver-embedding::grpc_client`) to make its transitional +//! status explicit. Deleted in Phase 3 PR-3.A. +//! - `gguf_backend` (feature-gated `gguf`) — legacy llama.cpp-based +//! in-process GGUF embedder backend (an experimental path; not +//! the production embedder). Pulls llama-cpp-2 only when the +//! feature is enabled. Deleted in Phase 3 PR-3.B. //! -//! Skeleton only — sub-modules land in their respective Phase 0.5 / -//! Phase 1 PRs. +//! ## Planned modules (Phase 1 — not yet present) +//! +//! Phase 1 PRs (PR-1.B onwards per +//! `docs/specs/weaver-spu-Spec.md` §10) add the candle-backed +//! encoder modules listed below. These are **not importable +//! today** — they're documented here so the eventual code shape +//! is visible in the rustdoc, and so that PR reviewers can spot +//! drift between the planned API surface and what lands. +//! +//! 
- `lora_dispatch` — `MultiAdapterLinear` analogue (planned; +//! PR-1.C, companion upstream contribution). +//! - `pooling` — attention-mask weighted mean + L2 norm (planned; +//! PR-1.D). +//! - `matryoshka` — truncate-and-renormalize (planned; PR-1.D). +//! - `jina_v4` — Jina V4 wrapper (planned; PR-1.D). +//! - `client` — `EmbedderClient` (planned; PR-1.F). + +pub mod late_chunk; + +// `grpc_client_legacy` is NOT feature-gated: it's the production +// embedder backend during the migration window (talks to the Python +// `weaver-embedder.service` over Unix socket gRPC). Always-available +// because the daemon needs an embedder regardless of features. +// Retires in PR-3.A. +pub mod grpc_client_legacy; + +// `gguf_backend` is feature-gated because it pulls llama-cpp-2 (a +// substantial transitive build cost). Used only by experimental +// in-process GGUF embedder paths; not the production path. Retires +// in PR-3.B. +#[cfg(feature = "gguf")] +pub mod gguf_backend; + +// Re-export of the late-chunking surface that the prior +// `weaver_embedding::lib` re-exported at the crate root. Consumers +// that did `use weaver_embedding::{LateChunkConfig, LateChunkResult, +// late_chunk_embeddings};` continue to work via the deprecated +// re-export shell in `weaver-embedding`; this re-export keeps the +// shape consistent within `weaver-spu` itself. +pub use late_chunk::{LateChunkConfig, LateChunkResult, late_chunk_embeddings}; diff --git a/crates/weaver-spu/src/lib.rs b/crates/weaver-spu/src/lib.rs index 706e60fb..8f99bb84 100644 --- a/crates/weaver-spu/src/lib.rs +++ b/crates/weaver-spu/src/lib.rs @@ -53,3 +53,18 @@ pub mod core; pub mod decoder; pub mod encoder; pub mod models; + +/// Persephone gRPC/protobuf definitions — embedding service. +/// +/// Generated by `build.rs` from the workspace +/// `proto/persephone/embedding/embedding.proto`. Folded in from +/// `weaver-embedding::proto` via PR-0.5.D. 
Always available (the +/// gRPC client to the Python embedder is the production backend +/// during the migration window, not a feature). Removed in Phase 3 +/// PR-3.A alongside the gRPC client itself. +pub mod proto { + /// Embedding service — vector embedding generation. + pub mod embedding { + tonic::include_proto!("persephone.embedding"); + } +} diff --git a/crates/weaver-embedding/tests/grpc_client.rs b/crates/weaver-spu/tests/grpc_client_legacy.rs similarity index 96% rename from crates/weaver-embedding/tests/grpc_client.rs rename to crates/weaver-spu/tests/grpc_client_legacy.rs index 969e900d..7d596597 100644 --- a/crates/weaver-embedding/tests/grpc_client.rs +++ b/crates/weaver-spu/tests/grpc_client_legacy.rs @@ -8,7 +8,7 @@ //! surface (Embed, EmbedLateChunked, Info, ensure_ready, GracefulShutdown). // Module-scoped `allow(deprecated)` — these tests reference the -// `weaver_embedding::grpc_client::{EmbedResult, EmbeddingError, ...}` +// `weaver_spu::encoder::grpc_client_legacy::{EmbedResult, EmbeddingError, ...}` // type aliases that were marked `#[deprecated]` in PR-0.5.B (they now // alias `weaver_core::embedder::*`). 
The tests stay on the legacy path // because they're testing the legacy gRPC client; the whole crate @@ -18,7 +18,7 @@ use std::path::PathBuf; -use weaver_embedding::grpc_client::{EmbedResult, EmbeddingClientConfig, EmbeddingEndpoint}; +use weaver_spu::encoder::grpc_client_legacy::{EmbedResult, EmbeddingClientConfig, EmbeddingEndpoint}; #[test] fn test_config_defaults() { @@ -58,7 +58,7 @@ fn test_endpoint_variants() { #[tokio::test] async fn test_connect_nonexistent_socket_fails() { - use weaver_embedding::grpc_client::EmbeddingClient; + use weaver_spu::encoder::grpc_client_legacy::EmbeddingClient; let dir = tempfile::tempdir().expect("failed to create tempdir"); let socket_path = dir.path().join("nonexistent.sock"); @@ -100,10 +100,10 @@ mod mock_server { use tokio_stream::wrappers::TcpListenerStream; use tonic::{Request, Response, Status, transport::Server}; - use weaver_embedding::proto::embedding::embedding_service_server::{ + use weaver_spu::proto::embedding::embedding_service_server::{ EmbeddingService, EmbeddingServiceServer, }; - use weaver_embedding::proto::embedding::{ + use weaver_spu::proto::embedding::{ EmbedLateChunkedRequest, EmbedLateChunkedResponse, EmbedRequest, EmbedResponse, Embedding, GracefulShutdownRequest, GracefulShutdownResponse, InfoRequest, InfoResponse, LateChunk, }; @@ -308,8 +308,8 @@ mod mock_server { /// Shared helper: build a client against the mock server's TCP URL. Every /// integration test needs the same `EmbeddingClientConfig { endpoint: Tcp(…), /// ..Default::default() }` dance, so centralize it here. 
-async fn connect_tcp_client(url: String) -> weaver_embedding::grpc_client::EmbeddingClient { - use weaver_embedding::grpc_client::{ +async fn connect_tcp_client(url: String) -> weaver_spu::encoder::grpc_client_legacy::EmbeddingClient { + use weaver_spu::encoder::grpc_client_legacy::{ EmbeddingClient, EmbeddingClientConfig, EmbeddingEndpoint, }; EmbeddingClient::connect(EmbeddingClientConfig { @@ -431,7 +431,7 @@ async fn test_embed_late_chunked_roundtrip() { async fn test_embed_late_chunked_rejects_zero_dimension_with_values() { use mock_server::{MockService, spawn}; use std::sync::{Arc, atomic::AtomicU32}; - use weaver_embedding::grpc_client::EmbeddingError; + use weaver_spu::encoder::grpc_client_legacy::EmbeddingError; let srv = spawn(MockService { model_loaded: true, @@ -481,7 +481,7 @@ async fn test_ensure_ready_ok_when_loaded() { async fn test_ensure_ready_rejects_unloaded_model() { use mock_server::{MockService, spawn}; use std::sync::{Arc, atomic::AtomicU32}; - use weaver_embedding::grpc_client::EmbeddingError; + use weaver_spu::encoder::grpc_client_legacy::EmbeddingError; let srv = spawn(MockService { model_loaded: false, @@ -550,7 +550,7 @@ async fn test_ensure_ready_maps_unavailable_status_to_not_available() { use mock_server::{MockService, spawn}; use std::sync::{Arc, atomic::AtomicU32}; use tonic::Status; - use weaver_embedding::grpc_client::EmbeddingError; + use weaver_spu::encoder::grpc_client_legacy::EmbeddingError; let srv = spawn(MockService { model_loaded: true, @@ -580,7 +580,7 @@ async fn test_ensure_ready_maps_deadline_exceeded_status_to_not_available() { use mock_server::{MockService, spawn}; use std::sync::{Arc, atomic::AtomicU32}; use tonic::Status; - use weaver_embedding::grpc_client::EmbeddingError; + use weaver_spu::encoder::grpc_client_legacy::EmbeddingError; let srv = spawn(MockService { model_loaded: true, @@ -605,7 +605,7 @@ async fn test_ensure_ready_maps_failed_precondition_status_to_not_available() { use mock_server::{MockService, 
spawn}; use std::sync::{Arc, atomic::AtomicU32}; use tonic::Status; - use weaver_embedding::grpc_client::EmbeddingError; + use weaver_spu::encoder::grpc_client_legacy::EmbeddingError; let srv = spawn(MockService { model_loaded: true, @@ -634,7 +634,7 @@ async fn test_ensure_ready_propagates_other_status_codes_as_transport() { use mock_server::{MockService, spawn}; use std::sync::{Arc, atomic::AtomicU32}; use tonic::Status; - use weaver_embedding::grpc_client::EmbeddingError; + use weaver_spu::encoder::grpc_client_legacy::EmbeddingError; let srv = spawn(MockService { model_loaded: true, @@ -663,7 +663,7 @@ async fn test_ensure_ready_maps_dropped_server_to_not_available() { // `ensure_ready`'s match. use mock_server::{MockService, spawn}; use std::sync::{Arc, atomic::AtomicU32}; - use weaver_embedding::grpc_client::EmbeddingError; + use weaver_spu::encoder::grpc_client_legacy::EmbeddingError; let srv = spawn(MockService { model_loaded: true,