Commit 8588e33a authored by GuanLuo's avatar GuanLuo Committed by GitHub
Browse files

feat: Add KV publisher and receiver. Add KV aware routing example.


Signed-off-by: default avatarNeelay Shah <neelays@nvidia.com>
Co-authored-by: default avataraflowers <aflowers@nvidia.com>
Co-authored-by: default avatarRyan McCormick <rmccormick@nvidia.com>
Co-authored-by: default avatarhongkuanz <hongkuanz@nvidia.com>
Co-authored-by: default avatarNeelay Shah <neelays@nvidia.com>
parent d8aada0b
...@@ -238,14 +238,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" ...@@ -238,14 +238,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f"
dependencies = [ dependencies = [
"async-trait", "async-trait",
"axum-core", "axum-core 0.4.5",
"bytes", "bytes",
"futures-util", "futures-util",
"http", "http",
"http-body", "http-body",
"http-body-util", "http-body-util",
"itoa", "itoa",
"matchit", "matchit 0.7.3",
"memchr", "memchr",
"mime", "mime",
"percent-encoding", "percent-encoding",
...@@ -258,6 +258,40 @@ dependencies = [ ...@@ -258,6 +258,40 @@ dependencies = [
"tower-service", "tower-service",
] ]
[[package]]
name = "axum"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
dependencies = [
"axum-core 0.5.0",
"bytes",
"form_urlencoded",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-util",
"itoa",
"matchit 0.8.4",
"memchr",
"mime",
"percent-encoding",
"pin-project-lite",
"rustversion",
"serde",
"serde_json",
"serde_path_to_error",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tower 0.5.2",
"tower-layer",
"tower-service",
"tracing",
]
[[package]] [[package]]
name = "axum-core" name = "axum-core"
version = "0.4.5" version = "0.4.5"
...@@ -278,6 +312,26 @@ dependencies = [ ...@@ -278,6 +312,26 @@ dependencies = [
"tower-service", "tower-service",
] ]
[[package]]
name = "axum-core"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df1362f362fd16024ae199c1970ce98f9661bf5ef94b9808fee734bc3698b733"
dependencies = [
"bytes",
"futures-util",
"http",
"http-body",
"http-body-util",
"mime",
"pin-project-lite",
"rustversion",
"sync_wrapper",
"tower-layer",
"tower-service",
"tracing",
]
[[package]] [[package]]
name = "backtrace" name = "backtrace"
version = "0.3.74" version = "0.3.74"
...@@ -1533,6 +1587,12 @@ version = "0.7.3" ...@@ -1533,6 +1587,12 @@ version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
[[package]]
name = "matchit"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.7.4" version = "2.7.4"
...@@ -2468,6 +2528,16 @@ dependencies = [ ...@@ -2468,6 +2528,16 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "serde_path_to_error"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6"
dependencies = [
"itoa",
"serde",
]
[[package]] [[package]]
name = "serde_repr" name = "serde_repr"
version = "0.1.19" version = "0.1.19"
...@@ -2488,6 +2558,18 @@ dependencies = [ ...@@ -2488,6 +2558,18 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "serde_urlencoded"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
dependencies = [
"form_urlencoded",
"itoa",
"ryu",
"serde",
]
[[package]] [[package]]
name = "sha2" name = "sha2"
version = "0.10.8" version = "0.10.8"
...@@ -2895,7 +2977,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" ...@@ -2895,7 +2977,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
dependencies = [ dependencies = [
"async-stream", "async-stream",
"async-trait", "async-trait",
"axum", "axum 0.7.9",
"base64", "base64",
"bytes", "bytes",
"h2", "h2",
...@@ -2961,8 +3043,10 @@ dependencies = [ ...@@ -2961,8 +3043,10 @@ dependencies = [
"futures-util", "futures-util",
"pin-project-lite", "pin-project-lite",
"sync_wrapper", "sync_wrapper",
"tokio",
"tower-layer", "tower-layer",
"tower-service", "tower-service",
"tracing",
] ]
[[package]] [[package]]
...@@ -2983,6 +3067,7 @@ version = "0.1.41" ...@@ -2983,6 +3067,7 @@ version = "0.1.41"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
dependencies = [ dependencies = [
"log",
"pin-project-lite", "pin-project-lite",
"tracing-attributes", "tracing-attributes",
"tracing-core", "tracing-core",
...@@ -3096,6 +3181,36 @@ dependencies = [ ...@@ -3096,6 +3181,36 @@ dependencies = [
"xxhash-rust", "xxhash-rust",
] ]
[[package]]
name = "triton-llm"
version = "0.2.0"
dependencies = [
"anyhow",
"async-stream",
"async-trait",
"axum 0.8.1",
"bytes",
"chrono",
"derive_builder",
"either",
"futures",
"indexmap 2.7.1",
"prometheus",
"regex",
"serde",
"serde_json",
"thiserror 2.0.11",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"triton-distributed",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]] [[package]]
name = "triton_distributed_py3" name = "triton_distributed_py3"
version = "0.2.0" version = "0.2.0"
...@@ -3111,7 +3226,9 @@ dependencies = [ ...@@ -3111,7 +3226,9 @@ dependencies = [
"tokio", "tokio",
"tokio-stream", "tokio-stream",
"tracing", "tracing",
"tracing-subscriber",
"triton-distributed", "triton-distributed",
"triton-llm",
] ]
[[package]] [[package]]
...@@ -3152,6 +3269,12 @@ version = "1.0.16" ...@@ -3152,6 +3269,12 @@ version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]] [[package]]
name = "unindent" name = "unindent"
version = "0.2.3" version = "0.2.3"
......
...@@ -30,7 +30,8 @@ crate-type = ["cdylib"] ...@@ -30,7 +30,8 @@ crate-type = ["cdylib"]
[dependencies] [dependencies]
triton-distributed = { path = "../" } triton-llm = { path = "../llm/rust/triton-llm" }
triton-distributed = { path = "../runtime/rust" }
futures = "0.3" futures = "0.3"
once_cell = "1.20.3" once_cell = "1.20.3"
...@@ -40,6 +41,7 @@ thiserror = "2.0" ...@@ -40,6 +41,7 @@ thiserror = "2.0"
tokio = { version = "1", features = ["full"] } tokio = { version = "1", features = ["full"] }
tokio-stream = "0" tokio-stream = "0"
tracing = "0" tracing = "0"
tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] }
# "extension-module" tells pyo3 we want to build an extension module (skips linking against libpython.so) # "extension-module" tells pyo3 we want to build an extension module (skips linking against libpython.so)
# "abi3-py39" tells pyo3 (and maturin) to build using the stable ABI with minimum Python version 3.9 # "abi3-py39" tells pyo3 (and maturin) to build using the stable ABI with minimum Python version 3.9
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment