Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
1af7433b
Commit
1af7433b
authored
Mar 05, 2025
by
Neelay Shah
Committed by
GitHub
Mar 05, 2025
Browse files
refactor: rename triton_distributed to dynemo (#22)
Co-authored-by:
Graham King
<
grahamk@nvidia.com
>
parent
ee4ef06b
Changes
165
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
242 additions
and
246 deletions
+242
-246
examples/python_rs/llm/vllm_nixl/worker.py
examples/python_rs/llm/vllm_nixl/worker.py
+3
-7
examples/rust/Cargo.lock
examples/rust/Cargo.lock
+99
-99
examples/rust/Cargo.toml
examples/rust/Cargo.toml
+4
-4
examples/rust/hello_world/Cargo.toml
examples/rust/hello_world/Cargo.toml
+1
-1
examples/rust/hello_world/src/bin/client.rs
examples/rust/hello_world/src/bin/client.rs
+2
-2
examples/rust/hello_world/src/bin/server.rs
examples/rust/hello_world/src/bin/server.rs
+3
-3
examples/rust/hello_world/src/lib.rs
examples/rust/hello_world/src/lib.rs
+1
-1
examples/rust/http/Cargo.toml
examples/rust/http/Cargo.toml
+2
-2
examples/rust/http/src/main.rs
examples/rust/http/src/main.rs
+2
-2
examples/rust/llmctl/Cargo.toml
examples/rust/llmctl/Cargo.toml
+2
-2
examples/rust/llmctl/src/main.rs
examples/rust/llmctl/src/main.rs
+2
-2
examples/rust/service_metrics/Cargo.toml
examples/rust/service_metrics/Cargo.toml
+2
-2
examples/rust/service_metrics/README.md
examples/rust/service_metrics/README.md
+3
-3
examples/rust/service_metrics/src/bin/client.rs
examples/rust/service_metrics/src/bin/client.rs
+1
-1
examples/rust/service_metrics/src/bin/server.rs
examples/rust/service_metrics/src/bin/server.rs
+2
-2
examples/rust/service_metrics/src/lib.rs
examples/rust/service_metrics/src/lib.rs
+1
-1
launch/dynemo-run/Cargo.lock
launch/dynemo-run/Cargo.lock
+98
-98
launch/dynemo-run/Cargo.toml
launch/dynemo-run/Cargo.toml
+9
-9
launch/dynemo-run/src/input/endpoint.rs
launch/dynemo-run/src/input/endpoint.rs
+3
-3
launch/dynemo-run/src/input/http.rs
launch/dynemo-run/src/input/http.rs
+2
-2
No files found.
examples/python_rs/llm/vllm_nixl/worker.py
View file @
1af7433b
...
...
@@ -33,11 +33,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from
vllm.entrypoints.openai.serving_models
import
BaseModelPath
,
OpenAIServingModels
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
triton_distributed.runtime
import
(
DistributedRuntime
,
triton_endpoint
,
triton_worker
,
)
from
dynemo.runtime
import
DistributedRuntime
,
dynemo_endpoint
,
dynemo_worker
class
RequestHandler
:
...
...
@@ -87,7 +83,7 @@ class RequestHandler:
return
callback
@
triton
_endpoint
(
ChatCompletionRequest
,
ChatCompletionStreamResponse
)
@
dynemo
_endpoint
(
ChatCompletionRequest
,
ChatCompletionStreamResponse
)
async
def
generate
(
self
,
request
):
if
not
self
.
initialized
:
await
self
.
init
()
...
...
@@ -113,7 +109,7 @@ class RequestHandler:
yield
response
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
,
engine_args
:
AsyncEngineArgs
):
component
=
runtime
.
namespace
(
"test-nixl"
).
component
(
"vllm"
)
await
component
.
create_service
()
...
...
examples/rust/Cargo.lock
View file @
1af7433b
...
...
@@ -955,6 +955,99 @@ dependencies = [
"syn 2.0.98",
]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"axum 0.8.1",
"bindgen",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"minijinja",
"minijinja-contrib",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie",
"toktrie_hf_tokenizers",
"tracing",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "ed25519"
version = "2.2.3"
...
...
@@ -1370,7 +1463,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
name = "hello_world"
version = "0.2.0"
dependencies = [
"
triton-distributed
-runtime",
"
dynemo
-runtime",
]
[[package]]
...
...
@@ -1395,11 +1488,11 @@ name = "http"
version = "0.2.0"
dependencies = [
"clap",
"dynemo-llm",
"dynemo-runtime",
"serde",
"serde_json",
"tokio",
"triton-distributed-llm",
"triton-distributed-runtime",
]
[[package]]
...
...
@@ -1895,13 +1988,13 @@ name = "llmctl"
version = "0.2.0"
dependencies = [
"clap",
"dynemo-llm",
"dynemo-runtime",
"serde",
"serde_json",
"tabled",
"tokio",
"tracing",
"triton-distributed-llm",
"triton-distributed-runtime",
]
[[package]]
...
...
@@ -3297,11 +3390,11 @@ dependencies = [
name = "service_metrics"
version = "0.2.0"
dependencies = [
"dynemo-runtime",
"futures",
"serde",
"serde_json",
"tokio",
"triton-distributed-runtime",
]
[[package]]
...
...
@@ -4000,99 +4093,6 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"axum 0.8.1",
"bindgen",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"minijinja",
"minijinja-contrib",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie",
"toktrie_hf_tokenizers",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "try-lock"
version = "0.2.5"
...
...
examples/rust/Cargo.toml
View file @
1af7433b
...
...
@@ -27,14 +27,14 @@ version = "0.2.0"
edition
=
"2021"
authors
=
["NVIDIA"]
license
=
"Apache-2.0"
homepage
=
"https://github.com/
triton-inference-server/triton_distributed
"
repository
=
"https://github.com/
triton-inference-server/triton_distributed
"
homepage
=
"https://github.com/
dynemo-ai/dynemo
"
repository
=
"https://github.com/
dynemo-ai/dynemo.git
"
[workspace.dependencies]
# local or crates.io
triton-distributed
-runtime
=
{
path
=
"../../lib/runtime"
}
triton-distributed
-llm
=
{
path
=
"../../lib/llm"
}
dynemo
-runtime
=
{
path
=
"../../lib/runtime"
}
dynemo
-llm
=
{
path
=
"../../lib/llm"
}
# crates.io
anyhow
=
{
version
=
"1"
}
...
...
examples/rust/hello_world/Cargo.toml
View file @
1af7433b
...
...
@@ -22,6 +22,6 @@ license.workspace = true
homepage.workspace
=
true
[dependencies]
triton-distributed
-runtime
=
{
workspace
=
true
}
dynemo
-runtime
=
{
workspace
=
true
}
# third-party
examples/rust/hello_world/src/bin/client.rs
View file @
1af7433b
...
...
@@ -13,11 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use
hello_world
::
DEFAULT_NAMESPACE
;
use
triton_distributed_runtime
::{
use
dynemo_runtime
::{
logging
,
protocols
::
annotated
::
Annotated
,
stream
::
StreamExt
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
use
hello_world
::
DEFAULT_NAMESPACE
;
fn
main
()
->
Result
<
()
>
{
logging
::
init
();
...
...
examples/rust/hello_world/src/bin/server.rs
View file @
1af7433b
...
...
@@ -13,9 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use
hello_world
::
DEFAULT_NAMESPACE
;
use
std
::
sync
::
Arc
;
use
triton_distributed_runtime
::{
use
dynemo_runtime
::{
logging
,
pipeline
::{
async_trait
,
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
...
...
@@ -24,6 +22,8 @@ use triton_distributed_runtime::{
protocols
::
annotated
::
Annotated
,
stream
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
use
hello_world
::
DEFAULT_NAMESPACE
;
use
std
::
sync
::
Arc
;
fn
main
()
->
Result
<
()
>
{
logging
::
init
();
...
...
examples/rust/hello_world/src/lib.rs
View file @
1af7433b
...
...
@@ -13,4 +13,4 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub
const
DEFAULT_NAMESPACE
:
&
str
=
"
triton-init
"
;
pub
const
DEFAULT_NAMESPACE
:
&
str
=
"
dynemo
"
;
examples/rust/http/Cargo.toml
View file @
1af7433b
...
...
@@ -24,8 +24,8 @@ homepage.workspace = true
repository.workspace
=
true
[dependencies]
triton-distributed
-runtime
=
{
workspace
=
true
}
triton-distributed
-llm
=
{
workspace
=
true
}
dynemo
-runtime
=
{
workspace
=
true
}
dynemo
-llm
=
{
workspace
=
true
}
clap
=
{
version
=
"4.5"
,
features
=
["derive"]
}
serde
=
{
workspace
=
true
}
...
...
examples/rust/http/src/main.rs
View file @
1af7433b
...
...
@@ -16,14 +16,14 @@
use
clap
::
Parser
;
use
std
::
sync
::
Arc
;
use
triton_distributed
_llm
::{
use
dynemo
_llm
::{
http
::
service
::{
discovery
::{
model_watcher
,
ModelWatchState
},
service_v2
::
HttpService
,
},
model_type
::
ModelType
,
};
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
logging
,
transports
::
etcd
::
PrefixWatcher
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
...
...
examples/rust/llmctl/Cargo.toml
View file @
1af7433b
...
...
@@ -23,8 +23,8 @@ homepage.workspace = true
repository.workspace
=
true
[dependencies]
triton-distributed
-runtime
=
{
workspace
=
true
}
triton-distributed
-llm
=
{
workspace
=
true
}
dynemo
-runtime
=
{
workspace
=
true
}
dynemo
-llm
=
{
workspace
=
true
}
serde
=
{
workspace
=
true
}
serde_json
=
{
workspace
=
true
}
...
...
examples/rust/llmctl/src/main.rs
View file @
1af7433b
...
...
@@ -16,8 +16,8 @@
use
clap
::{
Parser
,
Subcommand
};
use
tracing
as
log
;
use
triton_distributed
_llm
::{
http
::
service
::
discovery
::
ModelEntry
,
model_type
::
ModelType
};
use
triton_distributed
_runtime
::{
use
dynemo
_llm
::{
http
::
service
::
discovery
::
ModelEntry
,
model_type
::
ModelType
};
use
dynemo
_runtime
::{
distributed
::
DistributedConfig
,
logging
,
protocols
::
Endpoint
,
raise
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
...
...
examples/rust/service_metrics/Cargo.toml
View file @
1af7433b
...
...
@@ -23,10 +23,10 @@ homepage.workspace = true
repository.workspace
=
true
[dependencies]
triton-distributed
-runtime
=
{
workspace
=
true
}
dynemo
-runtime
=
{
workspace
=
true
}
# third-party
futures
=
{
workspace
=
true
}
serde
=
{
workspace
=
true
}
serde_json
=
{
workspace
=
true
}
tokio
=
{
workspace
=
true
}
\ No newline at end of file
tokio
=
{
workspace
=
true
}
examples/rust/service_metrics/README.md
View file @
1af7433b
...
...
@@ -4,14 +4,14 @@ This example extends the hello_world example by calling the `scrape_service` met
with the service name for the request response the client just issued a request.
```
bash
TRD
_LOG
=
debug cargo run
--bin
server
DYN
_LOG
=
debug cargo run
--bin
server
```
The client can now observe some basic statistics about each instance of the service
being hosted.
```
bash
TRD
_LOG
=
info cargo run
--bin
client
DYN
_LOG
=
info cargo run
--bin
client
```
## Example Output
...
...
@@ -27,7 +27,7 @@ Annotated { data: Some("o"), id: None, event: None, comment: None }
Annotated { data: Some("r"), id: None, event: None, comment: None }
Annotated { data: Some("l"), id: None, event: None, comment: None }
Annotated { data: Some("d"), id: None, event: None, comment: None }
ServiceSet { services: [ServiceInfo { name: "
triton
_init_backend_720278f8", id: "eOHMc4ndRw8s5flv4WOZx7", version: "0.0.1", started: "2025-02-26T18:54:04.917294605Z", endpoints: [EndpointInfo { name: "
triton
_init_backend_720278f8-generate-694d951a80e06abf", subject: "
triton
_init_backend_720278f8.generate-694d951a80e06abf", data: Some(Metrics(Object {"average_processing_time": Number(53662), "data": Object {"val": Number(10)}, "last_error": String(""), "num_errors": Number(0), "num_requests": Number(2), "processing_time": Number(107325), "queue_group": String("q")})) }] }] }
ServiceSet { services: [ServiceInfo { name: "
dynemo
_init_backend_720278f8", id: "eOHMc4ndRw8s5flv4WOZx7", version: "0.0.1", started: "2025-02-26T18:54:04.917294605Z", endpoints: [EndpointInfo { name: "
dynemo
_init_backend_720278f8-generate-694d951a80e06abf", subject: "
dynemo
_init_backend_720278f8.generate-694d951a80e06abf", data: Some(Metrics(Object {"average_processing_time": Number(53662), "data": Object {"val": Number(10)}, "last_error": String(""), "num_errors": Number(0), "num_requests": Number(2), "processing_time": Number(107325), "queue_group": String("q")})) }] }] }
```
Note the following stats in the output demonstrate the custom
...
...
examples/rust/service_metrics/src/bin/client.rs
View file @
1af7433b
...
...
@@ -16,7 +16,7 @@
use
futures
::
StreamExt
;
use
service_metrics
::
DEFAULT_NAMESPACE
;
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
logging
,
protocols
::
annotated
::
Annotated
,
utils
::
Duration
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
...
...
examples/rust/service_metrics/src/bin/server.rs
View file @
1af7433b
...
...
@@ -15,8 +15,7 @@
use
service_metrics
::{
MyStats
,
DEFAULT_NAMESPACE
};
use
std
::
sync
::
Arc
;
use
triton_distributed_runtime
::{
use
dynemo_runtime
::{
logging
,
pipeline
::{
async_trait
,
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
...
...
@@ -25,6 +24,7 @@ use triton_distributed_runtime::{
protocols
::
annotated
::
Annotated
,
stream
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
use
std
::
sync
::
Arc
;
fn
main
()
->
Result
<
()
>
{
logging
::
init
();
...
...
examples/rust/service_metrics/src/lib.rs
View file @
1af7433b
...
...
@@ -15,7 +15,7 @@
use
serde
::{
Deserialize
,
Serialize
};
pub
const
DEFAULT_NAMESPACE
:
&
str
=
"
triton-init
"
;
pub
const
DEFAULT_NAMESPACE
:
&
str
=
"
dynemo
"
;
#[derive(Serialize,
Deserialize)]
// Dummy Stats object to demonstrate how to attach a custom stats handler
...
...
launch/dynemo-run/Cargo.lock
View file @
1af7433b
...
...
@@ -1374,6 +1374,58 @@ dependencies = [
"reborrow",
]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.28",
"toktrie_hf_tokenizers 0.6.28",
"tracing",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "dynemo-run"
version = "0.1.0"
...
...
@@ -1384,6 +1436,8 @@ dependencies = [
"async-trait",
"clap",
"dialoguer",
"dynemo-llm",
"dynemo-runtime",
"futures",
"futures-util",
"libc",
...
...
@@ -1395,8 +1449,50 @@ dependencies = [
"tokio-util",
"tracing",
"tracing-subscriber",
"triton-distributed-llm",
"triton-distributed-runtime",
]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix 0.29.0",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
...
...
@@ -5594,102 +5690,6 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.28",
"toktrie_hf_tokenizers 0.6.28",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix 0.29.0",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "try-lock"
version = "0.2.5"
...
...
launch/dynemo-run/Cargo.toml
View file @
1af7433b
...
...
@@ -22,13 +22,13 @@ homepage = "https://github.com/dynemo-ai/dynemo"
license
=
"Apache-2.0"
[features]
mistralrs
=
["
triton-distributed
-llm/mistralrs"]
sglang
=
[
"
triton-distributed
-llm/sglang"
,
"dep:netlink-packet-route"
,
"dep:rtnetlink"
]
vllm
=
[
"
triton-distributed
-llm/vllm"
,
"dep:netlink-packet-route"
,
"dep:rtnetlink"
]
llamacpp
=
["
triton-distributed
-llm/llamacpp"]
trtllm
=
["
triton-distributed
-llm/trtllm"]
cuda
=
["
triton-distributed
-llm/cuda"]
metal
=
["
triton-distributed
-llm/metal"]
mistralrs
=
["
dynemo
-llm/mistralrs"]
sglang
=
[
"
dynemo
-llm/sglang"
,
"dep:netlink-packet-route"
,
"dep:rtnetlink"
]
vllm
=
[
"
dynemo
-llm/vllm"
,
"dep:netlink-packet-route"
,
"dep:rtnetlink"
]
llamacpp
=
["
dynemo
-llm/llamacpp"]
trtllm
=
["
dynemo
-llm/trtllm"]
cuda
=
["
dynemo
-llm/cuda"]
metal
=
["
dynemo
-llm/metal"]
[dependencies]
anyhow
=
"1"
...
...
@@ -48,5 +48,5 @@ tokio = { version = "1", features = ["full"] }
tokio-util
=
{
version
=
"0.7"
,
features
=
[
"codec"
,
"net"
]
}
tracing
=
{
version
=
"0.1"
}
tracing-subscriber
=
{
version
=
"0.3"
,
features
=
[
"env-filter"
,
"local-time"
,
"json"
]
}
triton-distributed
-runtime
=
{
path
=
"../../lib/runtime"
}
triton-distributed
-llm
=
{
path
=
"../../lib/llm"
}
dynemo
-runtime
=
{
path
=
"../../lib/runtime"
}
dynemo
-llm
=
{
path
=
"../../lib/llm"
}
launch/dynemo-run/src/input/endpoint.rs
View file @
1af7433b
...
...
@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use
triton_distributed
_llm
::{
use
dynemo
_llm
::{
backend
::
Backend
,
http
::
service
::
discovery
::
ModelEntry
,
model_type
::
ModelType
,
...
...
@@ -25,10 +25,10 @@ use triton_distributed_llm::{
Annotated
,
},
};
use
triton_distributed
_runtime
::
pipeline
::{
use
dynemo
_runtime
::
pipeline
::{
network
::
Ingress
,
ManyOut
,
Operator
,
SegmentSource
,
ServiceBackend
,
SingleIn
,
Source
,
};
use
triton_distributed
_runtime
::{
protocols
::
Endpoint
,
DistributedRuntime
,
Runtime
};
use
dynemo
_runtime
::{
protocols
::
Endpoint
,
DistributedRuntime
,
Runtime
};
use
crate
::
EngineConfig
;
...
...
launch/dynemo-run/src/input/http.rs
View file @
1af7433b
...
...
@@ -15,7 +15,7 @@
use
std
::
sync
::
Arc
;
use
triton_distributed
_llm
::{
use
dynemo
_llm
::{
backend
::
Backend
,
http
::
service
::{
discovery
,
service_v2
},
model_type
::
ModelType
,
...
...
@@ -27,7 +27,7 @@ use triton_distributed_llm::{
Annotated
,
},
};
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
pipeline
::{
ManyOut
,
Operator
,
ServiceBackend
,
ServiceFrontend
,
SingleIn
,
Source
},
DistributedRuntime
,
Runtime
,
};
...
...
Prev
1
2
3
4
5
6
7
…
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment