Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
1af7433b
Commit
1af7433b
authored
Mar 05, 2025
by
Neelay Shah
Committed by
GitHub
Mar 05, 2025
Browse files
refactor: rename triton_distributed to dynemo (#22)
Co-authored-by:
Graham King
<
grahamk@nvidia.com
>
parent
ee4ef06b
Changes
165
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
242 additions
and
246 deletions
+242
-246
examples/python_rs/llm/vllm_nixl/worker.py
examples/python_rs/llm/vllm_nixl/worker.py
+3
-7
examples/rust/Cargo.lock
examples/rust/Cargo.lock
+99
-99
examples/rust/Cargo.toml
examples/rust/Cargo.toml
+4
-4
examples/rust/hello_world/Cargo.toml
examples/rust/hello_world/Cargo.toml
+1
-1
examples/rust/hello_world/src/bin/client.rs
examples/rust/hello_world/src/bin/client.rs
+2
-2
examples/rust/hello_world/src/bin/server.rs
examples/rust/hello_world/src/bin/server.rs
+3
-3
examples/rust/hello_world/src/lib.rs
examples/rust/hello_world/src/lib.rs
+1
-1
examples/rust/http/Cargo.toml
examples/rust/http/Cargo.toml
+2
-2
examples/rust/http/src/main.rs
examples/rust/http/src/main.rs
+2
-2
examples/rust/llmctl/Cargo.toml
examples/rust/llmctl/Cargo.toml
+2
-2
examples/rust/llmctl/src/main.rs
examples/rust/llmctl/src/main.rs
+2
-2
examples/rust/service_metrics/Cargo.toml
examples/rust/service_metrics/Cargo.toml
+2
-2
examples/rust/service_metrics/README.md
examples/rust/service_metrics/README.md
+3
-3
examples/rust/service_metrics/src/bin/client.rs
examples/rust/service_metrics/src/bin/client.rs
+1
-1
examples/rust/service_metrics/src/bin/server.rs
examples/rust/service_metrics/src/bin/server.rs
+2
-2
examples/rust/service_metrics/src/lib.rs
examples/rust/service_metrics/src/lib.rs
+1
-1
launch/dynemo-run/Cargo.lock
launch/dynemo-run/Cargo.lock
+98
-98
launch/dynemo-run/Cargo.toml
launch/dynemo-run/Cargo.toml
+9
-9
launch/dynemo-run/src/input/endpoint.rs
launch/dynemo-run/src/input/endpoint.rs
+3
-3
launch/dynemo-run/src/input/http.rs
launch/dynemo-run/src/input/http.rs
+2
-2
No files found.
examples/python_rs/llm/vllm_nixl/worker.py
View file @
1af7433b
...
@@ -33,11 +33,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
...
@@ -33,11 +33,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from
vllm.entrypoints.openai.serving_models
import
BaseModelPath
,
OpenAIServingModels
from
vllm.entrypoints.openai.serving_models
import
BaseModelPath
,
OpenAIServingModels
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
vllm.remote_prefill
import
RemotePrefillParams
,
RemotePrefillRequest
from
triton_distributed.runtime
import
(
from
dynemo.runtime
import
DistributedRuntime
,
dynemo_endpoint
,
dynemo_worker
DistributedRuntime
,
triton_endpoint
,
triton_worker
,
)
class
RequestHandler
:
class
RequestHandler
:
...
@@ -87,7 +83,7 @@ class RequestHandler:
...
@@ -87,7 +83,7 @@ class RequestHandler:
return
callback
return
callback
@
triton
_endpoint
(
ChatCompletionRequest
,
ChatCompletionStreamResponse
)
@
dynemo
_endpoint
(
ChatCompletionRequest
,
ChatCompletionStreamResponse
)
async
def
generate
(
self
,
request
):
async
def
generate
(
self
,
request
):
if
not
self
.
initialized
:
if
not
self
.
initialized
:
await
self
.
init
()
await
self
.
init
()
...
@@ -113,7 +109,7 @@ class RequestHandler:
...
@@ -113,7 +109,7 @@ class RequestHandler:
yield
response
yield
response
@
triton
_worker
()
@
dynemo
_worker
()
async
def
worker
(
runtime
:
DistributedRuntime
,
engine_args
:
AsyncEngineArgs
):
async
def
worker
(
runtime
:
DistributedRuntime
,
engine_args
:
AsyncEngineArgs
):
component
=
runtime
.
namespace
(
"test-nixl"
).
component
(
"vllm"
)
component
=
runtime
.
namespace
(
"test-nixl"
).
component
(
"vllm"
)
await
component
.
create_service
()
await
component
.
create_service
()
...
...
examples/rust/Cargo.lock
View file @
1af7433b
...
@@ -955,6 +955,99 @@ dependencies = [
...
@@ -955,6 +955,99 @@ dependencies = [
"syn 2.0.98",
"syn 2.0.98",
]
]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"axum 0.8.1",
"bindgen",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"minijinja",
"minijinja-contrib",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie",
"toktrie_hf_tokenizers",
"tracing",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
[[package]]
name = "ed25519"
name = "ed25519"
version = "2.2.3"
version = "2.2.3"
...
@@ -1370,7 +1463,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
...
@@ -1370,7 +1463,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
name = "hello_world"
name = "hello_world"
version = "0.2.0"
version = "0.2.0"
dependencies = [
dependencies = [
"
triton-distributed
-runtime",
"
dynemo
-runtime",
]
]
[[package]]
[[package]]
...
@@ -1395,11 +1488,11 @@ name = "http"
...
@@ -1395,11 +1488,11 @@ name = "http"
version = "0.2.0"
version = "0.2.0"
dependencies = [
dependencies = [
"clap",
"clap",
"dynemo-llm",
"dynemo-runtime",
"serde",
"serde",
"serde_json",
"serde_json",
"tokio",
"tokio",
"triton-distributed-llm",
"triton-distributed-runtime",
]
]
[[package]]
[[package]]
...
@@ -1895,13 +1988,13 @@ name = "llmctl"
...
@@ -1895,13 +1988,13 @@ name = "llmctl"
version = "0.2.0"
version = "0.2.0"
dependencies = [
dependencies = [
"clap",
"clap",
"dynemo-llm",
"dynemo-runtime",
"serde",
"serde",
"serde_json",
"serde_json",
"tabled",
"tabled",
"tokio",
"tokio",
"tracing",
"tracing",
"triton-distributed-llm",
"triton-distributed-runtime",
]
]
[[package]]
[[package]]
...
@@ -3297,11 +3390,11 @@ dependencies = [
...
@@ -3297,11 +3390,11 @@ dependencies = [
name = "service_metrics"
name = "service_metrics"
version = "0.2.0"
version = "0.2.0"
dependencies = [
dependencies = [
"dynemo-runtime",
"futures",
"futures",
"serde",
"serde",
"serde_json",
"serde_json",
"tokio",
"tokio",
"triton-distributed-runtime",
]
]
[[package]]
[[package]]
...
@@ -4000,99 +4093,6 @@ dependencies = [
...
@@ -4000,99 +4093,6 @@ dependencies = [
"tracing-serde",
"tracing-serde",
]
]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"axum 0.8.1",
"bindgen",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"minijinja",
"minijinja-contrib",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie",
"toktrie_hf_tokenizers",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
[[package]]
name = "try-lock"
name = "try-lock"
version = "0.2.5"
version = "0.2.5"
...
...
examples/rust/Cargo.toml
View file @
1af7433b
...
@@ -27,14 +27,14 @@ version = "0.2.0"
...
@@ -27,14 +27,14 @@ version = "0.2.0"
edition
=
"2021"
edition
=
"2021"
authors
=
["NVIDIA"]
authors
=
["NVIDIA"]
license
=
"Apache-2.0"
license
=
"Apache-2.0"
homepage
=
"https://github.com/
triton-inference-server/triton_distributed
"
homepage
=
"https://github.com/
dynemo-ai/dynemo
"
repository
=
"https://github.com/
triton-inference-server/triton_distributed
"
repository
=
"https://github.com/
dynemo-ai/dynemo.git
"
[workspace.dependencies]
[workspace.dependencies]
# local or crates.io
# local or crates.io
triton-distributed
-runtime
=
{
path
=
"../../lib/runtime"
}
dynemo
-runtime
=
{
path
=
"../../lib/runtime"
}
triton-distributed
-llm
=
{
path
=
"../../lib/llm"
}
dynemo
-llm
=
{
path
=
"../../lib/llm"
}
# crates.io
# crates.io
anyhow
=
{
version
=
"1"
}
anyhow
=
{
version
=
"1"
}
...
...
examples/rust/hello_world/Cargo.toml
View file @
1af7433b
...
@@ -22,6 +22,6 @@ license.workspace = true
...
@@ -22,6 +22,6 @@ license.workspace = true
homepage.workspace
=
true
homepage.workspace
=
true
[dependencies]
[dependencies]
triton-distributed
-runtime
=
{
workspace
=
true
}
dynemo
-runtime
=
{
workspace
=
true
}
# third-party
# third-party
examples/rust/hello_world/src/bin/client.rs
View file @
1af7433b
...
@@ -13,11 +13,11 @@
...
@@ -13,11 +13,11 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
use
hello_world
::
DEFAULT_NAMESPACE
;
use
dynemo_runtime
::{
use
triton_distributed_runtime
::{
logging
,
protocols
::
annotated
::
Annotated
,
stream
::
StreamExt
,
DistributedRuntime
,
Result
,
logging
,
protocols
::
annotated
::
Annotated
,
stream
::
StreamExt
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
Runtime
,
Worker
,
};
};
use
hello_world
::
DEFAULT_NAMESPACE
;
fn
main
()
->
Result
<
()
>
{
fn
main
()
->
Result
<
()
>
{
logging
::
init
();
logging
::
init
();
...
...
examples/rust/hello_world/src/bin/server.rs
View file @
1af7433b
...
@@ -13,9 +13,7 @@
...
@@ -13,9 +13,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
use
hello_world
::
DEFAULT_NAMESPACE
;
use
dynemo_runtime
::{
use
std
::
sync
::
Arc
;
use
triton_distributed_runtime
::{
logging
,
logging
,
pipeline
::{
pipeline
::{
async_trait
,
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
async_trait
,
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
...
@@ -24,6 +22,8 @@ use triton_distributed_runtime::{
...
@@ -24,6 +22,8 @@ use triton_distributed_runtime::{
protocols
::
annotated
::
Annotated
,
protocols
::
annotated
::
Annotated
,
stream
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
stream
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
};
use
hello_world
::
DEFAULT_NAMESPACE
;
use
std
::
sync
::
Arc
;
fn
main
()
->
Result
<
()
>
{
fn
main
()
->
Result
<
()
>
{
logging
::
init
();
logging
::
init
();
...
...
examples/rust/hello_world/src/lib.rs
View file @
1af7433b
...
@@ -13,4 +13,4 @@
...
@@ -13,4 +13,4 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
pub
const
DEFAULT_NAMESPACE
:
&
str
=
"
triton-init
"
;
pub
const
DEFAULT_NAMESPACE
:
&
str
=
"
dynemo
"
;
examples/rust/http/Cargo.toml
View file @
1af7433b
...
@@ -24,8 +24,8 @@ homepage.workspace = true
...
@@ -24,8 +24,8 @@ homepage.workspace = true
repository.workspace
=
true
repository.workspace
=
true
[dependencies]
[dependencies]
triton-distributed
-runtime
=
{
workspace
=
true
}
dynemo
-runtime
=
{
workspace
=
true
}
triton-distributed
-llm
=
{
workspace
=
true
}
dynemo
-llm
=
{
workspace
=
true
}
clap
=
{
version
=
"4.5"
,
features
=
["derive"]
}
clap
=
{
version
=
"4.5"
,
features
=
["derive"]
}
serde
=
{
workspace
=
true
}
serde
=
{
workspace
=
true
}
...
...
examples/rust/http/src/main.rs
View file @
1af7433b
...
@@ -16,14 +16,14 @@
...
@@ -16,14 +16,14 @@
use
clap
::
Parser
;
use
clap
::
Parser
;
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
triton_distributed
_llm
::{
use
dynemo
_llm
::{
http
::
service
::{
http
::
service
::{
discovery
::{
model_watcher
,
ModelWatchState
},
discovery
::{
model_watcher
,
ModelWatchState
},
service_v2
::
HttpService
,
service_v2
::
HttpService
,
},
},
model_type
::
ModelType
,
model_type
::
ModelType
,
};
};
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
logging
,
transports
::
etcd
::
PrefixWatcher
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
logging
,
transports
::
etcd
::
PrefixWatcher
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
};
...
...
examples/rust/llmctl/Cargo.toml
View file @
1af7433b
...
@@ -23,8 +23,8 @@ homepage.workspace = true
...
@@ -23,8 +23,8 @@ homepage.workspace = true
repository.workspace
=
true
repository.workspace
=
true
[dependencies]
[dependencies]
triton-distributed
-runtime
=
{
workspace
=
true
}
dynemo
-runtime
=
{
workspace
=
true
}
triton-distributed
-llm
=
{
workspace
=
true
}
dynemo
-llm
=
{
workspace
=
true
}
serde
=
{
workspace
=
true
}
serde
=
{
workspace
=
true
}
serde_json
=
{
workspace
=
true
}
serde_json
=
{
workspace
=
true
}
...
...
examples/rust/llmctl/src/main.rs
View file @
1af7433b
...
@@ -16,8 +16,8 @@
...
@@ -16,8 +16,8 @@
use
clap
::{
Parser
,
Subcommand
};
use
clap
::{
Parser
,
Subcommand
};
use
tracing
as
log
;
use
tracing
as
log
;
use
triton_distributed
_llm
::{
http
::
service
::
discovery
::
ModelEntry
,
model_type
::
ModelType
};
use
dynemo
_llm
::{
http
::
service
::
discovery
::
ModelEntry
,
model_type
::
ModelType
};
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
distributed
::
DistributedConfig
,
logging
,
protocols
::
Endpoint
,
raise
,
DistributedRuntime
,
distributed
::
DistributedConfig
,
logging
,
protocols
::
Endpoint
,
raise
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
Result
,
Runtime
,
Worker
,
};
};
...
...
examples/rust/service_metrics/Cargo.toml
View file @
1af7433b
...
@@ -23,10 +23,10 @@ homepage.workspace = true
...
@@ -23,10 +23,10 @@ homepage.workspace = true
repository.workspace
=
true
repository.workspace
=
true
[dependencies]
[dependencies]
triton-distributed
-runtime
=
{
workspace
=
true
}
dynemo
-runtime
=
{
workspace
=
true
}
# third-party
# third-party
futures
=
{
workspace
=
true
}
futures
=
{
workspace
=
true
}
serde
=
{
workspace
=
true
}
serde
=
{
workspace
=
true
}
serde_json
=
{
workspace
=
true
}
serde_json
=
{
workspace
=
true
}
tokio
=
{
workspace
=
true
}
tokio
=
{
workspace
=
true
}
\ No newline at end of file
examples/rust/service_metrics/README.md
View file @
1af7433b
...
@@ -4,14 +4,14 @@ This example extends the hello_world example by calling the `scrape_service` met
...
@@ -4,14 +4,14 @@ This example extends the hello_world example by calling the `scrape_service` met
with the service name for the request response the client just issued a request.
with the service name for the request response the client just issued a request.
```
bash
```
bash
TRD
_LOG
=
debug cargo run
--bin
server
DYN
_LOG
=
debug cargo run
--bin
server
```
```
The client can now observe some basic statistics about each instance of the service
The client can now observe some basic statistics about each instance of the service
begin hosted.
begin hosted.
```
bash
```
bash
TRD
_LOG
=
info cargo run
--bin
client
DYN
_LOG
=
info cargo run
--bin
client
```
```
## Example Output
## Example Output
...
@@ -27,7 +27,7 @@ Annotated { data: Some("o"), id: None, event: None, comment: None }
...
@@ -27,7 +27,7 @@ Annotated { data: Some("o"), id: None, event: None, comment: None }
Annotated { data: Some("r"), id: None, event: None, comment: None }
Annotated { data: Some("r"), id: None, event: None, comment: None }
Annotated { data: Some("l"), id: None, event: None, comment: None }
Annotated { data: Some("l"), id: None, event: None, comment: None }
Annotated { data: Some("d"), id: None, event: None, comment: None }
Annotated { data: Some("d"), id: None, event: None, comment: None }
ServiceSet { services: [ServiceInfo { name: "
triton
_init_backend_720278f8", id: "eOHMc4ndRw8s5flv4WOZx7", version: "0.0.1", started: "2025-02-26T18:54:04.917294605Z", endpoints: [EndpointInfo { name: "
triton
_init_backend_720278f8-generate-694d951a80e06abf", subject: "
triton
_init_backend_720278f8.generate-694d951a80e06abf", data: Some(Metrics(Object {"average_processing_time": Number(53662), "data": Object {"val": Number(10)}, "last_error": String(""), "num_errors": Number(0), "num_requests": Number(2), "processing_time": Number(107325), "queue_group": String("q")})) }] }] }
ServiceSet { services: [ServiceInfo { name: "
dynemo
_init_backend_720278f8", id: "eOHMc4ndRw8s5flv4WOZx7", version: "0.0.1", started: "2025-02-26T18:54:04.917294605Z", endpoints: [EndpointInfo { name: "
dynemo
_init_backend_720278f8-generate-694d951a80e06abf", subject: "
dynemo
_init_backend_720278f8.generate-694d951a80e06abf", data: Some(Metrics(Object {"average_processing_time": Number(53662), "data": Object {"val": Number(10)}, "last_error": String(""), "num_errors": Number(0), "num_requests": Number(2), "processing_time": Number(107325), "queue_group": String("q")})) }] }] }
```
```
Note the following stats in the output demonstrate the custom
Note the following stats in the output demonstrate the custom
...
...
examples/rust/service_metrics/src/bin/client.rs
View file @
1af7433b
...
@@ -16,7 +16,7 @@
...
@@ -16,7 +16,7 @@
use
futures
::
StreamExt
;
use
futures
::
StreamExt
;
use
service_metrics
::
DEFAULT_NAMESPACE
;
use
service_metrics
::
DEFAULT_NAMESPACE
;
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
logging
,
protocols
::
annotated
::
Annotated
,
utils
::
Duration
,
DistributedRuntime
,
Result
,
Runtime
,
logging
,
protocols
::
annotated
::
Annotated
,
utils
::
Duration
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
Worker
,
};
};
...
...
examples/rust/service_metrics/src/bin/server.rs
View file @
1af7433b
...
@@ -15,8 +15,7 @@
...
@@ -15,8 +15,7 @@
use
service_metrics
::{
MyStats
,
DEFAULT_NAMESPACE
};
use
service_metrics
::{
MyStats
,
DEFAULT_NAMESPACE
};
use
std
::
sync
::
Arc
;
use
dynemo_runtime
::{
use
triton_distributed_runtime
::{
logging
,
logging
,
pipeline
::{
pipeline
::{
async_trait
,
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
async_trait
,
network
::
Ingress
,
AsyncEngine
,
AsyncEngineContextProvider
,
Error
,
ManyOut
,
...
@@ -25,6 +24,7 @@ use triton_distributed_runtime::{
...
@@ -25,6 +24,7 @@ use triton_distributed_runtime::{
protocols
::
annotated
::
Annotated
,
protocols
::
annotated
::
Annotated
,
stream
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
stream
,
DistributedRuntime
,
Result
,
Runtime
,
Worker
,
};
};
use
std
::
sync
::
Arc
;
fn
main
()
->
Result
<
()
>
{
fn
main
()
->
Result
<
()
>
{
logging
::
init
();
logging
::
init
();
...
...
examples/rust/service_metrics/src/lib.rs
View file @
1af7433b
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
pub
const
DEFAULT_NAMESPACE
:
&
str
=
"
triton-init
"
;
pub
const
DEFAULT_NAMESPACE
:
&
str
=
"
dynemo
"
;
#[derive(Serialize,
Deserialize)]
#[derive(Serialize,
Deserialize)]
// Dummy Stats object to demonstrate how to attach a custom stats handler
// Dummy Stats object to demonstrate how to attach a custom stats handler
...
...
launch/dynemo-run/Cargo.lock
View file @
1af7433b
...
@@ -1374,6 +1374,58 @@ dependencies = [
...
@@ -1374,6 +1374,58 @@ dependencies = [
"reborrow",
"reborrow",
]
]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.28",
"toktrie_hf_tokenizers 0.6.28",
"tracing",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
[[package]]
name = "dynemo-run"
name = "dynemo-run"
version = "0.1.0"
version = "0.1.0"
...
@@ -1384,6 +1436,8 @@ dependencies = [
...
@@ -1384,6 +1436,8 @@ dependencies = [
"async-trait",
"async-trait",
"clap",
"clap",
"dialoguer",
"dialoguer",
"dynemo-llm",
"dynemo-runtime",
"futures",
"futures",
"futures-util",
"futures-util",
"libc",
"libc",
...
@@ -1395,8 +1449,50 @@ dependencies = [
...
@@ -1395,8 +1449,50 @@ dependencies = [
"tokio-util",
"tokio-util",
"tracing",
"tracing",
"tracing-subscriber",
"tracing-subscriber",
"triton-distributed-llm",
]
"triton-distributed-runtime",
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix 0.29.0",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
]
[[package]]
[[package]]
...
@@ -5594,102 +5690,6 @@ dependencies = [
...
@@ -5594,102 +5690,6 @@ dependencies = [
"tracing-serde",
"tracing-serde",
]
]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.28",
"toktrie_hf_tokenizers 0.6.28",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix 0.29.0",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
[[package]]
name = "try-lock"
name = "try-lock"
version = "0.2.5"
version = "0.2.5"
...
...
launch/dynemo-run/Cargo.toml
View file @
1af7433b
...
@@ -22,13 +22,13 @@ homepage = "https://github.com/dynemo-ai/dynemo"
...
@@ -22,13 +22,13 @@ homepage = "https://github.com/dynemo-ai/dynemo"
license
=
"Apache-2.0"
license
=
"Apache-2.0"
[features]
[features]
mistralrs
=
["
triton-distributed
-llm/mistralrs"]
mistralrs
=
["
dynemo
-llm/mistralrs"]
sglang
=
[
"
triton-distributed
-llm/sglang"
,
"dep:netlink-packet-route"
,
"dep:rtnetlink"
]
sglang
=
[
"
dynemo
-llm/sglang"
,
"dep:netlink-packet-route"
,
"dep:rtnetlink"
]
vllm
=
[
"
triton-distributed
-llm/vllm"
,
"dep:netlink-packet-route"
,
"dep:rtnetlink"
]
vllm
=
[
"
dynemo
-llm/vllm"
,
"dep:netlink-packet-route"
,
"dep:rtnetlink"
]
llamacpp
=
["
triton-distributed
-llm/llamacpp"]
llamacpp
=
["
dynemo
-llm/llamacpp"]
trtllm
=
["
triton-distributed
-llm/trtllm"]
trtllm
=
["
dynemo
-llm/trtllm"]
cuda
=
["
triton-distributed
-llm/cuda"]
cuda
=
["
dynemo
-llm/cuda"]
metal
=
["
triton-distributed
-llm/metal"]
metal
=
["
dynemo
-llm/metal"]
[dependencies]
[dependencies]
anyhow
=
"1"
anyhow
=
"1"
...
@@ -48,5 +48,5 @@ tokio = { version = "1", features = ["full"] }
...
@@ -48,5 +48,5 @@ tokio = { version = "1", features = ["full"] }
tokio-util
=
{
version
=
"0.7"
,
features
=
[
"codec"
,
"net"
]
}
tokio-util
=
{
version
=
"0.7"
,
features
=
[
"codec"
,
"net"
]
}
tracing
=
{
version
=
"0.1"
}
tracing
=
{
version
=
"0.1"
}
tracing-subscriber
=
{
version
=
"0.3"
,
features
=
[
"env-filter"
,
"local-time"
,
"json"
]
}
tracing-subscriber
=
{
version
=
"0.3"
,
features
=
[
"env-filter"
,
"local-time"
,
"json"
]
}
triton-distributed
-runtime
=
{
path
=
"../../lib/runtime"
}
dynemo
-runtime
=
{
path
=
"../../lib/runtime"
}
triton-distributed
-llm
=
{
path
=
"../../lib/llm"
}
dynemo
-llm
=
{
path
=
"../../lib/llm"
}
launch/dynemo-run/src/input/endpoint.rs
View file @
1af7433b
...
@@ -13,7 +13,7 @@
...
@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
use
triton_distributed
_llm
::{
use
dynemo
_llm
::{
backend
::
Backend
,
backend
::
Backend
,
http
::
service
::
discovery
::
ModelEntry
,
http
::
service
::
discovery
::
ModelEntry
,
model_type
::
ModelType
,
model_type
::
ModelType
,
...
@@ -25,10 +25,10 @@ use triton_distributed_llm::{
...
@@ -25,10 +25,10 @@ use triton_distributed_llm::{
Annotated
,
Annotated
,
},
},
};
};
use
triton_distributed
_runtime
::
pipeline
::{
use
dynemo
_runtime
::
pipeline
::{
network
::
Ingress
,
ManyOut
,
Operator
,
SegmentSource
,
ServiceBackend
,
SingleIn
,
Source
,
network
::
Ingress
,
ManyOut
,
Operator
,
SegmentSource
,
ServiceBackend
,
SingleIn
,
Source
,
};
};
use
triton_distributed
_runtime
::{
protocols
::
Endpoint
,
DistributedRuntime
,
Runtime
};
use
dynemo
_runtime
::{
protocols
::
Endpoint
,
DistributedRuntime
,
Runtime
};
use
crate
::
EngineConfig
;
use
crate
::
EngineConfig
;
...
...
launch/dynemo-run/src/input/http.rs
View file @
1af7433b
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
use
std
::
sync
::
Arc
;
use
std
::
sync
::
Arc
;
use
triton_distributed
_llm
::{
use
dynemo
_llm
::{
backend
::
Backend
,
backend
::
Backend
,
http
::
service
::{
discovery
,
service_v2
},
http
::
service
::{
discovery
,
service_v2
},
model_type
::
ModelType
,
model_type
::
ModelType
,
...
@@ -27,7 +27,7 @@ use triton_distributed_llm::{
...
@@ -27,7 +27,7 @@ use triton_distributed_llm::{
Annotated
,
Annotated
,
},
},
};
};
use
triton_distributed
_runtime
::{
use
dynemo
_runtime
::{
pipeline
::{
ManyOut
,
Operator
,
ServiceBackend
,
ServiceFrontend
,
SingleIn
,
Source
},
pipeline
::{
ManyOut
,
Operator
,
ServiceBackend
,
ServiceFrontend
,
SingleIn
,
Source
},
DistributedRuntime
,
Runtime
,
DistributedRuntime
,
Runtime
,
};
};
...
...
Prev
1
2
3
4
5
6
7
…
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment