Commit 1af7433b authored by Neelay Shah, committed by GitHub
Browse files

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
......@@ -33,11 +33,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.remote_prefill import RemotePrefillParams, RemotePrefillRequest
from triton_distributed.runtime import (
DistributedRuntime,
triton_endpoint,
triton_worker,
)
from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
class RequestHandler:
......@@ -87,7 +83,7 @@ class RequestHandler:
return callback
@triton_endpoint(ChatCompletionRequest, ChatCompletionStreamResponse)
@dynemo_endpoint(ChatCompletionRequest, ChatCompletionStreamResponse)
async def generate(self, request):
if not self.initialized:
await self.init()
......@@ -113,7 +109,7 @@ class RequestHandler:
yield response
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs):
component = runtime.namespace("test-nixl").component("vllm")
await component.create_service()
......
......@@ -955,6 +955,99 @@ dependencies = [
"syn 2.0.98",
]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"axum 0.8.1",
"bindgen",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"minijinja",
"minijinja-contrib",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie",
"toktrie_hf_tokenizers",
"tracing",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "ed25519"
version = "2.2.3"
......@@ -1370,7 +1463,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
name = "hello_world"
version = "0.2.0"
dependencies = [
"triton-distributed-runtime",
"dynemo-runtime",
]
[[package]]
......@@ -1395,11 +1488,11 @@ name = "http"
version = "0.2.0"
dependencies = [
"clap",
"dynemo-llm",
"dynemo-runtime",
"serde",
"serde_json",
"tokio",
"triton-distributed-llm",
"triton-distributed-runtime",
]
[[package]]
......@@ -1895,13 +1988,13 @@ name = "llmctl"
version = "0.2.0"
dependencies = [
"clap",
"dynemo-llm",
"dynemo-runtime",
"serde",
"serde_json",
"tabled",
"tokio",
"tracing",
"triton-distributed-llm",
"triton-distributed-runtime",
]
[[package]]
......@@ -3297,11 +3390,11 @@ dependencies = [
name = "service_metrics"
version = "0.2.0"
dependencies = [
"dynemo-runtime",
"futures",
"serde",
"serde_json",
"tokio",
"triton-distributed-runtime",
]
[[package]]
......@@ -4000,99 +4093,6 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"axum 0.8.1",
"bindgen",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"minijinja",
"minijinja-contrib",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie",
"toktrie_hf_tokenizers",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "try-lock"
version = "0.2.5"
......
......@@ -27,14 +27,14 @@ version = "0.2.0"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/triton-inference-server/triton_distributed"
repository = "https://github.com/triton-inference-server/triton_distributed"
homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git"
[workspace.dependencies]
# local or crates.io
triton-distributed-runtime = { path = "../../lib/runtime" }
triton-distributed-llm = { path = "../../lib/llm" }
dynemo-runtime = { path = "../../lib/runtime" }
dynemo-llm = { path = "../../lib/llm" }
# crates.io
anyhow = { version = "1" }
......
......@@ -22,6 +22,6 @@ license.workspace = true
homepage.workspace = true
[dependencies]
triton-distributed-runtime = { workspace = true }
dynemo-runtime = { workspace = true }
# third-party
......@@ -13,11 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use hello_world::DEFAULT_NAMESPACE;
use triton_distributed_runtime::{
use dynemo_runtime::{
logging, protocols::annotated::Annotated, stream::StreamExt, DistributedRuntime, Result,
Runtime, Worker,
};
use hello_world::DEFAULT_NAMESPACE;
fn main() -> Result<()> {
logging::init();
......
......@@ -13,9 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use hello_world::DEFAULT_NAMESPACE;
use std::sync::Arc;
use triton_distributed_runtime::{
use dynemo_runtime::{
logging,
pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
......@@ -24,6 +22,8 @@ use triton_distributed_runtime::{
protocols::annotated::Annotated,
stream, DistributedRuntime, Result, Runtime, Worker,
};
use hello_world::DEFAULT_NAMESPACE;
use std::sync::Arc;
fn main() -> Result<()> {
logging::init();
......
......@@ -13,4 +13,4 @@
// See the License for the specific language governing permissions and
// limitations under the License.
pub const DEFAULT_NAMESPACE: &str = "triton-init";
pub const DEFAULT_NAMESPACE: &str = "dynemo";
......@@ -24,8 +24,8 @@ homepage.workspace = true
repository.workspace = true
[dependencies]
triton-distributed-runtime = { workspace = true}
triton-distributed-llm = { workspace = true}
dynemo-runtime = { workspace = true}
dynemo-llm = { workspace = true}
clap = { version = "4.5", features = ["derive"] }
serde = { workspace = true }
......
......@@ -16,14 +16,14 @@
use clap::Parser;
use std::sync::Arc;
use triton_distributed_llm::{
use dynemo_llm::{
http::service::{
discovery::{model_watcher, ModelWatchState},
service_v2::HttpService,
},
model_type::ModelType,
};
use triton_distributed_runtime::{
use dynemo_runtime::{
logging, transports::etcd::PrefixWatcher, DistributedRuntime, Result, Runtime, Worker,
};
......
......@@ -23,8 +23,8 @@ homepage.workspace = true
repository.workspace = true
[dependencies]
triton-distributed-runtime = { workspace = true}
triton-distributed-llm = { workspace = true}
dynemo-runtime = { workspace = true}
dynemo-llm = { workspace = true}
serde = { workspace = true }
serde_json = { workspace = true }
......
......@@ -16,8 +16,8 @@
use clap::{Parser, Subcommand};
use tracing as log;
use triton_distributed_llm::{http::service::discovery::ModelEntry, model_type::ModelType};
use triton_distributed_runtime::{
use dynemo_llm::{http::service::discovery::ModelEntry, model_type::ModelType};
use dynemo_runtime::{
distributed::DistributedConfig, logging, protocols::Endpoint, raise, DistributedRuntime,
Result, Runtime, Worker,
};
......
......@@ -23,10 +23,10 @@ homepage.workspace = true
repository.workspace = true
[dependencies]
triton-distributed-runtime = { workspace = true }
dynemo-runtime = { workspace = true }
# third-party
futures = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
\ No newline at end of file
tokio = { workspace = true }
......@@ -4,14 +4,14 @@ This example extends the hello_world example by calling the `scrape_service` met
with the name of the service to which the client just issued a request.
```bash
TRD_LOG=debug cargo run --bin server
DYN_LOG=debug cargo run --bin server
```
The client can now observe some basic statistics about each instance of the service
being hosted.
```bash
TRD_LOG=info cargo run --bin client
DYN_LOG=info cargo run --bin client
```
## Example Output
......@@ -27,7 +27,7 @@ Annotated { data: Some("o"), id: None, event: None, comment: None }
Annotated { data: Some("r"), id: None, event: None, comment: None }
Annotated { data: Some("l"), id: None, event: None, comment: None }
Annotated { data: Some("d"), id: None, event: None, comment: None }
ServiceSet { services: [ServiceInfo { name: "triton_init_backend_720278f8", id: "eOHMc4ndRw8s5flv4WOZx7", version: "0.0.1", started: "2025-02-26T18:54:04.917294605Z", endpoints: [EndpointInfo { name: "triton_init_backend_720278f8-generate-694d951a80e06abf", subject: "triton_init_backend_720278f8.generate-694d951a80e06abf", data: Some(Metrics(Object {"average_processing_time": Number(53662), "data": Object {"val": Number(10)}, "last_error": String(""), "num_errors": Number(0), "num_requests": Number(2), "processing_time": Number(107325), "queue_group": String("q")})) }] }] }
ServiceSet { services: [ServiceInfo { name: "dynemo_init_backend_720278f8", id: "eOHMc4ndRw8s5flv4WOZx7", version: "0.0.1", started: "2025-02-26T18:54:04.917294605Z", endpoints: [EndpointInfo { name: "dynemo_init_backend_720278f8-generate-694d951a80e06abf", subject: "dynemo_init_backend_720278f8.generate-694d951a80e06abf", data: Some(Metrics(Object {"average_processing_time": Number(53662), "data": Object {"val": Number(10)}, "last_error": String(""), "num_errors": Number(0), "num_requests": Number(2), "processing_time": Number(107325), "queue_group": String("q")})) }] }] }
```
Note the following stats in the output demonstrate the custom
......
......@@ -16,7 +16,7 @@
use futures::StreamExt;
use service_metrics::DEFAULT_NAMESPACE;
use triton_distributed_runtime::{
use dynemo_runtime::{
logging, protocols::annotated::Annotated, utils::Duration, DistributedRuntime, Result, Runtime,
Worker,
};
......
......@@ -15,8 +15,7 @@
use service_metrics::{MyStats, DEFAULT_NAMESPACE};
use std::sync::Arc;
use triton_distributed_runtime::{
use dynemo_runtime::{
logging,
pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
......@@ -25,6 +24,7 @@ use triton_distributed_runtime::{
protocols::annotated::Annotated,
stream, DistributedRuntime, Result, Runtime, Worker,
};
use std::sync::Arc;
fn main() -> Result<()> {
logging::init();
......
......@@ -15,7 +15,7 @@
use serde::{Deserialize, Serialize};
pub const DEFAULT_NAMESPACE: &str = "triton-init";
pub const DEFAULT_NAMESPACE: &str = "dynemo";
#[derive(Serialize, Deserialize)]
// Dummy Stats object to demonstrate how to attach a custom stats handler
......
......@@ -1374,6 +1374,58 @@ dependencies = [
"reborrow",
]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.28",
"toktrie_hf_tokenizers 0.6.28",
"tracing",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "dynemo-run"
version = "0.1.0"
......@@ -1384,6 +1436,8 @@ dependencies = [
"async-trait",
"clap",
"dialoguer",
"dynemo-llm",
"dynemo-runtime",
"futures",
"futures-util",
"libc",
......@@ -1395,8 +1449,50 @@ dependencies = [
"tokio-util",
"tracing",
"tracing-subscriber",
"triton-distributed-llm",
"triton-distributed-runtime",
]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix 0.29.0",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
......@@ -5594,102 +5690,6 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.28",
"toktrie_hf_tokenizers 0.6.28",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix 0.29.0",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "try-lock"
version = "0.2.5"
......
......@@ -22,13 +22,13 @@ homepage = "https://github.com/dynemo-ai/dynemo"
license = "Apache-2.0"
[features]
mistralrs = ["triton-distributed-llm/mistralrs"]
sglang = ["triton-distributed-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"]
vllm = ["triton-distributed-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"]
llamacpp = ["triton-distributed-llm/llamacpp"]
trtllm = ["triton-distributed-llm/trtllm"]
cuda = ["triton-distributed-llm/cuda"]
metal = ["triton-distributed-llm/metal"]
mistralrs = ["dynemo-llm/mistralrs"]
sglang = ["dynemo-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"]
vllm = ["dynemo-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"]
llamacpp = ["dynemo-llm/llamacpp"]
trtllm = ["dynemo-llm/trtllm"]
cuda = ["dynemo-llm/cuda"]
metal = ["dynemo-llm/metal"]
[dependencies]
anyhow = "1"
......@@ -48,5 +48,5 @@ tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["codec", "net"] }
tracing = { version = "0.1" }
tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time", "json"] }
triton-distributed-runtime = { path = "../../lib/runtime" }
triton-distributed-llm = { path = "../../lib/llm" }
dynemo-runtime = { path = "../../lib/runtime" }
dynemo-llm = { path = "../../lib/llm" }
......@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use triton_distributed_llm::{
use dynemo_llm::{
backend::Backend,
http::service::discovery::ModelEntry,
model_type::ModelType,
......@@ -25,10 +25,10 @@ use triton_distributed_llm::{
Annotated,
},
};
use triton_distributed_runtime::pipeline::{
use dynemo_runtime::pipeline::{
network::Ingress, ManyOut, Operator, SegmentSource, ServiceBackend, SingleIn, Source,
};
use triton_distributed_runtime::{protocols::Endpoint, DistributedRuntime, Runtime};
use dynemo_runtime::{protocols::Endpoint, DistributedRuntime, Runtime};
use crate::EngineConfig;
......
......@@ -15,7 +15,7 @@
use std::sync::Arc;
use triton_distributed_llm::{
use dynemo_llm::{
backend::Backend,
http::service::{discovery, service_v2},
model_type::ModelType,
......@@ -27,7 +27,7 @@ use triton_distributed_llm::{
Annotated,
},
};
use triton_distributed_runtime::{
use dynemo_runtime::{
pipeline::{ManyOut, Operator, ServiceBackend, ServiceFrontend, SingleIn, Source},
DistributedRuntime, Runtime,
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment