Commit 1af7433b authored by Neelay Shah's avatar Neelay Shah Committed by GitHub
Browse files

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: default avatarGraham King <grahamk@nvidia.com>
parent ee4ef06b
...@@ -33,11 +33,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat ...@@ -33,11 +33,7 @@ from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.remote_prefill import RemotePrefillParams, RemotePrefillRequest from vllm.remote_prefill import RemotePrefillParams, RemotePrefillRequest
from triton_distributed.runtime import ( from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
DistributedRuntime,
triton_endpoint,
triton_worker,
)
class RequestHandler: class RequestHandler:
...@@ -87,7 +83,7 @@ class RequestHandler: ...@@ -87,7 +83,7 @@ class RequestHandler:
return callback return callback
@triton_endpoint(ChatCompletionRequest, ChatCompletionStreamResponse) @dynemo_endpoint(ChatCompletionRequest, ChatCompletionStreamResponse)
async def generate(self, request): async def generate(self, request):
if not self.initialized: if not self.initialized:
await self.init() await self.init()
...@@ -113,7 +109,7 @@ class RequestHandler: ...@@ -113,7 +109,7 @@ class RequestHandler:
yield response yield response
@triton_worker() @dynemo_worker()
async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs): async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs):
component = runtime.namespace("test-nixl").component("vllm") component = runtime.namespace("test-nixl").component("vllm")
await component.create_service() await component.create_service()
......
...@@ -955,6 +955,99 @@ dependencies = [ ...@@ -955,6 +955,99 @@ dependencies = [
"syn 2.0.98", "syn 2.0.98",
] ]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"axum 0.8.1",
"bindgen",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"minijinja",
"minijinja-contrib",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie",
"toktrie_hf_tokenizers",
"tracing",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]] [[package]]
name = "ed25519" name = "ed25519"
version = "2.2.3" version = "2.2.3"
...@@ -1370,7 +1463,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" ...@@ -1370,7 +1463,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
name = "hello_world" name = "hello_world"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"triton-distributed-runtime", "dynemo-runtime",
] ]
[[package]] [[package]]
...@@ -1395,11 +1488,11 @@ name = "http" ...@@ -1395,11 +1488,11 @@ name = "http"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"clap", "clap",
"dynemo-llm",
"dynemo-runtime",
"serde", "serde",
"serde_json", "serde_json",
"tokio", "tokio",
"triton-distributed-llm",
"triton-distributed-runtime",
] ]
[[package]] [[package]]
...@@ -1895,13 +1988,13 @@ name = "llmctl" ...@@ -1895,13 +1988,13 @@ name = "llmctl"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"clap", "clap",
"dynemo-llm",
"dynemo-runtime",
"serde", "serde",
"serde_json", "serde_json",
"tabled", "tabled",
"tokio", "tokio",
"tracing", "tracing",
"triton-distributed-llm",
"triton-distributed-runtime",
] ]
[[package]] [[package]]
...@@ -3297,11 +3390,11 @@ dependencies = [ ...@@ -3297,11 +3390,11 @@ dependencies = [
name = "service_metrics" name = "service_metrics"
version = "0.2.0" version = "0.2.0"
dependencies = [ dependencies = [
"dynemo-runtime",
"futures", "futures",
"serde", "serde",
"serde_json", "serde_json",
"tokio", "tokio",
"triton-distributed-runtime",
] ]
[[package]] [[package]]
...@@ -4000,99 +4093,6 @@ dependencies = [ ...@@ -4000,99 +4093,6 @@ dependencies = [
"tracing-serde", "tracing-serde",
] ]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"axum 0.8.1",
"bindgen",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"minijinja",
"minijinja-contrib",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie",
"toktrie_hf_tokenizers",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]] [[package]]
name = "try-lock" name = "try-lock"
version = "0.2.5" version = "0.2.5"
......
...@@ -27,14 +27,14 @@ version = "0.2.0" ...@@ -27,14 +27,14 @@ version = "0.2.0"
edition = "2021" edition = "2021"
authors = ["NVIDIA"] authors = ["NVIDIA"]
license = "Apache-2.0" license = "Apache-2.0"
homepage = "https://github.com/triton-inference-server/triton_distributed" homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/triton-inference-server/triton_distributed" repository = "https://github.com/dynemo-ai/dynemo.git"
[workspace.dependencies] [workspace.dependencies]
# local or crates.io # local or crates.io
triton-distributed-runtime = { path = "../../lib/runtime" } dynemo-runtime = { path = "../../lib/runtime" }
triton-distributed-llm = { path = "../../lib/llm" } dynemo-llm = { path = "../../lib/llm" }
# crates.io # crates.io
anyhow = { version = "1" } anyhow = { version = "1" }
......
...@@ -22,6 +22,6 @@ license.workspace = true ...@@ -22,6 +22,6 @@ license.workspace = true
homepage.workspace = true homepage.workspace = true
[dependencies] [dependencies]
triton-distributed-runtime = { workspace = true } dynemo-runtime = { workspace = true }
# third-party # third-party
...@@ -13,11 +13,11 @@ ...@@ -13,11 +13,11 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use hello_world::DEFAULT_NAMESPACE; use dynemo_runtime::{
use triton_distributed_runtime::{
logging, protocols::annotated::Annotated, stream::StreamExt, DistributedRuntime, Result, logging, protocols::annotated::Annotated, stream::StreamExt, DistributedRuntime, Result,
Runtime, Worker, Runtime, Worker,
}; };
use hello_world::DEFAULT_NAMESPACE;
fn main() -> Result<()> { fn main() -> Result<()> {
logging::init(); logging::init();
......
...@@ -13,9 +13,7 @@ ...@@ -13,9 +13,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use hello_world::DEFAULT_NAMESPACE; use dynemo_runtime::{
use std::sync::Arc;
use triton_distributed_runtime::{
logging, logging,
pipeline::{ pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut, async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
...@@ -24,6 +22,8 @@ use triton_distributed_runtime::{ ...@@ -24,6 +22,8 @@ use triton_distributed_runtime::{
protocols::annotated::Annotated, protocols::annotated::Annotated,
stream, DistributedRuntime, Result, Runtime, Worker, stream, DistributedRuntime, Result, Runtime, Worker,
}; };
use hello_world::DEFAULT_NAMESPACE;
use std::sync::Arc;
fn main() -> Result<()> { fn main() -> Result<()> {
logging::init(); logging::init();
......
...@@ -13,4 +13,4 @@ ...@@ -13,4 +13,4 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
pub const DEFAULT_NAMESPACE: &str = "triton-init"; pub const DEFAULT_NAMESPACE: &str = "dynemo";
...@@ -24,8 +24,8 @@ homepage.workspace = true ...@@ -24,8 +24,8 @@ homepage.workspace = true
repository.workspace = true repository.workspace = true
[dependencies] [dependencies]
triton-distributed-runtime = { workspace = true} dynemo-runtime = { workspace = true}
triton-distributed-llm = { workspace = true} dynemo-llm = { workspace = true}
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
serde = { workspace = true } serde = { workspace = true }
......
...@@ -16,14 +16,14 @@ ...@@ -16,14 +16,14 @@
use clap::Parser; use clap::Parser;
use std::sync::Arc; use std::sync::Arc;
use triton_distributed_llm::{ use dynemo_llm::{
http::service::{ http::service::{
discovery::{model_watcher, ModelWatchState}, discovery::{model_watcher, ModelWatchState},
service_v2::HttpService, service_v2::HttpService,
}, },
model_type::ModelType, model_type::ModelType,
}; };
use triton_distributed_runtime::{ use dynemo_runtime::{
logging, transports::etcd::PrefixWatcher, DistributedRuntime, Result, Runtime, Worker, logging, transports::etcd::PrefixWatcher, DistributedRuntime, Result, Runtime, Worker,
}; };
......
...@@ -23,8 +23,8 @@ homepage.workspace = true ...@@ -23,8 +23,8 @@ homepage.workspace = true
repository.workspace = true repository.workspace = true
[dependencies] [dependencies]
triton-distributed-runtime = { workspace = true} dynemo-runtime = { workspace = true}
triton-distributed-llm = { workspace = true} dynemo-llm = { workspace = true}
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
......
...@@ -16,8 +16,8 @@ ...@@ -16,8 +16,8 @@
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use tracing as log; use tracing as log;
use triton_distributed_llm::{http::service::discovery::ModelEntry, model_type::ModelType}; use dynemo_llm::{http::service::discovery::ModelEntry, model_type::ModelType};
use triton_distributed_runtime::{ use dynemo_runtime::{
distributed::DistributedConfig, logging, protocols::Endpoint, raise, DistributedRuntime, distributed::DistributedConfig, logging, protocols::Endpoint, raise, DistributedRuntime,
Result, Runtime, Worker, Result, Runtime, Worker,
}; };
......
...@@ -23,10 +23,10 @@ homepage.workspace = true ...@@ -23,10 +23,10 @@ homepage.workspace = true
repository.workspace = true repository.workspace = true
[dependencies] [dependencies]
triton-distributed-runtime = { workspace = true } dynemo-runtime = { workspace = true }
# third-party # third-party
futures = { workspace = true } futures = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
tokio = { workspace = true } tokio = { workspace = true }
\ No newline at end of file
...@@ -4,14 +4,14 @@ This example extends the hello_world example by calling the `scrape_service` met ...@@ -4,14 +4,14 @@ This example extends the hello_world example by calling the `scrape_service` met
with the service name for the request response the client just issued a request. with the service name for the request response the client just issued a request.
```bash ```bash
TRD_LOG=debug cargo run --bin server DYN_LOG=debug cargo run --bin server
``` ```
The client can now observe some basic statistics about each instance of the service The client can now observe some basic statistics about each instance of the service
begin hosted. begin hosted.
```bash ```bash
TRD_LOG=info cargo run --bin client DYN_LOG=info cargo run --bin client
``` ```
## Example Output ## Example Output
...@@ -27,7 +27,7 @@ Annotated { data: Some("o"), id: None, event: None, comment: None } ...@@ -27,7 +27,7 @@ Annotated { data: Some("o"), id: None, event: None, comment: None }
Annotated { data: Some("r"), id: None, event: None, comment: None } Annotated { data: Some("r"), id: None, event: None, comment: None }
Annotated { data: Some("l"), id: None, event: None, comment: None } Annotated { data: Some("l"), id: None, event: None, comment: None }
Annotated { data: Some("d"), id: None, event: None, comment: None } Annotated { data: Some("d"), id: None, event: None, comment: None }
ServiceSet { services: [ServiceInfo { name: "triton_init_backend_720278f8", id: "eOHMc4ndRw8s5flv4WOZx7", version: "0.0.1", started: "2025-02-26T18:54:04.917294605Z", endpoints: [EndpointInfo { name: "triton_init_backend_720278f8-generate-694d951a80e06abf", subject: "triton_init_backend_720278f8.generate-694d951a80e06abf", data: Some(Metrics(Object {"average_processing_time": Number(53662), "data": Object {"val": Number(10)}, "last_error": String(""), "num_errors": Number(0), "num_requests": Number(2), "processing_time": Number(107325), "queue_group": String("q")})) }] }] } ServiceSet { services: [ServiceInfo { name: "dynemo_init_backend_720278f8", id: "eOHMc4ndRw8s5flv4WOZx7", version: "0.0.1", started: "2025-02-26T18:54:04.917294605Z", endpoints: [EndpointInfo { name: "dynemo_init_backend_720278f8-generate-694d951a80e06abf", subject: "dynemo_init_backend_720278f8.generate-694d951a80e06abf", data: Some(Metrics(Object {"average_processing_time": Number(53662), "data": Object {"val": Number(10)}, "last_error": String(""), "num_errors": Number(0), "num_requests": Number(2), "processing_time": Number(107325), "queue_group": String("q")})) }] }] }
``` ```
Note the following stats in the output demonstrate the custom Note the following stats in the output demonstrate the custom
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
use futures::StreamExt; use futures::StreamExt;
use service_metrics::DEFAULT_NAMESPACE; use service_metrics::DEFAULT_NAMESPACE;
use triton_distributed_runtime::{ use dynemo_runtime::{
logging, protocols::annotated::Annotated, utils::Duration, DistributedRuntime, Result, Runtime, logging, protocols::annotated::Annotated, utils::Duration, DistributedRuntime, Result, Runtime,
Worker, Worker,
}; };
......
...@@ -15,8 +15,7 @@ ...@@ -15,8 +15,7 @@
use service_metrics::{MyStats, DEFAULT_NAMESPACE}; use service_metrics::{MyStats, DEFAULT_NAMESPACE};
use std::sync::Arc; use dynemo_runtime::{
use triton_distributed_runtime::{
logging, logging,
pipeline::{ pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut, async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
...@@ -25,6 +24,7 @@ use triton_distributed_runtime::{ ...@@ -25,6 +24,7 @@ use triton_distributed_runtime::{
protocols::annotated::Annotated, protocols::annotated::Annotated,
stream, DistributedRuntime, Result, Runtime, Worker, stream, DistributedRuntime, Result, Runtime, Worker,
}; };
use std::sync::Arc;
fn main() -> Result<()> { fn main() -> Result<()> {
logging::init(); logging::init();
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
pub const DEFAULT_NAMESPACE: &str = "triton-init"; pub const DEFAULT_NAMESPACE: &str = "dynemo";
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
// Dummy Stats object to demonstrate how to attach a custom stats handler // Dummy Stats object to demonstrate how to attach a custom stats handler
......
...@@ -1374,6 +1374,58 @@ dependencies = [ ...@@ -1374,6 +1374,58 @@ dependencies = [
"reborrow", "reborrow",
] ]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.28",
"toktrie_hf_tokenizers 0.6.28",
"tracing",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]] [[package]]
name = "dynemo-run" name = "dynemo-run"
version = "0.1.0" version = "0.1.0"
...@@ -1384,6 +1436,8 @@ dependencies = [ ...@@ -1384,6 +1436,8 @@ dependencies = [
"async-trait", "async-trait",
"clap", "clap",
"dialoguer", "dialoguer",
"dynemo-llm",
"dynemo-runtime",
"futures", "futures",
"futures-util", "futures-util",
"libc", "libc",
...@@ -1395,8 +1449,50 @@ dependencies = [ ...@@ -1395,8 +1449,50 @@ dependencies = [
"tokio-util", "tokio-util",
"tracing", "tracing",
"tracing-subscriber", "tracing-subscriber",
"triton-distributed-llm", ]
"triton-distributed-runtime",
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix 0.29.0",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
] ]
[[package]] [[package]]
...@@ -5594,102 +5690,6 @@ dependencies = [ ...@@ -5594,102 +5690,6 @@ dependencies = [
"tracing-serde", "tracing-serde",
] ]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"async_zmq",
"axum 0.8.1",
"bindgen 0.70.1",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"llama-cpp-2",
"minijinja",
"minijinja-contrib",
"mistralrs",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum 0.27.1",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie 0.6.28",
"toktrie_hf_tokenizers 0.6.28",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix 0.29.0",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid 1.14.0",
"validator",
"xxhash-rust",
]
[[package]] [[package]]
name = "try-lock" name = "try-lock"
version = "0.2.5" version = "0.2.5"
......
...@@ -22,13 +22,13 @@ homepage = "https://github.com/dynemo-ai/dynemo" ...@@ -22,13 +22,13 @@ homepage = "https://github.com/dynemo-ai/dynemo"
license = "Apache-2.0" license = "Apache-2.0"
[features] [features]
mistralrs = ["triton-distributed-llm/mistralrs"] mistralrs = ["dynemo-llm/mistralrs"]
sglang = ["triton-distributed-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"] sglang = ["dynemo-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"]
vllm = ["triton-distributed-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"] vllm = ["dynemo-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"]
llamacpp = ["triton-distributed-llm/llamacpp"] llamacpp = ["dynemo-llm/llamacpp"]
trtllm = ["triton-distributed-llm/trtllm"] trtllm = ["dynemo-llm/trtllm"]
cuda = ["triton-distributed-llm/cuda"] cuda = ["dynemo-llm/cuda"]
metal = ["triton-distributed-llm/metal"] metal = ["dynemo-llm/metal"]
[dependencies] [dependencies]
anyhow = "1" anyhow = "1"
...@@ -48,5 +48,5 @@ tokio = { version = "1", features = ["full"] } ...@@ -48,5 +48,5 @@ tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["codec", "net"] } tokio-util = { version = "0.7", features = ["codec", "net"] }
tracing = { version = "0.1" } tracing = { version = "0.1" }
tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time", "json"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time", "json"] }
triton-distributed-runtime = { path = "../../lib/runtime" } dynemo-runtime = { path = "../../lib/runtime" }
triton-distributed-llm = { path = "../../lib/llm" } dynemo-llm = { path = "../../lib/llm" }
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
use triton_distributed_llm::{ use dynemo_llm::{
backend::Backend, backend::Backend,
http::service::discovery::ModelEntry, http::service::discovery::ModelEntry,
model_type::ModelType, model_type::ModelType,
...@@ -25,10 +25,10 @@ use triton_distributed_llm::{ ...@@ -25,10 +25,10 @@ use triton_distributed_llm::{
Annotated, Annotated,
}, },
}; };
use triton_distributed_runtime::pipeline::{ use dynemo_runtime::pipeline::{
network::Ingress, ManyOut, Operator, SegmentSource, ServiceBackend, SingleIn, Source, network::Ingress, ManyOut, Operator, SegmentSource, ServiceBackend, SingleIn, Source,
}; };
use triton_distributed_runtime::{protocols::Endpoint, DistributedRuntime, Runtime}; use dynemo_runtime::{protocols::Endpoint, DistributedRuntime, Runtime};
use crate::EngineConfig; use crate::EngineConfig;
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
use std::sync::Arc; use std::sync::Arc;
use triton_distributed_llm::{ use dynemo_llm::{
backend::Backend, backend::Backend,
http::service::{discovery, service_v2}, http::service::{discovery, service_v2},
model_type::ModelType, model_type::ModelType,
...@@ -27,7 +27,7 @@ use triton_distributed_llm::{ ...@@ -27,7 +27,7 @@ use triton_distributed_llm::{
Annotated, Annotated,
}, },
}; };
use triton_distributed_runtime::{ use dynemo_runtime::{
pipeline::{ManyOut, Operator, ServiceBackend, ServiceFrontend, SingleIn, Source}, pipeline::{ManyOut, Operator, ServiceBackend, ServiceFrontend, SingleIn, Source},
DistributedRuntime, Runtime, DistributedRuntime, Runtime,
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment