Commit 1af7433b authored by Neelay Shah, committed by GitHub
Browse files

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
...@@ -18,11 +18,11 @@ version = "0.2.1" ...@@ -18,11 +18,11 @@ version = "0.2.1"
edition = "2021" edition = "2021"
authors = ["NVIDIA"] authors = ["NVIDIA"]
license = "Apache-2.0" license = "Apache-2.0"
homepage = "https://github.com/triton-inference-server/triton_distributed" homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/triton-inference-server/triton_distributed" repository = "https://github.com/dynemo-ai/dynemo.git"
[package] [package]
name = "triton-distributed-llm" name = "dynemo-llm"
version.workspace = true version.workspace = true
edition.workspace = true edition.workspace = true
authors.workspace = true authors.workspace = true
...@@ -43,7 +43,7 @@ vulkan = ["llama-cpp-2/vulkan"] ...@@ -43,7 +43,7 @@ vulkan = ["llama-cpp-2/vulkan"]
[workspace.dependencies] [workspace.dependencies]
# local or crates.io # local or crates.io
triton-distributed-runtime = { version = "0.2.0", path = "../runtime" } dynemo-runtime = { version = "0.2.0", path = "../runtime" }
# crates.io # crates.io
anyhow = { version = "1" } anyhow = { version = "1" }
...@@ -66,7 +66,7 @@ strum = { version = "0.27", features = ["derive"] } ...@@ -66,7 +66,7 @@ strum = { version = "0.27", features = ["derive"] }
[dependencies] [dependencies]
# repo # repo
triton-distributed-runtime = { workspace = true } dynemo-runtime = { workspace = true }
# workspace # workspace
anyhow = { workspace = true } anyhow = { workspace = true }
......
...@@ -34,7 +34,7 @@ use futures::stream::{self, StreamExt}; ...@@ -34,7 +34,7 @@ use futures::stream::{self, StreamExt};
use tracing as log; use tracing as log;
use crate::model_card::model::{ModelDeploymentCard, TokenizerKind}; use crate::model_card::model::{ModelDeploymentCard, TokenizerKind};
use triton_distributed_runtime::{ use dynemo_runtime::{
pipeline::{ pipeline::{
async_trait, AsyncEngineContextProvider, ManyOut, Operator, ResponseStream, async_trait, AsyncEngineContextProvider, ManyOut, Operator, ResponseStream,
ServerStreamingEngine, SingleIn, ServerStreamingEngine, SingleIn,
......
...@@ -22,6 +22,11 @@ use std::{ ...@@ -22,6 +22,11 @@ use std::{
use anyhow::Context; use anyhow::Context;
use async_stream::stream; use async_stream::stream;
use async_trait::async_trait; use async_trait::async_trait;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::CancellationToken;
use llama_cpp_2::{ use llama_cpp_2::{
context::{params::LlamaContextParams, LlamaContext}, context::{params::LlamaContextParams, LlamaContext},
llama_backend::LlamaBackend, llama_backend::LlamaBackend,
...@@ -30,11 +35,6 @@ use llama_cpp_2::{ ...@@ -30,11 +35,6 @@ use llama_cpp_2::{
sampling::LlamaSampler, sampling::LlamaSampler,
token::LlamaToken, token::LlamaToken,
}; };
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed_runtime::pipeline::error as pipeline_error;
use triton_distributed_runtime::pipeline::{Error, ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated;
use triton_distributed_runtime::CancellationToken;
use crate::backend::ExecutionContext; use crate::backend::ExecutionContext;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput}; use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
......
...@@ -28,10 +28,10 @@ use mistralrs::{ ...@@ -28,10 +28,10 @@ use mistralrs::{
}; };
use tokio::sync::mpsc::channel; use tokio::sync::mpsc::channel;
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream}; use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed_runtime::pipeline::error as pipeline_error; use dynemo_runtime::pipeline::error as pipeline_error;
use triton_distributed_runtime::pipeline::{Error, ManyOut, SingleIn}; use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated; use dynemo_runtime::protocols::annotated::Annotated;
use crate::protocols::openai::chat_completions::{ use crate::protocols::openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse, NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
......
...@@ -17,8 +17,8 @@ use std::path::Path; ...@@ -17,8 +17,8 @@ use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use crate::backend::ExecutionContext; use crate::backend::ExecutionContext;
use triton_distributed_runtime::pipeline::error as pipeline_error; use dynemo_runtime::pipeline::error as pipeline_error;
use triton_distributed_runtime::CancellationToken; use dynemo_runtime::CancellationToken;
mod worker; mod worker;
......
...@@ -19,10 +19,10 @@ use async_stream::stream; ...@@ -19,10 +19,10 @@ use async_stream::stream;
use async_trait::async_trait; use async_trait::async_trait;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput}; use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream}; use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed_runtime::pipeline::{Error, ManyOut, SingleIn}; use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated; use dynemo_runtime::protocols::annotated::Annotated;
use triton_distributed_runtime::runtime::CancellationToken; use dynemo_runtime::runtime::CancellationToken;
use crate::engines::MultiNodeConfig; use crate::engines::MultiNodeConfig;
......
...@@ -37,8 +37,8 @@ use tokio::sync::mpsc::Sender; ...@@ -37,8 +37,8 @@ use tokio::sync::mpsc::Sender;
use tokio::{io::AsyncBufReadExt, sync::mpsc::error::SendError}; use tokio::{io::AsyncBufReadExt, sync::mpsc::error::SendError};
use tokio::{io::AsyncReadExt as _, task::JoinHandle}; use tokio::{io::AsyncReadExt as _, task::JoinHandle};
use triton_distributed_runtime::protocols::annotated::Annotated; use dynemo_runtime::protocols::annotated::Annotated;
use triton_distributed_runtime::runtime::CancellationToken; use dynemo_runtime::runtime::CancellationToken;
use crate::engines::sglang::MultiGPUConfig; use crate::engines::sglang::MultiGPUConfig;
use crate::engines::MultiNodeConfig; use crate::engines::MultiNodeConfig;
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
use std::sync::Arc; use std::sync::Arc;
use crate::backend::ExecutionContext; use crate::backend::ExecutionContext;
use triton_distributed_runtime::pipeline::error as pipeline_error; use dynemo_runtime::pipeline::error as pipeline_error;
pub mod executor; pub mod executor;
......
...@@ -15,12 +15,12 @@ ...@@ -15,12 +15,12 @@
use anyhow::{Error, Result}; use anyhow::{Error, Result};
use async_trait::async_trait; use async_trait::async_trait;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use futures::stream; use futures::stream;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed_runtime::pipeline::{ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated;
use super::Executor; use super::Executor;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput}; use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
......
...@@ -19,8 +19,8 @@ use std::pin::Pin; ...@@ -19,8 +19,8 @@ use std::pin::Pin;
use std::sync::Arc; use std::sync::Arc;
use std::task::{Context, Poll}; use std::task::{Context, Poll};
use triton_distributed_runtime::pipeline::error as pipeline_error; use dynemo_runtime::pipeline::error as pipeline_error;
use triton_distributed_runtime::CancellationToken; use dynemo_runtime::CancellationToken;
use crate::backend::ExecutionContext; use crate::backend::ExecutionContext;
use crate::engines::MultiNodeConfig; use crate::engines::MultiNodeConfig;
......
...@@ -21,10 +21,10 @@ use async_trait::async_trait; ...@@ -21,10 +21,10 @@ use async_trait::async_trait;
use crate::engines::vllm::worker; use crate::engines::vllm::worker;
use crate::engines::MultiNodeConfig; use crate::engines::MultiNodeConfig;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput}; use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream}; use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed_runtime::pipeline::{Error, ManyOut, SingleIn}; use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated; use dynemo_runtime::protocols::annotated::Annotated;
use triton_distributed_runtime::runtime::CancellationToken; use dynemo_runtime::runtime::CancellationToken;
pub struct VllmEngine { pub struct VllmEngine {
cancel_token: CancellationToken, cancel_token: CancellationToken,
......
...@@ -24,7 +24,7 @@ use tokio::select; ...@@ -24,7 +24,7 @@ use tokio::select;
use tokio::time; use tokio::time;
use tracing; use tracing;
use triton_distributed_runtime::CancellationToken; use dynemo_runtime::CancellationToken;
/// Default is 16 seconds, we make it a bit shorter /// Default is 16 seconds, we make it a bit shorter
const RAY_STOP_TIMEOUT_SECS: u32 = 10; const RAY_STOP_TIMEOUT_SECS: u32 = 10;
......
...@@ -19,6 +19,8 @@ use std::{ ...@@ -19,6 +19,8 @@ use std::{
}; };
use async_zmq::{SinkExt, StreamExt}; use async_zmq::{SinkExt, StreamExt};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::CancellationToken;
use pyo3::{ use pyo3::{
prelude::*, prelude::*,
types::{IntoPyDict, PyBytes, PyString}, types::{IntoPyDict, PyBytes, PyString},
...@@ -26,8 +28,6 @@ use pyo3::{ ...@@ -26,8 +28,6 @@ use pyo3::{
use tokio::sync::mpsc::Sender; use tokio::sync::mpsc::Sender;
use tokio::task::JoinHandle; use tokio::task::JoinHandle;
use tokio::{io::AsyncBufReadExt, sync::mpsc::error::SendError}; use tokio::{io::AsyncBufReadExt, sync::mpsc::error::SendError};
use triton_distributed_runtime::protocols::annotated::Annotated;
use triton_distributed_runtime::CancellationToken;
use crate::engines::MultiNodeConfig; use crate::engines::MultiNodeConfig;
use crate::protocols::common::llm_backend::LLMEngineOutput; use crate::protocols::common::llm_backend::LLMEngineOutput;
......
...@@ -18,7 +18,7 @@ use std::sync::Arc; ...@@ -18,7 +18,7 @@ use std::sync::Arc;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use tokio::sync::mpsc::Receiver; use tokio::sync::mpsc::Receiver;
use triton_distributed_runtime::{ use dynemo_runtime::{
protocols::{self, annotated::Annotated}, protocols::{self, annotated::Annotated},
raise, raise,
transports::etcd::{KeyValue, WatchEvent}, transports::etcd::{KeyValue, WatchEvent},
......
...@@ -48,7 +48,7 @@ use crate::types::{ ...@@ -48,7 +48,7 @@ use crate::types::{
Annotated, Annotated,
}; };
use triton_distributed_runtime::pipeline::{AsyncEngineContext, Context}; use dynemo_runtime::pipeline::{AsyncEngineContext, Context};
#[derive(Serialize, Deserialize)] #[derive(Serialize, Deserialize)]
pub(crate) struct ErrorResponse { pub(crate) struct ErrorResponse {
...@@ -91,7 +91,7 @@ impl ErrorResponse { ...@@ -91,7 +91,7 @@ impl ErrorResponse {
) )
} }
/// The OAI endpoints call an [`triton_distributed_runtime::engine::AsyncEngine`] which are specialized to return /// The OAI endpoints call an [`dynemo_runtime::engine::AsyncEngine`] which are specialized to return
/// an [`anyhow::Error`]. This method will convert the [`anyhow::Error`] into an [`HttpError`]. /// an [`anyhow::Error`]. This method will convert the [`anyhow::Error`] into an [`HttpError`].
/// If successful, it will return the [`HttpError`] as an [`ErrorResponse::internal_server_error`] /// If successful, it will return the [`HttpError`] as an [`ErrorResponse::internal_server_error`]
/// with the details of the error. /// with the details of the error.
...@@ -516,7 +516,7 @@ pub fn list_models_router( ...@@ -516,7 +516,7 @@ pub fn list_models_router(
path: Option<String>, path: Option<String>,
) -> (Vec<RouteDoc>, Router) { ) -> (Vec<RouteDoc>, Router) {
// TODO: Why do we have this endpoint? // TODO: Why do we have this endpoint?
let custom_path = path.unwrap_or("/triton/alpha/list-models".to_string()); let custom_path = path.unwrap_or("/dynemo/alpha/list-models".to_string());
let doc_for_custom = RouteDoc::new(axum::http::Method::GET, &custom_path); let doc_for_custom = RouteDoc::new(axum::http::Method::GET, &custom_path);
// Standard OpenAI compatible list models endpoint // Standard OpenAI compatible list models endpoint
......
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
// limitations under the License. // limitations under the License.
use anyhow::Result; use anyhow::Result;
use dynemo_runtime::{component::Component, DistributedRuntime};
use futures::stream::StreamExt; use futures::stream::StreamExt;
use std::{sync::Arc, time::Duration}; use std::{sync::Arc, time::Duration};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing; use tracing;
use triton_distributed_runtime::{component::Component, DistributedRuntime};
pub mod indexer; pub mod indexer;
pub mod protocols; pub mod protocols;
...@@ -62,7 +62,7 @@ impl KvRouter { ...@@ -62,7 +62,7 @@ impl KvRouter {
} }
pub async fn new( pub async fn new(
nats_client: triton_distributed_runtime::transports::nats::Client, nats_client: dynemo_runtime::transports::nats::Client,
service_name: String, service_name: String,
kv_subject: String, kv_subject: String,
) -> Result<Arc<Self>> { ) -> Result<Arc<Self>> {
...@@ -135,7 +135,7 @@ impl KvRouter { ...@@ -135,7 +135,7 @@ impl KvRouter {
} }
async fn collect_endpoints( async fn collect_endpoints(
nats_client: triton_distributed_runtime::transports::nats::Client, nats_client: dynemo_runtime::transports::nats::Client,
service_name: String, service_name: String,
ep_tx: tokio::sync::mpsc::Sender<ProcessedEndpoints>, ep_tx: tokio::sync::mpsc::Sender<ProcessedEndpoints>,
cancel: CancellationToken, cancel: CancellationToken,
......
...@@ -15,11 +15,7 @@ ...@@ -15,11 +15,7 @@
use crate::kv_router::{indexer::RouterEvent, protocols::*, KV_EVENT_SUBJECT}; use crate::kv_router::{indexer::RouterEvent, protocols::*, KV_EVENT_SUBJECT};
use async_trait::async_trait; use async_trait::async_trait;
use futures::stream; use dynemo_runtime::{
use std::sync::Arc;
use tokio::sync::mpsc;
use tracing as log;
use triton_distributed_runtime::{
component::Component, component::Component,
pipeline::{ pipeline::{
network::Ingress, AsyncEngine, AsyncEngineContextProvider, ManyOut, ResponseStream, network::Ingress, AsyncEngine, AsyncEngineContextProvider, ManyOut, ResponseStream,
...@@ -28,6 +24,10 @@ use triton_distributed_runtime::{ ...@@ -28,6 +24,10 @@ use triton_distributed_runtime::{
protocols::annotated::Annotated, protocols::annotated::Annotated,
DistributedRuntime, Error, Result, DistributedRuntime, Error, Result,
}; };
use futures::stream;
use std::sync::Arc;
use tokio::sync::mpsc;
use tracing as log;
pub struct KvEventPublisher { pub struct KvEventPublisher {
tx: mpsc::UnboundedSender<KvCacheEvent>, tx: mpsc::UnboundedSender<KvCacheEvent>,
......
...@@ -19,12 +19,12 @@ pub use crate::kv_router::protocols::ForwardPassMetrics; ...@@ -19,12 +19,12 @@ pub use crate::kv_router::protocols::ForwardPassMetrics;
use anyhow::Result; use anyhow::Result;
use derive_builder::Builder; use derive_builder::Builder;
use triton_distributed_runtime::pipeline::network::{ use dynemo_runtime::pipeline::network::{
ingress::push_endpoint::PushEndpoint, ingress::push_endpoint::PushEndpoint,
PushWorkHandler, PushWorkHandler,
}; };
use triton_distributed_runtime::transports::nats::{self, ServiceExt}; use dynemo_runtime::transports::nats::{self, ServiceExt};
use tokio::sync::watch; use tokio::sync::watch;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
......
...@@ -13,9 +13,9 @@ ...@@ -13,9 +13,9 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
//! # Triton LLM //! # Dynemo LLM
//! //!
//! The `triton-llm` crate is a Rust library that provides a set of traits and types for building //! The `dynemo-llm` crate is a Rust library that provides a set of traits and types for building
//! distributed LLM inference solutions. //! distributed LLM inference solutions.
pub mod backend; pub mod backend;
......
...@@ -37,7 +37,7 @@ use std::time::Duration; ...@@ -37,7 +37,7 @@ use std::time::Duration;
use derive_builder::Builder; use derive_builder::Builder;
use triton_distributed_runtime::slug::Slug; use dynemo_runtime::slug::Slug;
pub const BUCKET_NAME: &str = "mdc"; pub const BUCKET_NAME: &str = "mdc";
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment