Commit 1af7433b authored by Neelay Shah and committed by GitHub
Browse files

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
......@@ -18,11 +18,11 @@ version = "0.2.1"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/triton-inference-server/triton_distributed"
repository = "https://github.com/triton-inference-server/triton_distributed"
homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git"
[package]
name = "triton-distributed-llm"
name = "dynemo-llm"
version.workspace = true
edition.workspace = true
authors.workspace = true
......@@ -43,7 +43,7 @@ vulkan = ["llama-cpp-2/vulkan"]
[workspace.dependencies]
# local or crates.io
triton-distributed-runtime = { version = "0.2.0", path = "../runtime" }
dynemo-runtime = { version = "0.2.0", path = "../runtime" }
# crates.io
anyhow = { version = "1" }
......@@ -66,7 +66,7 @@ strum = { version = "0.27", features = ["derive"] }
[dependencies]
# repo
triton-distributed-runtime = { workspace = true }
dynemo-runtime = { workspace = true }
# workspace
anyhow = { workspace = true }
......
......@@ -34,7 +34,7 @@ use futures::stream::{self, StreamExt};
use tracing as log;
use crate::model_card::model::{ModelDeploymentCard, TokenizerKind};
use triton_distributed_runtime::{
use dynemo_runtime::{
pipeline::{
async_trait, AsyncEngineContextProvider, ManyOut, Operator, ResponseStream,
ServerStreamingEngine, SingleIn,
......
......@@ -22,6 +22,11 @@ use std::{
use anyhow::Context;
use async_stream::stream;
use async_trait::async_trait;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::CancellationToken;
use llama_cpp_2::{
context::{params::LlamaContextParams, LlamaContext},
llama_backend::LlamaBackend,
......@@ -30,11 +35,6 @@ use llama_cpp_2::{
sampling::LlamaSampler,
token::LlamaToken,
};
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed_runtime::pipeline::error as pipeline_error;
use triton_distributed_runtime::pipeline::{Error, ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated;
use triton_distributed_runtime::CancellationToken;
use crate::backend::ExecutionContext;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
......
......@@ -28,10 +28,10 @@ use mistralrs::{
};
use tokio::sync::mpsc::channel;
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed_runtime::pipeline::error as pipeline_error;
use triton_distributed_runtime::pipeline::{Error, ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use crate::protocols::openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
......
......@@ -17,8 +17,8 @@ use std::path::Path;
use std::sync::Arc;
use crate::backend::ExecutionContext;
use triton_distributed_runtime::pipeline::error as pipeline_error;
use triton_distributed_runtime::CancellationToken;
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::CancellationToken;
mod worker;
......
......@@ -19,10 +19,10 @@ use async_stream::stream;
use async_trait::async_trait;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed_runtime::pipeline::{Error, ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated;
use triton_distributed_runtime::runtime::CancellationToken;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::runtime::CancellationToken;
use crate::engines::MultiNodeConfig;
......
......@@ -37,8 +37,8 @@ use tokio::sync::mpsc::Sender;
use tokio::{io::AsyncBufReadExt, sync::mpsc::error::SendError};
use tokio::{io::AsyncReadExt as _, task::JoinHandle};
use triton_distributed_runtime::protocols::annotated::Annotated;
use triton_distributed_runtime::runtime::CancellationToken;
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::runtime::CancellationToken;
use crate::engines::sglang::MultiGPUConfig;
use crate::engines::MultiNodeConfig;
......
......@@ -16,7 +16,7 @@
use std::sync::Arc;
use crate::backend::ExecutionContext;
use triton_distributed_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::pipeline::error as pipeline_error;
pub mod executor;
......
......@@ -15,12 +15,12 @@
use anyhow::{Error, Result};
use async_trait::async_trait;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use futures::stream;
use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken;
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed_runtime::pipeline::{ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated;
use super::Executor;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
......
......@@ -19,8 +19,8 @@ use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use triton_distributed_runtime::pipeline::error as pipeline_error;
use triton_distributed_runtime::CancellationToken;
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::CancellationToken;
use crate::backend::ExecutionContext;
use crate::engines::MultiNodeConfig;
......
......@@ -21,10 +21,10 @@ use async_trait::async_trait;
use crate::engines::vllm::worker;
use crate::engines::MultiNodeConfig;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
use triton_distributed_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use triton_distributed_runtime::pipeline::{Error, ManyOut, SingleIn};
use triton_distributed_runtime::protocols::annotated::Annotated;
use triton_distributed_runtime::runtime::CancellationToken;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::runtime::CancellationToken;
pub struct VllmEngine {
cancel_token: CancellationToken,
......
......@@ -24,7 +24,7 @@ use tokio::select;
use tokio::time;
use tracing;
use triton_distributed_runtime::CancellationToken;
use dynemo_runtime::CancellationToken;
/// Default is 16 seconds, we make it a bit shorter
const RAY_STOP_TIMEOUT_SECS: u32 = 10;
......
......@@ -19,6 +19,8 @@ use std::{
};
use async_zmq::{SinkExt, StreamExt};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::CancellationToken;
use pyo3::{
prelude::*,
types::{IntoPyDict, PyBytes, PyString},
......@@ -26,8 +28,6 @@ use pyo3::{
use tokio::sync::mpsc::Sender;
use tokio::task::JoinHandle;
use tokio::{io::AsyncBufReadExt, sync::mpsc::error::SendError};
use triton_distributed_runtime::protocols::annotated::Annotated;
use triton_distributed_runtime::CancellationToken;
use crate::engines::MultiNodeConfig;
use crate::protocols::common::llm_backend::LLMEngineOutput;
......
......@@ -18,7 +18,7 @@ use std::sync::Arc;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc::Receiver;
use triton_distributed_runtime::{
use dynemo_runtime::{
protocols::{self, annotated::Annotated},
raise,
transports::etcd::{KeyValue, WatchEvent},
......
......@@ -48,7 +48,7 @@ use crate::types::{
Annotated,
};
use triton_distributed_runtime::pipeline::{AsyncEngineContext, Context};
use dynemo_runtime::pipeline::{AsyncEngineContext, Context};
#[derive(Serialize, Deserialize)]
pub(crate) struct ErrorResponse {
......@@ -91,7 +91,7 @@ impl ErrorResponse {
)
}
/// The OAI endpoints call an [`triton_distributed_runtime::engine::AsyncEngine`] which are specialized to return
/// The OAI endpoints call an [`dynemo_runtime::engine::AsyncEngine`] which are specialized to return
/// an [`anyhow::Error`]. This method will convert the [`anyhow::Error`] into an [`HttpError`].
/// If successful, it will return the [`HttpError`] as an [`ErrorResponse::internal_server_error`]
/// with the details of the error.
......@@ -516,7 +516,7 @@ pub fn list_models_router(
path: Option<String>,
) -> (Vec<RouteDoc>, Router) {
// TODO: Why do we have this endpoint?
let custom_path = path.unwrap_or("/triton/alpha/list-models".to_string());
let custom_path = path.unwrap_or("/dynemo/alpha/list-models".to_string());
let doc_for_custom = RouteDoc::new(axum::http::Method::GET, &custom_path);
// Standard OpenAI compatible list models endpoint
......
......@@ -14,11 +14,11 @@
// limitations under the License.
use anyhow::Result;
use dynemo_runtime::{component::Component, DistributedRuntime};
use futures::stream::StreamExt;
use std::{sync::Arc, time::Duration};
use tokio_util::sync::CancellationToken;
use tracing;
use triton_distributed_runtime::{component::Component, DistributedRuntime};
pub mod indexer;
pub mod protocols;
......@@ -62,7 +62,7 @@ impl KvRouter {
}
pub async fn new(
nats_client: triton_distributed_runtime::transports::nats::Client,
nats_client: dynemo_runtime::transports::nats::Client,
service_name: String,
kv_subject: String,
) -> Result<Arc<Self>> {
......@@ -135,7 +135,7 @@ impl KvRouter {
}
async fn collect_endpoints(
nats_client: triton_distributed_runtime::transports::nats::Client,
nats_client: dynemo_runtime::transports::nats::Client,
service_name: String,
ep_tx: tokio::sync::mpsc::Sender<ProcessedEndpoints>,
cancel: CancellationToken,
......
......@@ -15,11 +15,7 @@
use crate::kv_router::{indexer::RouterEvent, protocols::*, KV_EVENT_SUBJECT};
use async_trait::async_trait;
use futures::stream;
use std::sync::Arc;
use tokio::sync::mpsc;
use tracing as log;
use triton_distributed_runtime::{
use dynemo_runtime::{
component::Component,
pipeline::{
network::Ingress, AsyncEngine, AsyncEngineContextProvider, ManyOut, ResponseStream,
......@@ -28,6 +24,10 @@ use triton_distributed_runtime::{
protocols::annotated::Annotated,
DistributedRuntime, Error, Result,
};
use futures::stream;
use std::sync::Arc;
use tokio::sync::mpsc;
use tracing as log;
pub struct KvEventPublisher {
tx: mpsc::UnboundedSender<KvCacheEvent>,
......
......@@ -19,12 +19,12 @@ pub use crate::kv_router::protocols::ForwardPassMetrics;
use anyhow::Result;
use derive_builder::Builder;
use triton_distributed_runtime::pipeline::network::{
use dynemo_runtime::pipeline::network::{
ingress::push_endpoint::PushEndpoint,
PushWorkHandler,
};
use triton_distributed_runtime::transports::nats::{self, ServiceExt};
use dynemo_runtime::transports::nats::{self, ServiceExt};
use tokio::sync::watch;
use tokio_util::sync::CancellationToken;
......
......@@ -13,9 +13,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Triton LLM
//! # Dynemo LLM
//!
//! The `triton-llm` crate is a Rust library that provides a set of traits and types for building
//! The `dynemo-llm` crate is a Rust library that provides a set of traits and types for building
//! distributed LLM inference solutions.
pub mod backend;
......
......@@ -37,7 +37,7 @@ use std::time::Duration;
use derive_builder::Builder;
use triton_distributed_runtime::slug::Slug;
use dynemo_runtime::slug::Slug;
pub const BUCKET_NAME: &str = "mdc";
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment