Commit 1af7433b authored by Neelay Shah, committed by GitHub
Browse files

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
--- ---
source: triton-llm/tests/openai_completions.rs source: dynemo-llm/tests/openai_completions.rs
description: should have only prompt and model fields description: should have only prompt and model fields
expression: sample.request expression: sample.request
--- ---
......
--- ---
source: triton-llm/tests/preprocessor.rs source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt expression: formatted_prompt
info: info:
messages: messages:
......
--- ---
source: triton-llm/tests/preprocessor.rs source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt expression: formatted_prompt
info: info:
messages: messages:
......
--- ---
source: triton-llm/tests/preprocessor.rs source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt expression: formatted_prompt
info: info:
messages: messages:
......
--- ---
source: triton-llm/tests/preprocessor.rs source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt expression: formatted_prompt
info: info:
messages: messages:
......
--- ---
source: triton-llm/tests/preprocessor.rs source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt expression: formatted_prompt
info: info:
messages: messages:
......
--- ---
source: triton-llm/tests/preprocessor.rs source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt expression: formatted_prompt
info: info:
messages: messages:
......
--- ---
source: triton-llm/tests/preprocessor.rs source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt expression: formatted_prompt
info: info:
messages: messages:
......
--- ---
source: triton-llm/tests/preprocessor.rs source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt expression: formatted_prompt
info: info:
messages: messages:
......
...@@ -25,10 +25,10 @@ ...@@ -25,10 +25,10 @@
//! in a hashmap. We will then use these hashes to test that the tokenizer is working correctly. This //! in a hashmap. We will then use these hashes to test that the tokenizer is working correctly. This
//! will detect if upstream dependency changes result in different/new behavior. //! will detect if upstream dependency changes result in different/new behavior.
use dynemo_llm::tokenizers::traits::{Decoder, Encoder};
use dynemo_llm::tokenizers::*;
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use triton_distributed_llm::tokenizers::traits::{Decoder, Encoder};
use triton_distributed_llm::tokenizers::*;
const TEST_PROMPTS: [&str; 4] = [ const TEST_PROMPTS: [&str; 4] = [
"deep learning is", "deep learning is",
......
...@@ -695,6 +695,54 @@ dependencies = [ ...@@ -695,6 +695,54 @@ dependencies = [
"syn 2.0.98", "syn 2.0.98",
] ]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"assert_matches",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"env_logger",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"rstest",
"serde",
"serde_json",
"socket2",
"temp-env",
"thiserror",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]] [[package]]
name = "ed25519" name = "ed25519"
version = "2.2.3" version = "2.2.3"
...@@ -2930,54 +2978,6 @@ dependencies = [ ...@@ -2930,54 +2978,6 @@ dependencies = [
"tracing-serde", "tracing-serde",
] ]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"assert_matches",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"env_logger",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"rstest",
"serde",
"serde_json",
"socket2",
"temp-env",
"thiserror",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]] [[package]]
name = "try-lock" name = "try-lock"
version = "0.2.5" version = "0.2.5"
......
...@@ -14,16 +14,16 @@ ...@@ -14,16 +14,16 @@
# limitations under the License. # limitations under the License.
[package] [package]
name = "triton-distributed-runtime" name = "dynemo-runtime"
description = "Distributed Inference Framework" description = "Distributed Inference Framework"
readme = "README.md" readme = "README.md"
version = "0.2.1" # TODO: Centralize Version Automation version = "0.2.1" # TODO: Centralize Version Automation
edition = "2021" edition = "2021"
authors = ["NVIDIA"] authors = ["NVIDIA"]
license = "Apache-2.0" license = "Apache-2.0"
homepage = "https://github.com/triton-inference-server/triton_distributed" homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/triton-inference-server/triton_distributed" repository = "https://github.com/dynemo-ai/dynemo.git"
keywords = ["llm", "genai", "inference", "nvidia", "distributed", "triton"] keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynemo"]
[features] [features]
default = [] default = []
......
...@@ -70,8 +70,7 @@ above, you can manually launch each: ...@@ -70,8 +70,7 @@ above, you can manually launch each:
When developing or running examples, any process or user that shares your core-services (`etcd` and `nats.io`) will When developing or running examples, any process or user that shares your core-services (`etcd` and `nats.io`) will
be operating within your distributed runtime. be operating within your distributed runtime.
The current examples use a hard-coded `namespace`. We will address the `namespace` collisions in this The current examples use a hard-coded `namespace`. We will address the `namespace` collisions later.
[issue](https://github.com/triton-inference-server/triton_distributed/issues/114).
All examples require the `etcd` and `nats.io` pre-requisites to be running and available. All examples require the `etcd` and `nats.io` pre-requisites to be running and available.
......
...@@ -35,7 +35,7 @@ impl WorkerConfig { ...@@ -35,7 +35,7 @@ impl WorkerConfig {
// All calls should be global and thread safe. // All calls should be global and thread safe.
Figment::new() Figment::new()
.merge(Serialized::defaults(Self::default())) .merge(Serialized::defaults(Self::default()))
.merge(Env::prefixed("TRD_WORKER_")) .merge(Env::prefixed("DYN_WORKER_"))
.extract() .extract()
.unwrap() // safety: Called on startup, so panic is reasonable .unwrap() // safety: Called on startup, so panic is reasonable
} }
...@@ -81,10 +81,10 @@ impl RuntimeConfig { ...@@ -81,10 +81,10 @@ impl RuntimeConfig {
pub(crate) fn figment() -> Figment { pub(crate) fn figment() -> Figment {
Figment::new() Figment::new()
.merge(Serialized::defaults(RuntimeConfig::default())) .merge(Serialized::defaults(RuntimeConfig::default()))
.merge(Toml::file("/opt/triton/defaults/runtime.toml")) .merge(Toml::file("/opt/dynemo/defaults/runtime.toml"))
.merge(Toml::file("/opt/triton/etc/runtime.toml")) .merge(Toml::file("/opt/dynemo/etc/runtime.toml"))
.merge(Env::prefixed("TRD_RUNTIME_").filter_map(|k| { .merge(Env::prefixed("DYN_RUNTIME_").filter_map(|k| {
let full_key = format!("TRD_RUNTIME_{}", k.as_str()); let full_key = format!("DYN_RUNTIME_{}", k.as_str());
// filters out empty environment variables // filters out empty environment variables
match std::env::var(&full_key) { match std::env::var(&full_key) {
Ok(v) if !v.is_empty() => Some(k.into()), Ok(v) if !v.is_empty() => Some(k.into()),
...@@ -97,10 +97,10 @@ impl RuntimeConfig { ...@@ -97,10 +97,10 @@ impl RuntimeConfig {
/// Configuration is prioritized in the following order, where the last has the lowest priority: /// Configuration is prioritized in the following order, where the last has the lowest priority:
/// 1. Environment variables (top priority) /// 1. Environment variables (top priority)
/// TO DO: Add documentation for configuration files. Paths should be configurable. /// TO DO: Add documentation for configuration files. Paths should be configurable.
/// 2. /opt/triton/etc/runtime.toml /// 2. /opt/dynemo/etc/runtime.toml
/// 3. /opt/triton/defaults/runtime.toml (lowest priority) /// 3. /opt/dynemo/defaults/runtime.toml (lowest priority)
/// ///
/// Environment variables are prefixed with `TRD_RUNTIME_` /// Environment variables are prefixed with `DYN_RUNTIME_`
pub fn from_settings() -> Result<RuntimeConfig> { pub fn from_settings() -> Result<RuntimeConfig> {
let config: RuntimeConfig = Self::figment().extract()?; let config: RuntimeConfig = Self::figment().extract()?;
config.validate()?; config.validate()?;
...@@ -159,15 +159,15 @@ pub fn is_truthy(val: &str) -> bool { ...@@ -159,15 +159,15 @@ pub fn is_truthy(val: &str) -> bool {
} }
/// Check whether JSONL logging enabled /// Check whether JSONL logging enabled
/// Set the `TRD_LOGGING_JSONL` environment variable a [`is_truthy`] value /// Set the `DYN_LOGGING_JSONL` environment variable to a [`is_truthy`] value
pub fn jsonl_logging_enabled() -> bool { pub fn jsonl_logging_enabled() -> bool {
env_is_truthy("TRD_LOGGING_JSONL") env_is_truthy("DYN_LOGGING_JSONL")
} }
/// Check whether logging with ANSI terminal escape codes and colors is disabled. /// Check whether logging with ANSI terminal escape codes and colors is disabled.
/// Set the `TRD_SDK_DISABLE_ANSI_LOGGING` environment variable a [`is_truthy`] value /// Set the `DYN_SDK_DISABLE_ANSI_LOGGING` environment variable to a [`is_truthy`] value
pub fn disable_ansi_logging() -> bool { pub fn disable_ansi_logging() -> bool {
env_is_truthy("TRD_SDK_DISABLE_ANSI_LOGGING") env_is_truthy("DYN_SDK_DISABLE_ANSI_LOGGING")
} }
#[cfg(test)] #[cfg(test)]
...@@ -178,8 +178,8 @@ mod tests { ...@@ -178,8 +178,8 @@ mod tests {
fn test_runtime_config_with_env_vars() -> Result<()> { fn test_runtime_config_with_env_vars() -> Result<()> {
temp_env::with_vars( temp_env::with_vars(
vec![ vec![
("TRD_RUNTIME_NUM_WORKER_THREADS", Some("24")), ("DYN_RUNTIME_NUM_WORKER_THREADS", Some("24")),
("TRD_RUNTIME_MAX_BLOCKING_THREADS", Some("32")), ("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("32")),
], ],
|| { || {
let config = RuntimeConfig::from_settings()?; let config = RuntimeConfig::from_settings()?;
...@@ -194,8 +194,8 @@ mod tests { ...@@ -194,8 +194,8 @@ mod tests {
fn test_runtime_config_defaults() -> Result<()> { fn test_runtime_config_defaults() -> Result<()> {
temp_env::with_vars( temp_env::with_vars(
vec![ vec![
("TRD_RUNTIME_NUM_WORKER_THREADS", None::<&str>), ("DYN_RUNTIME_NUM_WORKER_THREADS", None::<&str>),
("TRD_RUNTIME_MAX_BLOCKING_THREADS", Some("")), ("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("")),
], ],
|| { || {
let config = RuntimeConfig::from_settings()?; let config = RuntimeConfig::from_settings()?;
...@@ -215,8 +215,8 @@ mod tests { ...@@ -215,8 +215,8 @@ mod tests {
fn test_runtime_config_rejects_invalid_thread_count() -> Result<()> { fn test_runtime_config_rejects_invalid_thread_count() -> Result<()> {
temp_env::with_vars( temp_env::with_vars(
vec![ vec![
("TRD_RUNTIME_NUM_WORKER_THREADS", Some("0")), ("DYN_RUNTIME_NUM_WORKER_THREADS", Some("0")),
("TRD_RUNTIME_MAX_BLOCKING_THREADS", Some("0")), ("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("0")),
], ],
|| { || {
let result = RuntimeConfig::from_settings(); let result = RuntimeConfig::from_settings();
......
...@@ -13,17 +13,17 @@ ...@@ -13,17 +13,17 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
//! Triton Distributed Logging Module. //! Dynemo Distributed Logging Module.
//! //!
//! - Configuration loaded from: //! - Configuration loaded from:
//! 1. Environment variables (highest priority). //! 1. Environment variables (highest priority).
//! 2. Optional TOML file pointed to by the `TRD_LOGGING_CONFIG_PATH` environment variable. //! 2. Optional TOML file pointed to by the `DYN_LOGGING_CONFIG_PATH` environment variable.
//! 3. `/opt/triton/etc/logging.toml`. //! 3. `/opt/dynemo/etc/logging.toml`.
//! //!
//! Logging can take two forms: `READABLE` or `JSONL`. The default is `READABLE`. `JSONL` //! Logging can take two forms: `READABLE` or `JSONL`. The default is `READABLE`. `JSONL`
//! can be enabled by setting the `TRD_LOGGING_JSONL` environment variable to `1`. //! can be enabled by setting the `DYN_LOGGING_JSONL` environment variable to `1`.
//! //!
//! Filters can be configured using the `TRD_LOG` environment variable or by setting the `filters` //! Filters can be configured using the `DYN_LOG` environment variable or by setting the `filters`
//! key in the TOML configuration file. Filters are comma-separated key-value pairs where the key //! key in the TOML configuration file. Filters are comma-separated key-value pairs where the key
//! is the crate or module name and the value is the log level. The default log level is `error`. //! is the crate or module name and the value is the log level. The default log level is `error`.
//! //!
...@@ -53,13 +53,13 @@ use tracing_subscriber::EnvFilter; ...@@ -53,13 +53,13 @@ use tracing_subscriber::EnvFilter;
use tracing_subscriber::{filter::Directive, fmt}; use tracing_subscriber::{filter::Directive, fmt};
/// ENV used to set the log level /// ENV used to set the log level
const FILTER_ENV: &str = "TRD_LOG"; const FILTER_ENV: &str = "DYN_LOG";
/// Default log level /// Default log level
const DEFAULT_FILTER_LEVEL: &str = "error"; const DEFAULT_FILTER_LEVEL: &str = "error";
/// ENV used to set the path to the logging configuration file /// ENV used to set the path to the logging configuration file
const CONFIG_PATH_ENV: &str = "TRD_LOGGING_CONFIG_PATH"; const CONFIG_PATH_ENV: &str = "DYN_LOGGING_CONFIG_PATH";
/// Once instance to ensure the logger is only initialized once /// Once instance to ensure the logger is only initialized once
static INIT: Once = Once::new(); static INIT: Once = Once::new();
...@@ -155,7 +155,7 @@ fn load_config() -> LoggingConfig { ...@@ -155,7 +155,7 @@ fn load_config() -> LoggingConfig {
let config_path = std::env::var(CONFIG_PATH_ENV).unwrap_or_else(|_| "".to_string()); let config_path = std::env::var(CONFIG_PATH_ENV).unwrap_or_else(|_| "".to_string());
let figment = Figment::new() let figment = Figment::new()
.merge(Serialized::defaults(LoggingConfig::default())) .merge(Serialized::defaults(LoggingConfig::default()))
.merge(Toml::file("/opt/triton/etc/logging.toml")) .merge(Toml::file("/opt/dynemo/etc/logging.toml"))
.merge(Toml::file(config_path)); .merge(Toml::file(config_path));
figment.extract().unwrap() figment.extract().unwrap()
......
...@@ -299,7 +299,7 @@ impl<T: Send + Sync + 'static> From<Context<T>> for StreamContext { ...@@ -299,7 +299,7 @@ impl<T: Send + Sync + 'static> From<Context<T>> for StreamContext {
} }
} }
// TODO - refactor here - this came from the triton-llm-async-engine crate // TODO - refactor here - this came from the dynemo-llm-async-engine crate
use tokio::sync::watch::{channel, Receiver, Sender}; use tokio::sync::watch::{channel, Receiver, Sender};
......
...@@ -22,7 +22,7 @@ use std::sync::Arc; ...@@ -22,7 +22,7 @@ use std::sync::Arc;
/// # Examples /// # Examples
/// ///
/// ``` /// ```
/// use triton_distributed_runtime::pipeline::registry::Registry; /// use dynemo_runtime::pipeline::registry::Registry;
/// ///
/// let mut registry = Registry::new(); /// let mut registry = Registry::new();
/// ///
......
...@@ -95,7 +95,7 @@ impl From<&str> for Endpoint { ...@@ -95,7 +95,7 @@ impl From<&str> for Endpoint {
/// ///
/// # Examples /// # Examples
/// ```ignore /// ```ignore
/// use triton_distributed::protocols::Endpoint; /// use dynemo_runtime::protocols::Endpoint;
/// ///
/// let endpoint = Endpoint::from("namespace/component/endpoint"); /// let endpoint = Endpoint::from("namespace/component/endpoint");
/// assert_eq!(endpoint.namespace, "namespace"); /// assert_eq!(endpoint.namespace, "namespace");
...@@ -150,7 +150,7 @@ impl FromStr for Endpoint { ...@@ -150,7 +150,7 @@ impl FromStr for Endpoint {
/// # Examples /// # Examples
/// ```ignore /// ```ignore
/// use std::str::FromStr; /// use std::str::FromStr;
/// use triton_distributed::protocols::Endpoint; /// use dynemo_runtime::protocols::Endpoint;
/// ///
/// let endpoint: Endpoint = "namespace/component/endpoint".parse().unwrap(); /// let endpoint: Endpoint = "namespace/component/endpoint".parse().unwrap();
/// assert_eq!(endpoint.namespace, "namespace"); /// assert_eq!(endpoint.namespace, "namespace");
......
...@@ -18,17 +18,17 @@ ...@@ -18,17 +18,17 @@
//! //!
//! In the future, the [Worker] should probably be moved to a procedural macro similar //! In the future, the [Worker] should probably be moved to a procedural macro similar
//! to the `#[tokio::main]` attribute, where we might annotate an async main function with //! to the `#[tokio::main]` attribute, where we might annotate an async main function with
//! `#[triton::main]` or similar. //! `#[dynemo::main]` or similar.
//! //!
//! The [Worker::execute] method is designed to be called once from main and will block //! The [Worker::execute] method is designed to be called once from main and will block
//! the calling thread until the application completes or is canceled. The method initializes //! the calling thread until the application completes or is canceled. The method initializes
//! the signal handler used to trap `SIGINT` and `SIGTERM` signals and trigger a graceful shutdown. //! the signal handler used to trap `SIGINT` and `SIGTERM` signals and trigger a graceful shutdown.
//! //!
//! On termination, the user application is given a graceful shutdown period controlled by //! On termination, the user application is given a graceful shutdown period controlled by
//! the [TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] environment variable. If the application does not //! the [DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] environment variable. If the application does not
//! shutdown in time, the worker will terminate the application with an exit code of 911. //! shutdown in time, the worker will terminate the application with an exit code of 911.
//! //!
//! The default values of [TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] differ between the development //! The default values of [DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] differ between the development
//! and release builds. In development, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG] and //! and release builds. In development, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG] and
//! in release, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_RELEASE]. //! in release, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_RELEASE].
...@@ -45,10 +45,10 @@ static INIT: OnceCell<Mutex<Option<tokio::task::JoinHandle<Result<()>>>>> = Once ...@@ -45,10 +45,10 @@ static INIT: OnceCell<Mutex<Option<tokio::task::JoinHandle<Result<()>>>>> = Once
const SHUTDOWN_MESSAGE: &str = const SHUTDOWN_MESSAGE: &str =
"Application received shutdown signal; attempting to gracefully shutdown"; "Application received shutdown signal; attempting to gracefully shutdown";
const SHUTDOWN_TIMEOUT_MESSAGE: &str = const SHUTDOWN_TIMEOUT_MESSAGE: &str =
"Use TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT to control the graceful shutdown timeout"; "Use DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT to control the graceful shutdown timeout";
/// Environment variable to control the graceful shutdown timeout /// Environment variable to control the graceful shutdown timeout
pub const TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT: &str = "TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT"; pub const DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT: &str = "DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT";
/// Default graceful shutdown timeout in seconds in debug mode /// Default graceful shutdown timeout in seconds in debug mode
pub const DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG: u64 = 5; pub const DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG: u64 = 5;
...@@ -106,7 +106,7 @@ impl Worker { ...@@ -106,7 +106,7 @@ impl Worker {
let primary = runtime.primary(); let primary = runtime.primary();
let secondary = runtime.secondary(); let secondary = runtime.secondary();
let timeout = std::env::var(TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT) let timeout = std::env::var(DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT)
.ok() .ok()
.and_then(|s| s.parse::<u64>().ok()) .and_then(|s| s.parse::<u64>().ok())
.unwrap_or({ .unwrap_or({
......
...@@ -21,12 +21,12 @@ use async_trait::async_trait; ...@@ -21,12 +21,12 @@ use async_trait::async_trait;
use futures::Stream; use futures::Stream;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use triton_distributed_runtime::engine::{ use dynemo_runtime::engine::{
AsyncEngine, AsyncEngineContext, AsyncEngineContextProvider, AsyncEngineStream, AsyncEngine, AsyncEngineContext, AsyncEngineContextProvider, AsyncEngineStream,
Data as DataType, Engine, EngineStream, Data as DataType, Engine, EngineStream,
}; };
use triton_distributed_runtime::pipeline::{ use dynemo_runtime::pipeline::{
context::{Context, StreamContext}, context::{Context, StreamContext},
Error, ManyOut, SingleIn, Error, ManyOut, SingleIn,
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment