Commit 1af7433b authored by Neelay Shah's avatar Neelay Shah Committed by GitHub
Browse files

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: default avatarGraham King <grahamk@nvidia.com>
parent ee4ef06b
---
source: triton-llm/tests/openai_completions.rs
source: dynemo-llm/tests/openai_completions.rs
description: should have only prompt and model fields
expression: sample.request
---
......
---
source: triton-llm/tests/preprocessor.rs
source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt
info:
messages:
......
---
source: triton-llm/tests/preprocessor.rs
source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt
info:
messages:
......
---
source: triton-llm/tests/preprocessor.rs
source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt
info:
messages:
......
---
source: triton-llm/tests/preprocessor.rs
source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt
info:
messages:
......
---
source: triton-llm/tests/preprocessor.rs
source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt
info:
messages:
......
---
source: triton-llm/tests/preprocessor.rs
source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt
info:
messages:
......
---
source: triton-llm/tests/preprocessor.rs
source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt
info:
messages:
......
---
source: triton-llm/tests/preprocessor.rs
source: dynemo-llm/tests/preprocessor.rs
expression: formatted_prompt
info:
messages:
......
......@@ -25,10 +25,10 @@
//! in a hashmap. We will then use these hashes to test that the tokenizer is working correctly. This
//! will detect if upstream dependency changes result in different/new behavior.
use dynemo_llm::tokenizers::traits::{Decoder, Encoder};
use dynemo_llm::tokenizers::*;
use std::collections::HashMap;
use std::sync::Arc;
use triton_distributed_llm::tokenizers::traits::{Decoder, Encoder};
use triton_distributed_llm::tokenizers::*;
const TEST_PROMPTS: [&str; 4] = [
"deep learning is",
......
......@@ -695,6 +695,54 @@ dependencies = [
"syn 2.0.98",
]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"assert_matches",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"env_logger",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"rstest",
"serde",
"serde_json",
"socket2",
"temp-env",
"thiserror",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "ed25519"
version = "2.2.3"
......@@ -2930,54 +2978,6 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"assert_matches",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"env_logger",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"rstest",
"serde",
"serde_json",
"socket2",
"temp-env",
"thiserror",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "try-lock"
version = "0.2.5"
......
......@@ -14,16 +14,16 @@
# limitations under the License.
[package]
name = "triton-distributed-runtime"
name = "dynemo-runtime"
description = "Distributed Inference Framework"
readme = "README.md"
version = "0.2.1" # TODO: Centralize Version Automation
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/triton-inference-server/triton_distributed"
repository = "https://github.com/triton-inference-server/triton_distributed"
keywords = ["llm", "genai", "inference", "nvidia", "distributed", "triton"]
homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git"
keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynemo"]
[features]
default = []
......
......@@ -70,8 +70,7 @@ above, you can manually launch each:
When developing or running examples, any process or user that shares your core-services (`etcd` and `nats.io`) will
be operating within your distributed runtime.
The current examples use a hard-coded `namespace`. We will address the `namespace` collisions in this
[issue](https://github.com/triton-inference-server/triton_distributed/issues/114).
The current examples use a hard-coded `namespace`. We will address the `namespace` collisions later.
All examples require the `etcd` and `nats.io` pre-requisites to be running and available.
......
......@@ -35,7 +35,7 @@ impl WorkerConfig {
// All calls should be global and thread safe.
Figment::new()
.merge(Serialized::defaults(Self::default()))
.merge(Env::prefixed("TRD_WORKER_"))
.merge(Env::prefixed("DYN_WORKER_"))
.extract()
.unwrap() // safety: Called on startup, so panic is reasonable
}
......@@ -81,10 +81,10 @@ impl RuntimeConfig {
pub(crate) fn figment() -> Figment {
Figment::new()
.merge(Serialized::defaults(RuntimeConfig::default()))
.merge(Toml::file("/opt/triton/defaults/runtime.toml"))
.merge(Toml::file("/opt/triton/etc/runtime.toml"))
.merge(Env::prefixed("TRD_RUNTIME_").filter_map(|k| {
let full_key = format!("TRD_RUNTIME_{}", k.as_str());
.merge(Toml::file("/opt/dynemo/defaults/runtime.toml"))
.merge(Toml::file("/opt/dynemo/etc/runtime.toml"))
.merge(Env::prefixed("DYN_RUNTIME_").filter_map(|k| {
let full_key = format!("DYN_RUNTIME_{}", k.as_str());
// filters out empty environment variables
match std::env::var(&full_key) {
Ok(v) if !v.is_empty() => Some(k.into()),
......@@ -97,10 +97,10 @@ impl RuntimeConfig {
/// Configuration is prioritized in the following order, where the last has the lowest priority:
/// 1. Environment variables (top priority)
/// TO DO: Add documentation for configuration files. Paths should be configurable.
/// 2. /opt/triton/etc/runtime.toml
/// 3. /opt/triton/defaults/runtime.toml (lowest priority)
/// 2. /opt/dynemo/etc/runtime.toml
/// 3. /opt/dynemo/defaults/runtime.toml (lowest priority)
///
/// Environment variables are prefixed with `TRD_RUNTIME_`
/// Environment variables are prefixed with `DYN_RUNTIME_`
pub fn from_settings() -> Result<RuntimeConfig> {
let config: RuntimeConfig = Self::figment().extract()?;
config.validate()?;
......@@ -159,15 +159,15 @@ pub fn is_truthy(val: &str) -> bool {
}
/// Check whether JSONL logging enabled
/// Set the `TRD_LOGGING_JSONL` environment variable a [`is_truthy`] value
/// Set the `DYN_LOGGING_JSONL` environment variable to a [`is_truthy`] value
pub fn jsonl_logging_enabled() -> bool {
env_is_truthy("TRD_LOGGING_JSONL")
env_is_truthy("DYN_LOGGING_JSONL")
}
/// Check whether logging with ANSI terminal escape codes and colors is disabled.
/// Set the `TRD_SDK_DISABLE_ANSI_LOGGING` environment variable a [`is_truthy`] value
/// Set the `DYN_SDK_DISABLE_ANSI_LOGGING` environment variable to a [`is_truthy`] value
pub fn disable_ansi_logging() -> bool {
env_is_truthy("TRD_SDK_DISABLE_ANSI_LOGGING")
env_is_truthy("DYN_SDK_DISABLE_ANSI_LOGGING")
}
#[cfg(test)]
......@@ -178,8 +178,8 @@ mod tests {
fn test_runtime_config_with_env_vars() -> Result<()> {
temp_env::with_vars(
vec![
("TRD_RUNTIME_NUM_WORKER_THREADS", Some("24")),
("TRD_RUNTIME_MAX_BLOCKING_THREADS", Some("32")),
("DYN_RUNTIME_NUM_WORKER_THREADS", Some("24")),
("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("32")),
],
|| {
let config = RuntimeConfig::from_settings()?;
......@@ -194,8 +194,8 @@ mod tests {
fn test_runtime_config_defaults() -> Result<()> {
temp_env::with_vars(
vec![
("TRD_RUNTIME_NUM_WORKER_THREADS", None::<&str>),
("TRD_RUNTIME_MAX_BLOCKING_THREADS", Some("")),
("DYN_RUNTIME_NUM_WORKER_THREADS", None::<&str>),
("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("")),
],
|| {
let config = RuntimeConfig::from_settings()?;
......@@ -215,8 +215,8 @@ mod tests {
fn test_runtime_config_rejects_invalid_thread_count() -> Result<()> {
temp_env::with_vars(
vec![
("TRD_RUNTIME_NUM_WORKER_THREADS", Some("0")),
("TRD_RUNTIME_MAX_BLOCKING_THREADS", Some("0")),
("DYN_RUNTIME_NUM_WORKER_THREADS", Some("0")),
("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("0")),
],
|| {
let result = RuntimeConfig::from_settings();
......
......@@ -13,17 +13,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! Triton Distributed Logging Module.
//! Dynemo Distributed Logging Module.
//!
//! - Configuration loaded from:
//! 1. Environment variables (highest priority).
//! 2. Optional TOML file pointed to by the `TRD_LOGGING_CONFIG_PATH` environment variable.
//! 3. `/opt/triton/etc/logging.toml`.
//! 2. Optional TOML file pointed to by the `DYN_LOGGING_CONFIG_PATH` environment variable.
//! 3. `/opt/dynemo/etc/logging.toml`.
//!
//! Logging can take two forms: `READABLE` or `JSONL`. The default is `READABLE`. `JSONL`
//! can be enabled by setting the `TRD_LOGGING_JSONL` environment variable to `1`.
//! can be enabled by setting the `DYN_LOGGING_JSONL` environment variable to `1`.
//!
//! Filters can be configured using the `TRD_LOG` environment variable or by setting the `filters`
//! Filters can be configured using the `DYN_LOG` environment variable or by setting the `filters`
//! key in the TOML configuration file. Filters are comma-separated key-value pairs where the key
//! is the crate or module name and the value is the log level. The default log level is `error`.
//!
......@@ -53,13 +53,13 @@ use tracing_subscriber::EnvFilter;
use tracing_subscriber::{filter::Directive, fmt};
/// ENV used to set the log level
const FILTER_ENV: &str = "TRD_LOG";
const FILTER_ENV: &str = "DYN_LOG";
/// Default log level
const DEFAULT_FILTER_LEVEL: &str = "error";
/// ENV used to set the path to the logging configuration file
const CONFIG_PATH_ENV: &str = "TRD_LOGGING_CONFIG_PATH";
const CONFIG_PATH_ENV: &str = "DYN_LOGGING_CONFIG_PATH";
/// `Once` instance to ensure the logger is initialized only once
static INIT: Once = Once::new();
......@@ -155,7 +155,7 @@ fn load_config() -> LoggingConfig {
let config_path = std::env::var(CONFIG_PATH_ENV).unwrap_or_else(|_| "".to_string());
let figment = Figment::new()
.merge(Serialized::defaults(LoggingConfig::default()))
.merge(Toml::file("/opt/triton/etc/logging.toml"))
.merge(Toml::file("/opt/dynemo/etc/logging.toml"))
.merge(Toml::file(config_path));
figment.extract().unwrap()
......
......@@ -299,7 +299,7 @@ impl<T: Send + Sync + 'static> From<Context<T>> for StreamContext {
}
}
// TODO - refactor here - this came from the triton-llm-async-engine crate
// TODO - refactor here - this came from the dynemo-llm-async-engine crate
use tokio::sync::watch::{channel, Receiver, Sender};
......
......@@ -22,7 +22,7 @@ use std::sync::Arc;
/// # Examples
///
/// ```
/// use triton_distributed_runtime::pipeline::registry::Registry;
/// use dynemo_runtime::pipeline::registry::Registry;
///
/// let mut registry = Registry::new();
///
......
......@@ -95,7 +95,7 @@ impl From<&str> for Endpoint {
///
/// # Examples
/// ```ignore
/// use triton_distributed::protocols::Endpoint;
/// use dynemo_runtime::protocols::Endpoint;
///
/// let endpoint = Endpoint::from("namespace/component/endpoint");
/// assert_eq!(endpoint.namespace, "namespace");
......@@ -150,7 +150,7 @@ impl FromStr for Endpoint {
/// # Examples
/// ```ignore
/// use std::str::FromStr;
/// use triton_distributed::protocols::Endpoint;
/// use dynemo_runtime::protocols::Endpoint;
///
/// let endpoint: Endpoint = "namespace/component/endpoint".parse().unwrap();
/// assert_eq!(endpoint.namespace, "namespace");
......
......@@ -18,17 +18,17 @@
//!
//! In the future, the [Worker] should probably be moved to a procedural macro similar
//! to the `#[tokio::main]` attribute, where we might annotate an async main function with
//! `#[triton::main]` or similar.
//! `#[dynemo::main]` or similar.
//!
//! The [Worker::execute] method is designed to be called once from main and will block
//! the calling thread until the application completes or is canceled. The method initializes
//! the signal handler used to trap `SIGINT` and `SIGTERM` signals and trigger a graceful shutdown.
//!
//! On termination, the user application is given a graceful shutdown period controlled by
//! the [TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] environment variable. If the application does not
//! the [DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] environment variable. If the application does not
//! shut down in time, the worker will terminate the application with an exit code of 911.
//!
//! The default values of [TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] differ between the development
//! The default values of [DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] differ between the development
//! and release builds. In development, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG] and
//! in release, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_RELEASE].
......@@ -45,10 +45,10 @@ static INIT: OnceCell<Mutex<Option<tokio::task::JoinHandle<Result<()>>>>> = Once
const SHUTDOWN_MESSAGE: &str =
"Application received shutdown signal; attempting to gracefully shutdown";
const SHUTDOWN_TIMEOUT_MESSAGE: &str =
"Use TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT to control the graceful shutdown timeout";
"Use DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT to control the graceful shutdown timeout";
/// Environment variable to control the graceful shutdown timeout
pub const TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT: &str = "TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT";
pub const DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT: &str = "DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT";
/// Default graceful shutdown timeout in seconds in debug mode
pub const DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG: u64 = 5;
......@@ -106,7 +106,7 @@ impl Worker {
let primary = runtime.primary();
let secondary = runtime.secondary();
let timeout = std::env::var(TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT)
let timeout = std::env::var(DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT)
.ok()
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or({
......
......@@ -21,12 +21,12 @@ use async_trait::async_trait;
use futures::Stream;
use tokio::sync::mpsc;
use triton_distributed_runtime::engine::{
use dynemo_runtime::engine::{
AsyncEngine, AsyncEngineContext, AsyncEngineContextProvider, AsyncEngineStream,
Data as DataType, Engine, EngineStream,
};
use triton_distributed_runtime::pipeline::{
use dynemo_runtime::pipeline::{
context::{Context, StreamContext},
Error, ManyOut, SingleIn,
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment