refactor: rename triton_distributed to dynemo (#22)

Co-authored-by: Graham King <grahamk@nvidia.com>

refactor: rename triton_distributed to dynemo (#22)
Co-authored-by: Graham King <grahamk@nvidia.com>
1af7433b · Neelay Shah · GitHub · ee4ef06b · 1af7433b · 1af7433b
Commit 1af7433b authored Mar 05, 2025 by Neelay Shah Committed by GitHub Mar 05, 2025
20 changed files
--- a/lib/llm/tests/snapshots/openai_completions__valid_samples.snap
+++ b/lib/llm/tests/snapshots/openai_completions__valid_samples.snap
 ---
-source: triton-llm/tests/openai_completions.rs
+source: dynemo-llm/tests/openai_completions.rs
 description: should have only prompt and model fields
 expression: sample.request
 ---

--- a/lib/llm/tests/snapshots/preprocessor__mulit_turn_with_continuation@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
+++ b/lib/llm/tests/snapshots/preprocessor__mulit_turn_with_continuation@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
 ---
-source: triton-llm/tests/preprocessor.rs
+source: dynemo-llm/tests/preprocessor.rs
 expression: formatted_prompt
 info:
  messages:

--- a/lib/llm/tests/snapshots/preprocessor__mulit_turn_with_system@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
+++ b/lib/llm/tests/snapshots/preprocessor__mulit_turn_with_system@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
 ---
-source: triton-llm/tests/preprocessor.rs
+source: dynemo-llm/tests/preprocessor.rs
 expression: formatted_prompt
 info:
  messages:

--- a/lib/llm/tests/snapshots/preprocessor__mulit_turn_with_system_with_tools@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
+++ b/lib/llm/tests/snapshots/preprocessor__mulit_turn_with_system_with_tools@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
 ---
-source: triton-llm/tests/preprocessor.rs
+source: dynemo-llm/tests/preprocessor.rs
 expression: formatted_prompt
 info:
  messages:

--- a/lib/llm/tests/snapshots/preprocessor__mulit_turn_without_system@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
+++ b/lib/llm/tests/snapshots/preprocessor__mulit_turn_without_system@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
 ---
-source: triton-llm/tests/preprocessor.rs
+source: dynemo-llm/tests/preprocessor.rs
 expression: formatted_prompt
 info:
  messages:

--- a/lib/llm/tests/snapshots/preprocessor__multi_turn_with_continuation@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
+++ b/lib/llm/tests/snapshots/preprocessor__multi_turn_with_continuation@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
 ---
-source: triton-llm/tests/preprocessor.rs
+source: dynemo-llm/tests/preprocessor.rs
 expression: formatted_prompt
 info:
  messages:

--- a/lib/llm/tests/snapshots/preprocessor__multi_turn_with_system_with_tools@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
+++ b/lib/llm/tests/snapshots/preprocessor__multi_turn_with_system_with_tools@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
 ---
-source: triton-llm/tests/preprocessor.rs
+source: dynemo-llm/tests/preprocessor.rs
 expression: formatted_prompt
 info:
  messages:

--- a/lib/llm/tests/snapshots/preprocessor__single_turn@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
+++ b/lib/llm/tests/snapshots/preprocessor__single_turn@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
 ---
-source: triton-llm/tests/preprocessor.rs
+source: dynemo-llm/tests/preprocessor.rs
 expression: formatted_prompt
 info:
  messages:

--- a/lib/llm/tests/snapshots/preprocessor__single_turn_with_tools@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
+++ b/lib/llm/tests/snapshots/preprocessor__single_turn_with_tools@meta_llama_llama_3_1_70b_instruct__1605565_e45e5991.snap
 ---
-source: triton-llm/tests/preprocessor.rs
+source: dynemo-llm/tests/preprocessor.rs
 expression: formatted_prompt
 info:
  messages:

--- a/lib/llm/tests/tokenizers.rs
+++ b/lib/llm/tests/tokenizers.rs
@@ -25,10 +25,10 @@
 //! in a hashmap. We will then use these hashes to test that the tokenizer is working correctly. This
 //! will detect if upstream dependency changes result in different/new behavior.

+use dynemo_llm::tokenizers::traits::{Decoder, Encoder};
+use dynemo_llm::tokenizers::*;
 use std::collections::HashMap;
 use std::sync::Arc;
-use triton_distributed_llm::tokenizers::traits::{Decoder, Encoder};
-use triton_distributed_llm::tokenizers::*;

 const TEST_PROMPTS: [&str; 4] = [
    "deep learning is",

--- a/lib/runtime/Cargo.lock
+++ b/lib/runtime/Cargo.lock
@@ -695,6 +695,54 @@ dependencies = [
 "syn 2.0.98",
 ]

+[[package]]
+name = "dynemo-runtime"
+version = "0.2.1"
+dependencies = [
+ "anyhow",
+ "assert_matches",
+ "async-nats",
+ "async-once-cell",
+ "async-stream",
+ "async-trait",
+ "async_zmq",
+ "blake3",
+ "bytes",
+ "chrono",
+ "derive-getters",
+ "derive_builder",
+ "educe",
+ "either",
+ "env_logger",
+ "etcd-client",
+ "figment",
+ "futures",
+ "humantime",
+ "local-ip-address",
+ "log",
+ "nid",
+ "nix",
+ "nuid",
+ "once_cell",
+ "prometheus",
+ "rand",
+ "regex",
+ "rstest",
+ "serde",
+ "serde_json",
+ "socket2",
+ "temp-env",
+ "thiserror",
+ "tokio",
+ "tokio-stream",
+ "tokio-util",
+ "tracing",
+ "tracing-subscriber",
+ "uuid",
+ "validator",
+ "xxhash-rust",
+]
+
 [[package]]
 name = "ed25519"
 version = "2.2.3"
@@ -2930,54 +2978,6 @@ dependencies = [
 "tracing-serde",
 ]

-[[package]]
-name = "triton-distributed-runtime"
-version = "0.2.1"
-dependencies = [
- "anyhow",
- "assert_matches",
- "async-nats",
- "async-once-cell",
- "async-stream",
- "async-trait",
- "async_zmq",
- "blake3",
- "bytes",
- "chrono",
- "derive-getters",
- "derive_builder",
- "educe",
- "either",
- "env_logger",
- "etcd-client",
- "figment",
- "futures",
- "humantime",
- "local-ip-address",
- "log",
- "nid",
- "nix",
- "nuid",
- "once_cell",
- "prometheus",
- "rand",
- "regex",
- "rstest",
- "serde",
- "serde_json",
- "socket2",
- "temp-env",
- "thiserror",
- "tokio",
- "tokio-stream",
- "tokio-util",
- "tracing",
- "tracing-subscriber",
- "uuid",
- "validator",
- "xxhash-rust",
-]
-
 [[package]]
 name = "try-lock"
 version = "0.2.5"

--- a/lib/runtime/Cargo.toml
+++ b/lib/runtime/Cargo.toml
@@ -14,16 +14,16 @@
 # limitations under the License.

 [package]
-name = "triton-distributed-runtime"
+name = "dynemo-runtime"
 description = "Distributed Inference Framework"
 readme = "README.md"
 version = "0.2.1" # TODO: Centralize Version Automation
 edition = "2021"
 authors = ["NVIDIA"]
 license = "Apache-2.0"
-homepage = "https://github.com/triton-inference-server/triton_distributed"
-repository = "https://github.com/triton-inference-server/triton_distributed"
-keywords = ["llm", "genai", "inference", "nvidia", "distributed", "triton"]
+homepage = "https://github.com/dynemo-ai/dynemo"
+repository = "https://github.com/dynemo-ai/dynemo.git"
+keywords = ["llm", "genai", "inference", "nvidia", "distributed", "dynemo"]

 [features]
 default = []

--- a/lib/runtime/README.md
+++ b/lib/runtime/README.md
@@ -70,8 +70,7 @@ above, you can manually launch each:
 When developing or running examples, any process or user that shared your core-services (`etcd` and `nats.io`) will
 be operating within your distributed runtime.

-The current examples use a hard-coded `namespace`. We will address the `namespace` collisions in this
-[issue](https://github.com/triton-inference-server/triton_distributed/issues/114).
+The current examples use a hard-coded `namespace`. We will address the `namespace` collisions later.

 All examples require the `etcd` and `nats.io` pre-requisites to be running and available.


--- a/lib/runtime/src/config.rs
+++ b/lib/runtime/src/config.rs
@@ -35,7 +35,7 @@ impl WorkerConfig {
        // All calls should be global and thread safe.
        Figment::new()
            .merge(Serialized::defaults(Self::default()))
-            .merge(Env::prefixed("TRD_WORKER_"))
+            .merge(Env::prefixed("DYN_WORKER_"))
            .extract()
            .unwrap() // safety: Called on startup, so panic is reasonable
    }
@@ -81,10 +81,10 @@ impl RuntimeConfig {
    pub(crate) fn figment() -> Figment {
        Figment::new()
            .merge(Serialized::defaults(RuntimeConfig::default()))
-            .merge(Toml::file("/opt/triton/defaults/runtime.toml"))
-            .merge(Toml::file("/opt/triton/etc/runtime.toml"))
-            .merge(Env::prefixed("TRD_RUNTIME_").filter_map(|k| {
-                let full_key = format!("TRD_RUNTIME_{}", k.as_str());
+            .merge(Toml::file("/opt/dynemo/defaults/runtime.toml"))
+            .merge(Toml::file("/opt/dynemo/etc/runtime.toml"))
+            .merge(Env::prefixed("DYN_RUNTIME_").filter_map(|k| {
+                let full_key = format!("DYN_RUNTIME_{}", k.as_str());
                // filters out empty environment variables
                match std::env::var(&full_key) {
                    Ok(v) if !v.is_empty() => Some(k.into()),
@@ -97,10 +97,10 @@ impl RuntimeConfig {
    /// Configuration is priorities in the following order, where the last has the lowest priority:
    /// 1. Environment variables (top priority)
    ///     TO DO: Add documentation for configuration files. Paths should be configurable.
-    /// 2. /opt/triton/etc/runtime.toml
-    /// 3. /opt/triton/defaults/runtime.toml (lowest priority)
+    /// 2. /opt/dynemo/etc/runtime.toml
+    /// 3. /opt/dynemo/defaults/runtime.toml (lowest priority)
    ///
-    /// Environment variables are prefixed with `TRD_RUNTIME_`
+    /// Environment variables are prefixed with `DYN_RUNTIME_`
    pub fn from_settings() -> Result<RuntimeConfig> {
        let config: RuntimeConfig = Self::figment().extract()?;
        config.validate()?;
@@ -159,15 +159,15 @@ pub fn is_truthy(val: &str) -> bool {
 }

 /// Check whether JSONL logging enabled
-/// Set the `TRD_LOGGING_JSONL` environment variable a [`is_truthy`] value
+/// Set the `DYN_LOGGING_JSONL` environment variable to a [`is_truthy`] value
 pub fn jsonl_logging_enabled() -> bool {
-    env_is_truthy("TRD_LOGGING_JSONL")
+    env_is_truthy("DYN_LOGGING_JSONL")
 }

 /// Check whether logging with ANSI terminal escape codes and colors is disabled.
-/// Set the `TRD_SDK_DISABLE_ANSI_LOGGING` environment variable a [`is_truthy`] value
+/// Set the `DYN_SDK_DISABLE_ANSI_LOGGING` environment variable to a [`is_truthy`] value
 pub fn disable_ansi_logging() -> bool {
-    env_is_truthy("TRD_SDK_DISABLE_ANSI_LOGGING")
+    env_is_truthy("DYN_SDK_DISABLE_ANSI_LOGGING")
 }

 #[cfg(test)]
@@ -178,8 +178,8 @@ mod tests {
    fn test_runtime_config_with_env_vars() -> Result<()> {
        temp_env::with_vars(
            vec![
-                ("TRD_RUNTIME_NUM_WORKER_THREADS", Some("24")),
-                ("TRD_RUNTIME_MAX_BLOCKING_THREADS", Some("32")),
+                ("DYN_RUNTIME_NUM_WORKER_THREADS", Some("24")),
+                ("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("32")),
            ],
            || {
                let config = RuntimeConfig::from_settings()?;
@@ -194,8 +194,8 @@ mod tests {
    fn test_runtime_config_defaults() -> Result<()> {
        temp_env::with_vars(
            vec![
-                ("TRD_RUNTIME_NUM_WORKER_THREADS", None::<&str>),
-                ("TRD_RUNTIME_MAX_BLOCKING_THREADS", Some("")),
+                ("DYN_RUNTIME_NUM_WORKER_THREADS", None::<&str>),
+                ("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("")),
            ],
            || {
                let config = RuntimeConfig::from_settings()?;
@@ -215,8 +215,8 @@ mod tests {
    fn test_runtime_config_rejects_invalid_thread_count() -> Result<()> {
        temp_env::with_vars(
            vec![
-                ("TRD_RUNTIME_NUM_WORKER_THREADS", Some("0")),
-                ("TRD_RUNTIME_MAX_BLOCKING_THREADS", Some("0")),
+                ("DYN_RUNTIME_NUM_WORKER_THREADS", Some("0")),
+                ("DYN_RUNTIME_MAX_BLOCKING_THREADS", Some("0")),
            ],
            || {
                let result = RuntimeConfig::from_settings();

--- a/lib/runtime/src/logging.rs
+++ b/lib/runtime/src/logging.rs
@@ -13,17 +13,17 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-//! Triton Distributed Logging Module.
+//! Dynemo Distributed Logging Module.
 //!
 //! - Configuration loaded from:
 //!   1. Environment variables (highest priority).
-//!   2. Optional TOML file pointed to by the `TRD_LOGGING_CONFIG_PATH` environment variable.
-//!   3. `/opt/triton/etc/logging.toml`.
+//!   2. Optional TOML file pointed to by the `DYN_LOGGING_CONFIG_PATH` environment variable.
+//!   3. `/opt/dynemo/etc/logging.toml`.
 //!
 //! Logging can take two forms: `READABLE` or `JSONL`. The default is `READABLE`. `JSONL`
-//! can be enabled by setting the `TRD_LOGGING_JSONL` environment variable to `1`.
+//! can be enabled by setting the `DYN_LOGGING_JSONL` environment variable to `1`.
 //!
-//! Filters can be configured using the `TRD_LOG` environment variable or by setting the `filters`
+//! Filters can be configured using the `DYN_LOG` environment variable or by setting the `filters`
 //! key in the TOML configuration file. Filters are comma-separated key-value pairs where the key
 //! is the crate or module name and the value is the log level. The default log level is `error`.
 //!
@@ -53,13 +53,13 @@ use tracing_subscriber::EnvFilter;
 use tracing_subscriber::{filter::Directive, fmt};

 /// ENV used to set the log level
-const FILTER_ENV: &str = "TRD_LOG";
+const FILTER_ENV: &str = "DYN_LOG";

 /// Default log level
 const DEFAULT_FILTER_LEVEL: &str = "error";

 /// ENV used to set the path to the logging configuration file
-const CONFIG_PATH_ENV: &str = "TRD_LOGGING_CONFIG_PATH";
+const CONFIG_PATH_ENV: &str = "DYN_LOGGING_CONFIG_PATH";

 /// Once instance to ensure the logger is only initialized once
 static INIT: Once = Once::new();
@@ -155,7 +155,7 @@ fn load_config() -> LoggingConfig {
    let config_path = std::env::var(CONFIG_PATH_ENV).unwrap_or_else(|_| "".to_string());
    let figment = Figment::new()
        .merge(Serialized::defaults(LoggingConfig::default()))
-        .merge(Toml::file("/opt/triton/etc/logging.toml"))
+        .merge(Toml::file("/opt/dynemo/etc/logging.toml"))
        .merge(Toml::file(config_path));

    figment.extract().unwrap()

--- a/lib/runtime/src/pipeline/context.rs
+++ b/lib/runtime/src/pipeline/context.rs
@@ -299,7 +299,7 @@ impl<T: Send + Sync + 'static> From<Context<T>> for StreamContext {
    }
 }

-// TODO - refactor here - this came from the triton-llm-async-engine crate
+// TODO - refactor here - this came from the dynemo-llm-async-engine crate

 use tokio::sync::watch::{channel, Receiver, Sender};


--- a/lib/runtime/src/pipeline/registry.rs
+++ b/lib/runtime/src/pipeline/registry.rs
@@ -22,7 +22,7 @@ use std::sync::Arc;
 /// # Examples
 ///
 /// ```
-/// use triton_distributed_runtime::pipeline::registry::Registry;
+/// use dynemo_runtime::pipeline::registry::Registry;
 ///
 /// let mut registry = Registry::new();
 ///

--- a/lib/runtime/src/protocols.rs
+++ b/lib/runtime/src/protocols.rs
@@ -95,7 +95,7 @@ impl From<&str> for Endpoint {
    ///
    /// # Examples
    /// ```ignore
-    /// use triton_distributed::protocols::Endpoint;
+    /// use dynemo_runtime::protocols::Endpoint;
    ///
    /// let endpoint = Endpoint::from("namespace/component/endpoint");
    /// assert_eq!(endpoint.namespace, "namespace");
@@ -150,7 +150,7 @@ impl FromStr for Endpoint {
    /// # Examples
    /// ```ignore
    /// use std::str::FromStr;
-    /// use triton_distributed::protocols::Endpoint;
+    /// use dynemo_runtime::protocols::Endpoint;
    ///
    /// let endpoint: Endpoint = "namespace/component/endpoint".parse().unwrap();
    /// assert_eq!(endpoint.namespace, "namespace");

--- a/lib/runtime/src/worker.rs
+++ b/lib/runtime/src/worker.rs
@@ -18,17 +18,17 @@
 //!
 //! In the future, the [Worker] should probably be moved to a procedural macro similar
 //! to the `#[tokio::main]` attribute, where we might annotate an async main function with
-//! `#[triton::main]` or similar.
+//! `#[dynemo::main]` or similar.
 //!
 //! The [Worker::execute] method is designed to be called once from main and will block
 //! the calling thread until the application completes or is canceled. The method initialized
 //! the signal handler used to trap `SIGINT` and `SIGTERM` signals and trigger a graceful shutdown.
 //!
 //! On termination, the user application is given a graceful shutdown period of controlled by
-//! the [TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] environment variable. If the application does not
+//! the [DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] environment variable. If the application does not
 //! shutdown in time, the worker will terminate the application with an exit code of 911.
 //!
-//! The default values of [TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] differ between the development
+//! The default values of [DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT] differ between the development
 //! and release builds. In development, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG] and
 //! in release, the default is [DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_RELEASE].

@@ -45,10 +45,10 @@ static INIT: OnceCell<Mutex<Option<tokio::task::JoinHandle<Result<()>>>>> = Once
 const SHUTDOWN_MESSAGE: &str =
    "Application received shutdown signal; attempting to gracefully shutdown";
 const SHUTDOWN_TIMEOUT_MESSAGE: &str =
-    "Use TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT to control the graceful shutdown timeout";
+    "Use DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT to control the graceful shutdown timeout";

 /// Environment variable to control the graceful shutdown timeout
-pub const TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT: &str = "TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT";
+pub const DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT: &str = "DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT";

 /// Default graceful shutdown timeout in seconds in debug mode
 pub const DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_DEBUG: u64 = 5;
@@ -106,7 +106,7 @@ impl Worker {
        let primary = runtime.primary();
        let secondary = runtime.secondary();

-        let timeout = std::env::var(TRD_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT)
+        let timeout = std::env::var(DYN_WORKER_GRACEFUL_SHUTDOWN_TIMEOUT)
            .ok()
            .and_then(|s| s.parse::<u64>().ok())
            .unwrap_or({

--- a/lib/runtime/tests/common/engines.rs
+++ b/lib/runtime/tests/common/engines.rs
@@ -21,12 +21,12 @@ use async_trait::async_trait;
 use futures::Stream;
 use tokio::sync::mpsc;

-use triton_distributed_runtime::engine::{
+use dynemo_runtime::engine::{
    AsyncEngine, AsyncEngineContext, AsyncEngineContextProvider, AsyncEngineStream,
    Data as DataType, Engine, EngineStream,
 };

-use triton_distributed_runtime::pipeline::{
+use dynemo_runtime::pipeline::{
    context::{Context, StreamContext},
    Error, ManyOut, SingleIn,
 };