Commit 1af7433b authored by Neelay Shah's avatar Neelay Shah Committed by GitHub
Browse files

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
......@@ -19,7 +19,7 @@
**/*.plan
**/.cache/*
**/*onnx*
# Engine must be allowed because code contains triton_distributed_engine.py
# Engine must be allowed because code contains dynemo_engine.py
**/*tensorrtllm_engines*
**/*tensorrtllm_models*
**/*tensorrtllm_checkpoints*
......
......@@ -22,25 +22,6 @@ on:
jobs:
# icp_validation:
# runs-on: ubuntu-latest
# container:
# image: ghcr.io/triton-inference-server/triton3/python_ci:0.1.9
# env:
# BUILD_NUMBER: ${{ github.job }}
# CUDA_VISIBLE_DEVICES: -1
# PATH: /opt/tritonserver/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/ucx/bin:/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/mpi/bin:/usr/local/sbin
# volumes:
# - ${{ github.workspace }}:/workspace
# permissions:
# contents: read
# packages: read
# steps:
# - uses: actions/checkout@v4
# - run: ./icp/protos/gen_python.sh
# - run: pytest --verbose icp
# timeout-minutes: 3
pre-commit:
runs-on: ubuntu-latest
permissions:
......@@ -52,41 +33,3 @@ jobs:
timeout-minutes: 3
# providers_validation:
# runs-on: ubuntu-latest
# container:
# image: ghcr.io/triton-inference-server/triton3/python_ci:0.1.9
# env:
# BUILD_NUMBER: ${{ github.job }}
# CUDA_VISIBLE_DEVICES: -1
# PATH: /opt/tritonserver/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/ucx/bin:/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/mpi/bin:/usr/local/sbin
# PROTO_OUT: /python/icp/protos
# volumes:
# - ${{ github.workspace }}:/workspace
# permissions:
# contents: read
# packages: read
# steps:
# - uses: actions/checkout@v4
# - run: pytest --verbose providers
# worker_validation:
# runs-on: ubuntu-latest
# container:
# image: ghcr.io/triton-inference-server/triton3/python_ci:0.1.9
# env:
# BUILD_NUMBER: ${{ github.job }}
# CUDA_VISIBLE_DEVICES: -1
# PATH: /opt/tritonserver/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/ucx/bin:/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/mpi/bin:/usr/local/sbin
# PROTO_OUT: /python/icp/protos
# volumes:
# - ${{ github.workspace }}:/workspace
# permissions:
# contents: read
# packages: read
# steps:
# - uses: actions/checkout@v4
# - run: ./icp/protos/gen_python.sh
# - run: pytest -p no:warnings --verbose worker/python/tests
# timeout-minutes: 2
......@@ -17,7 +17,7 @@ limitations under the License.
# Open Source License Attribution
Triton Distributed uses Open Source components. You can find the details of these open-source projects along with license information below.
Dynemo uses Open Source components. You can find the details of these open-source projects along with license information below.
We are grateful to the developers for their contributions to open source and acknowledge these below.
## nats-py - [Apache License 2.0](https://github.com/nats-io/nats.py/blob/main/LICENSE)
......
......@@ -71,7 +71,7 @@ The run script offers a few common workflows:
1. Running a command in a container and exiting.
```
./container/run.sh -- python3 -c "import triton_distributed.runtime; help(triton_distributed.runtime)"
./container/run.sh -- python3 -c "import dynemo.runtime; help(dynemo.runtime)"
```
2. Starting an interactive shell.
......
......@@ -737,6 +737,8 @@ version = "0.1.0"
dependencies = [
"axum 0.6.20",
"clap",
"dynemo-llm",
"dynemo-runtime",
"opentelemetry",
"opentelemetry-prometheus",
"prometheus",
......@@ -747,8 +749,6 @@ dependencies = [
"thiserror 1.0.69",
"tokio",
"tracing",
"triton-distributed-llm",
"triton-distributed-runtime",
]
[[package]]
......@@ -1024,6 +1024,99 @@ dependencies = [
"syn 2.0.98",
]
[[package]]
name = "dynemo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"axum 0.8.1",
"bindgen",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"minijinja",
"minijinja-contrib",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie",
"toktrie_hf_tokenizers",
"tracing",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "dynemo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "ed25519"
version = "2.2.3"
......@@ -4232,99 +4325,6 @@ dependencies = [
"tracing-serde",
]
[[package]]
name = "triton-distributed-llm"
version = "0.2.1"
dependencies = [
"anyhow",
"async-openai",
"async-stream",
"async-trait",
"axum 0.8.1",
"bindgen",
"blake3",
"bs62",
"bytes",
"chrono",
"cmake",
"derive_builder",
"either",
"erased-serde",
"futures",
"galil-seiferas",
"indexmap 2.7.1",
"itertools 0.14.0",
"libc",
"minijinja",
"minijinja-contrib",
"prometheus",
"pyo3",
"regex",
"semver",
"serde",
"serde-pickle",
"serde_json",
"serde_repr",
"strum",
"thiserror 2.0.11",
"tokenizers",
"tokio",
"tokio-stream",
"tokio-util",
"toktrie",
"toktrie_hf_tokenizers",
"tracing",
"triton-distributed-runtime",
"unicode-segmentation",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "triton-distributed-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
"async-nats",
"async-once-cell",
"async-stream",
"async-trait",
"async_zmq",
"blake3",
"bytes",
"chrono",
"derive-getters",
"derive_builder",
"educe",
"either",
"etcd-client",
"figment",
"futures",
"humantime",
"local-ip-address",
"log",
"nid",
"nix",
"nuid",
"once_cell",
"prometheus",
"rand",
"regex",
"serde",
"serde_json",
"socket2",
"thiserror 1.0.69",
"tokio",
"tokio-stream",
"tokio-util",
"tracing",
"tracing-subscriber",
"uuid",
"validator",
"xxhash-rust",
]
[[package]]
name = "try-lock"
version = "0.2.5"
......
......@@ -21,8 +21,8 @@ license = "Apache-2.0"
[dependencies]
# local
triton-distributed-runtime = { path = "../../../lib/runtime" }
triton-distributed-llm = { path = "../../../lib/llm" }
dynemo-runtime = { path = "../../../lib/runtime" }
dynemo-llm = { path = "../../../lib/llm" }
# workspace - todo
......
......@@ -8,17 +8,17 @@ the services associated with that endpoint, do some postprocessing on them,
and then publish an event with the postprocessed data.
```bash
# For more details, try TRD_LOG=debug
TRD_LOG=info cargo run --bin count -- --namespace triton-init --component backend --endpoint generate
# For more details, try DYN_LOG=debug
DYN_LOG=info cargo run --bin count -- --namespace dynemo --component backend --endpoint generate
# 2025-02-26T18:45:05.467026Z INFO count: Creating unique instance of Count at triton-init/components/count/instance
# 2025-02-26T18:45:05.472146Z INFO count: Scraping service triton_init_backend_720278f8 and filtering on subject triton_init_backend_720278f8.generate
# 2025-02-26T18:45:05.467026Z INFO count: Creating unique instance of Count at dynemo/components/count/instance
# 2025-02-26T18:45:05.472146Z INFO count: Scraping service dynemo_init_backend_720278f8 and filtering on subject dynemo_init_backend_720278f8.generate
# ...
```
With no matching endpoints running, you should see warnings in the logs:
```bash
2025-02-26T18:45:06.474161Z WARN count: No endpoints found matching subject triton_init_backend_720278f8.generate
2025-02-26T18:45:06.474161Z WARN count: No endpoints found matching subject dynemo_init_backend_720278f8.generate
```
To see metrics published to a matching endpoint, you can use the
......@@ -35,7 +35,7 @@ since the endpoint will automatically get discovered.
When stats are found from the target endpoints being listened on, count will
aggregate and publish some metrics as both an event and to a prometheus web server:
```
2025-02-28T04:05:58.077901Z INFO count: Aggregated metrics: ProcessedEndpoints { endpoints: [Endpoint { name: "worker-7587884888253033398", subject: "triton_init_backend_720278f8.generate-694d951a80e06bb6", data: ForwardPassMetrics { request_active_slots: 58, request_total_slots: 100, kv_active_blocks: 77, kv_total_blocks: 100 } }, Endpoint { name: "worker-7587884888253033401", subject: "triton_init_backend_720278f8.generate-694d951a80e06bb9", data: ForwardPassMetrics { request_active_slots: 71, request_total_slots: 100, kv_active_blocks: 29, kv_total_blocks: 100 } }], worker_ids: [7587884888253033398, 7587884888253033401], load_avg: 53.0, load_std: 24.0 }
2025-02-28T04:05:58.077901Z INFO count: Aggregated metrics: ProcessedEndpoints { endpoints: [Endpoint { name: "worker-7587884888253033398", subject: "dynemo_init_backend_720278f8.generate-694d951a80e06bb6", data: ForwardPassMetrics { request_active_slots: 58, request_total_slots: 100, kv_active_blocks: 77, kv_total_blocks: 100 } }, Endpoint { name: "worker-7587884888253033401", subject: "dynemo_init_backend_720278f8.generate-694d951a80e06bb9", data: ForwardPassMetrics { request_active_slots: 71, request_total_slots: 100, kv_active_blocks: 29, kv_total_blocks: 100 } }], worker_ids: [7587884888253033398, 7587884888253033401], load_avg: 53.0, load_std: 24.0 }
```
To see the metrics being published in prometheus format, you can run:
......
......@@ -13,10 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use rand::Rng;
use std::sync::Arc;
use triton_distributed_llm::kv_router::protocols::ForwardPassMetrics;
use triton_distributed_runtime::{
use dynemo_llm::kv_router::protocols::ForwardPassMetrics;
use dynemo_runtime::{
logging,
pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
......@@ -25,6 +23,8 @@ use triton_distributed_runtime::{
protocols::annotated::Annotated,
stream, DistributedRuntime, Result, Runtime, Worker,
};
use rand::Rng;
use std::sync::Arc;
fn main() -> Result<()> {
logging::init();
......@@ -69,7 +69,7 @@ async fn backend(runtime: DistributedRuntime) -> Result<()> {
// we must first create a service, then we can attach one or more endpoints
runtime
.namespace("triton-init")?
.namespace("dynemo")?
.component("backend")?
.service_builder()
.create()
......
......@@ -20,13 +20,11 @@ use prometheus::register_gauge_vec;
use serde::{Deserialize, Serialize};
use std::net::SocketAddr;
use triton_distributed_llm::kv_router::protocols::ForwardPassMetrics;
use triton_distributed_llm::kv_router::scheduler::Endpoint;
use triton_distributed_llm::kv_router::scoring::ProcessedEndpoints;
use dynemo_llm::kv_router::protocols::ForwardPassMetrics;
use dynemo_llm::kv_router::scheduler::Endpoint;
use dynemo_llm::kv_router::scoring::ProcessedEndpoints;
use triton_distributed_runtime::{
distributed::Component, service::EndpointInfo, utils::Duration, Result,
};
use dynemo_runtime::{distributed::Component, service::EndpointInfo, utils::Duration, Result};
/// Configuration for LLM worker load capacity metrics
#[derive(Debug, Clone, Serialize, Deserialize)]
......
......@@ -24,7 +24,7 @@
//! - KV Cache Blocks: [Active, Total]
use clap::Parser;
use triton_distributed_runtime::{
use dynemo_runtime::{
error, logging,
traits::events::EventPublisher,
utils::{Duration, Instant},
......@@ -50,7 +50,7 @@ struct Args {
endpoint: String,
/// Namespace to operate in
#[arg(long, env = "TRD_NAMESPACE", default_value = "triton-init")]
#[arg(long, env = "DYN_NAMESPACE", default_value = "dynemo")]
namespace: String,
/// Polling interval in seconds (minimum 1 second)
......@@ -155,7 +155,7 @@ mod tests {
#[test]
fn test_namespace_from_env() {
env::set_var("TRD_NAMESPACE", "test-namespace");
env::set_var("DYN_NAMESPACE", "test-namespace");
let args = Args::parse_from(["count", "--component", "comp", "--endpoint", "end"]);
assert_eq!(args.namespace, "test-namespace");
}
......
......@@ -16,7 +16,7 @@
ARG BASE_IMAGE="nvcr.io/nvidia/tritonserver"
ARG BASE_IMAGE_TAG="25.01-py3"
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS triton-distributed
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS dynemo
# TODO: non root user by default
......@@ -34,7 +34,7 @@ RUN rustup toolchain install 1.85.0-x86_64-unknown-linux-gnu
# Install OpenAI-compatible frontend and its dependencies from triton server
# repository. These are used to have a consistent interface, schema, and FastAPI
# app between Triton Core and Triton Distributed implementations.
# app between Triton Core and Dynemo implementations.
ARG OPENAI_SERVER_TAG="r25.01"
RUN mkdir -p /opt/tritonserver/python && \
cd /opt/tritonserver/python && \
......@@ -78,7 +78,7 @@ ARG TENSORRTLLM_SKIP_CLONE=
ENV FRAMEWORK=${FRAMEWORK}
RUN --mount=type=bind,source=./container/deps/requirements.tensorrtllm.txt,target=/tmp/requirements.txt \
--mount=type=bind,source=./container/deps/clone_tensorrtllm.sh,target=/tmp/clone_tensorrtllm.sh \
if [[ "$FRAMEWORK" == "TENSORRTLLM" ]] ; then pip install --timeout=2000 -r /tmp/requirements.txt; if [ ${TENSORRTLLM_SKIP_CLONE} -ne 1 ] ; then /tmp/clone_tensorrtllm.sh --tensorrtllm-backend-repo-tag ${TENSORRTLLM_BACKEND_REPO_TAG} --tensorrtllm-backend-rebuild ${TENSORRTLLM_BACKEND_REBUILD} --triton-llm-path /opt/triton/llm_binding ; fi ; fi
if [[ "$FRAMEWORK" == "TENSORRTLLM" ]] ; then pip install --timeout=2000 -r /tmp/requirements.txt; if [ ${TENSORRTLLM_SKIP_CLONE} -ne 1 ] ; then /tmp/clone_tensorrtllm.sh --tensorrtllm-backend-repo-tag ${TENSORRTLLM_BACKEND_REPO_TAG} --tensorrtllm-backend-rebuild ${TENSORRTLLM_BACKEND_REBUILD} --triton-llm-path /opt/dynemo/llm_binding ; fi ; fi
RUN --mount=type=bind,source=./container/deps/requirements.standard.txt,target=/tmp/requirements.txt \
......@@ -106,7 +106,7 @@ ENV VLLM_GENERATE_WORKERS=${VLLM_FRAMEWORK:+1}
ENV VLLM_BASELINE_TP_SIZE=${VLLM_FRAMEWORK:+1}
ENV VLLM_CONTEXT_TP_SIZE=${VLLM_FRAMEWORK:+1}
ENV VLLM_GENERATE_TP_SIZE=${VLLM_FRAMEWORK:+1}
ENV VLLM_KV_CAPI_PATH="/opt/triton/llm_binding/lib/libtriton_llm_capi.so"
ENV VLLM_KV_CAPI_PATH="/opt/dynemo/llm_binding/lib/libdynemo_llm_capi.so"
ENV PYTHONUNBUFFERED=1
# Install NATS - pointing toward NATS github instead of binaries.nats.dev due to server instability
......@@ -159,27 +159,27 @@ COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c/ && \
cargo build --release --locked && cargo doc --no-deps
# Install uv, create virtualenv for general use, and build triton_distributed wheel
# Install uv, create virtualenv for general use, and build dynemo wheel
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/triton && \
uv venv /opt/triton/venv --python 3.12 && \
source /opt/triton/venv/bin/activate && \
RUN mkdir /opt/dynemo && \
uv venv /opt/dynemo/venv --python 3.12 && \
source /opt/dynemo/venv/bin/activate && \
uv build --wheel --out-dir /workspace/dist && \
uv pip install /workspace/dist/triton_distributed*cp312*.whl
uv pip install /workspace/dist/dynemo*cp312*.whl
# Package the bindings
RUN mkdir -p /opt/triton/bindings/wheels && \
mkdir /opt/triton/bindings/lib && \
cp dist/triton_distributed*cp312*.whl /opt/triton/bindings/wheels/. && \
cp lib/bindings/c/target/release/libtriton_distributed_llm_capi.so /opt/triton/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/triton/bindings/.
RUN mkdir -p /opt/dynemo/bindings/wheels && \
mkdir /opt/dynemo/bindings/lib && \
cp dist/dynemo*cp312*.whl /opt/dynemo/bindings/wheels/. && \
cp lib/bindings/c/target/release/libdynemo_llm_capi.so /opt/dynemo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynemo/bindings/.
# Install triton_distributed_runtime and triton_distributed_llm wheels globally in container for tests that
# Install dynemo.runtime and dynemo.llm wheels globally in container for tests that
# currently run without virtual environment activated.
# TODO: In future, we may use a virtualenv for everything and remove this.
RUN pip install /opt/triton/bindings/wheels/triton_distributed*cp312*.whl
RUN pip install /opt/dynemo/bindings/wheels/dynemo*cp312*.whl
# Copy everything in after install steps to avoid re-running build/install
# Copy everything in after install steps to avoid re-running build/install
# commands on unrelated changes in other dirs.
COPY . /workspace
......
......@@ -24,17 +24,17 @@ ENV PATH=/usr/local/bin/etcd/:$PATH
# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/triton && \
uv venv /opt/triton/venv --python 3.12
RUN mkdir /opt/dynemo && \
uv venv /opt/dynemo/venv --python 3.12
# Activate virtual environment
ENV VIRTUAL_ENV=/opt/triton/venv
ENV VIRTUAL_ENV=/opt/dynemo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="v0.7.2"
ARG VLLM_PATCH="vllm_${VLLM_REF}-triton-kv-disagg-patch.patch"
ARG VLLM_PATCH="vllm_${VLLM_REF}-dynemo-kv-disagg-patch.patch"
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
bash /tmp/deps/vllm/install.sh --patch /tmp/deps/vllm/${VLLM_PATCH} --ref ${VLLM_REF} --install-cmd "uv pip install --editable" --use-precompiled --installation-dir /opt/vllm
......@@ -100,25 +100,25 @@ COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c && \
cargo build --release --locked && cargo doc --no-deps
# Build triton_distributed wheel
RUN source /opt/triton/venv/bin/activate && \
# Build dynemo wheel
RUN source /opt/dynemo/venv/bin/activate && \
uv build --wheel --out-dir /workspace/dist && \
uv pip install /workspace/dist/triton_distributed*cp312*.whl
uv pip install /workspace/dist/dynemo*cp312*.whl
# Package the bindings
RUN mkdir -p /opt/triton/bindings/wheels && \
mkdir /opt/triton/bindings/lib && \
cp dist/triton_distributed*cp312*.whl /opt/triton/bindings/wheels/. && \
cp lib/bindings/c/target/release/libtriton_distributed_llm_capi.so /opt/triton/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/triton/bindings/.
RUN mkdir -p /opt/dynemo/bindings/wheels && \
mkdir /opt/dynemo/bindings/lib && \
cp dist/dynemo*cp312*.whl /opt/dynemo/bindings/wheels/. && \
cp lib/bindings/c/target/release/libdynemo_llm_capi.so /opt/dynemo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynemo/bindings/.
# Tell vllm to use the Triton LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/triton/bindings/lib/libtriton_distributed_llm_capi.so"
# Tell vllm to use the Dynemo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynemo/bindings/lib/libdynemo_llm_capi.so"
# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace
# FIXME: May want a modification with triton-distributed banner on entry
# FIXME: May want a modification with dynemo-distributed banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -136,10 +136,10 @@ RUN apt update -y && \
echo "set -g mouse on" >> /root/.tmux.conf
# Set environment variables
ENV VIRTUAL_ENV=/opt/triton/venv
ENV VIRTUAL_ENV=/opt/dynemo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
ENV VLLM_KV_CAPI_PATH="/opt/triton/bindings/lib/libtriton_distributed_llm_capi.so"
ENV VLLM_KV_CAPI_PATH="/opt/dynemo/bindings/lib/libdynemo_llm_capi.so"
# Copy binaries
COPY --from=dev /usr/local/bin/http /usr/local/bin/http
......@@ -166,7 +166,7 @@ COPY examples/python_rs/llm/vllm /workspace/examples/python_rs/llm/vllm
WORKDIR /workspace
# FIXME: May want a modification with triton-distributed banner on entry
# FIXME: May want a modification with dynemo-distributed banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -150,17 +150,17 @@ ENV PATH=/usr/local/bin/etcd/:$PATH
# Install uv and create virtualenv
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/triton && \
uv venv /opt/triton/venv --python 3.12
RUN mkdir /opt/dynemo && \
uv venv /opt/dynemo/venv --python 3.12
# Activate virtual environment
ENV VIRTUAL_ENV=/opt/triton/venv
ENV VIRTUAL_ENV=/opt/dynemo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Install patched vllm - keep this early in Dockerfile to avoid
# rebuilds from unrelated source code changes
ARG VLLM_REF="v0.7.2"
ARG VLLM_PATCH="vllm_${VLLM_REF}-triton-kv-disagg-patch.patch"
ARG VLLM_PATCH="vllm_${VLLM_REF}-dynemo-kv-disagg-patch.patch"
RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
bash /tmp/deps/vllm/install.sh --patch /tmp/deps/vllm/${VLLM_PATCH} --ref ${VLLM_REF} --install-cmd "uv pip install --editable" --use-precompiled --installation-dir /opt/vllm
......@@ -225,25 +225,25 @@ COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c && \
cargo build --release --locked && cargo doc --no-deps
# Build triton_distributed wheel
RUN source /opt/triton/venv/bin/activate && \
# Build dynemo wheel
RUN source /opt/dynemo/venv/bin/activate && \
uv build --wheel --out-dir /workspace/dist && \
uv pip install /workspace/dist/triton_distributed*cp312*.whl
uv pip install /workspace/dist/dynemo*cp312*.whl
# Package the bindings
RUN mkdir -p /opt/triton/bindings/wheels && \
mkdir /opt/triton/bindings/lib && \
cp dist/triton_distributed*cp312*.whl /opt/triton/bindings/wheels/. && \
cp lib/bindings/c/target/release/libtriton_distributed_llm_capi.so /opt/triton/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/triton/bindings/.
RUN mkdir -p /opt/dynemo/bindings/wheels && \
mkdir /opt/dynemo/bindings/lib && \
cp dist/dynemo*cp312*.whl /opt/dynemo/bindings/wheels/. && \
cp lib/bindings/c/target/release/libdynemo_llm_capi.so /opt/dynemo/bindings/lib/. && \
cp -r lib/bindings/c/include /opt/dynemo/bindings/.
# Tell vllm to use the Triton LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/triton/bindings/lib/libtriton_distributed_llm_capi.so"
# Tell vllm to use the Dynemo LLM C API for KV Cache Routing
ENV VLLM_KV_CAPI_PATH="/opt/dynemo/bindings/lib/libdynemo_llm_capi.so"
# FIXME: Copy more specific folders in for dev/debug after directory restructure
COPY . /workspace
# FIXME: May want a modification with triton-distributed banner on entry
# FIXME: May want a modification with dynemo-distributed banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -261,10 +261,10 @@ RUN apt update -y && \
echo "set -g mouse on" >> /root/.tmux.conf
# Set environment variables
ENV VIRTUAL_ENV=/opt/triton/venv
ENV VIRTUAL_ENV=/opt/dynemo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
ENV VLLM_KV_CAPI_PATH="/opt/triton/bindings/lib/libtriton_distributed_llm_capi.so"
ENV VLLM_KV_CAPI_PATH="/opt/dynemo/bindings/lib/libdynemo_llm_capi.so"
# Copy binaries
COPY --from=dev /usr/local/bin/http /usr/local/bin/http
......@@ -291,7 +291,7 @@ COPY examples/python_rs/llm/vllm_nixl /workspace/examples/python_rs/llm/vllm_nix
WORKDIR /workspace
# FIXME: May want a modification with triton-distributed banner on entry
# FIXME: May want a modification with dynemo-distributed banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -16,7 +16,7 @@
TENSORRTLLM_BACKEND_REPO_TAG=
TENSORRTLLM_BACKEND_REBUILD=
TRITON_LLM_PATH=
DYNEMO_LLM_PATH=
GIT_TOKEN=
GIT_REPO=
......@@ -43,9 +43,9 @@ get_options() {
missing_requirement $1
fi
;;
--triton-llm-path)
--dynemo-llm-path)
if [ "$2" ]; then
TRITON_LLM_PATH=$2
DYNEMO_LLM_PATH=$2
shift
else
missing_requirement $1
......@@ -147,9 +147,9 @@ if [ ! -z ${TENSORRTLLM_BACKEND_REBUILD} ]; then
# Build the backend
(cd inflight_batcher_llm/src \
&& cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DUSE_CXX11_ABI=1 -DTRITON_LLM_PATH=$TRITON_LLM_PATH .. \
&& cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install -DUSE_CXX11_ABI=1 -DDYNEMO_LLM_PATH=$DYNEMO_LLM_PATH .. \
&& make install \
&& cp libtriton_tensorrtllm.so /opt/tritonserver/backends/tensorrtllm/ \
&& cp libdynemo_tensorrtllm.so /opt/tritonserver/backends/tensorrtllm/ \
&& cp trtllmExecutorWorker /opt/tritonserver/backends/tensorrtllm/ \
)
fi
......
......@@ -26,7 +26,8 @@ import typing as t
from typing import Any
import click
from triton_distributed_rs import DistributedRuntime, triton_endpoint, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
logger = logging.getLogger("compoundai.serve.nova")
......@@ -102,7 +103,7 @@ def main(
server_context.worker_index = worker_id
class_instance = service.inner()
@triton_worker()
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
if service_name and service_name != service.name:
server_context.service_type = "service"
......@@ -157,12 +158,12 @@ def main(
# Bind an instance of inner to the endpoint
bound_method = endpoint.func.__get__(class_instance)
# Only pass request type for now, use Any for response
# TODO: Handle a triton_endpoint not having types
# TODO: Handle a dynemo_endpoint not having types
# TODO: Handle multiple endpoints in a single component
triton_wrapped_method = triton_endpoint(endpoint.request_type, Any)(
dynemo_wrapped_method = dynemo_endpoint(endpoint.request_type, Any)(
bound_method
)
result = await td_endpoint.serve_endpoint(triton_wrapped_method)
result = await td_endpoint.serve_endpoint(dynemo_wrapped_method)
# WARNING: unreachable code :( because serve blocks
logger.info(f"[{run_id}] Result: {result}")
logger.info(f"[{run_id}] Registered endpoint '{name}'")
......
......@@ -50,7 +50,7 @@ class NovaEndpoint:
if isinstance(args[1], (str, dict)):
args[1] = self.request_type.parse_obj(args[1]) # type: ignore
# Convert Pydantic model to dict before passing to triton
# Convert Pydantic model to dict before passing to dynemo
if len(args) > 1 and isinstance(args[1], BaseModel):
args = list(args) # type: ignore
args[1] = args[1].model_dump() # type: ignore
......
......@@ -72,9 +72,9 @@ class NovaClient:
else:
# Create nova worker if no runtime
from triton_distributed_rs import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
@triton_worker()
@dynemo_worker()
async def stream_worker(runtime: DistributedRuntime):
try:
# Store runtime for future use
......
......@@ -90,14 +90,14 @@ Note: NATS and ETCD servers should be running and accessible from the container
Run the server logging (with debug level logging):
```bash
TRD_LOG=DEBUG http &
DYN_LOG=DEBUG http &
```
By default the server will run on port 8080.
Add model to the server:
```bash
llmctl http add chat TinyLlama/TinyLlama-1.1B-Chat-v1.0 triton-init.tensorrt-llm.chat/completions
llmctl http add completion TinyLlama/TinyLlama-1.1B-Chat-v1.0 triton-init.tensorrt-llm.completions
llmctl http add chat TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynemo.tensorrt-llm.chat/completions
llmctl http add completion TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynemo.tensorrt-llm.completions
```
#### 2. Workers
......@@ -214,14 +214,14 @@ Run the container interactively with the following command:
Run the server logging (with debug level logging):
```bash
TRD_LOG=DEBUG http &
DYN_LOG=DEBUG http &
```
By default the server will run on port 8080.
Add model to the server:
```bash
llmctl http add chat TinyLlama/TinyLlama-1.1B-Chat-v1.0 triton-init.router.chat/completions
llmctl http add completion TinyLlama/TinyLlama-1.1B-Chat-v1.0 triton-init.router.completions
llmctl http add chat TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynemo.router.chat/completions
llmctl http add completion TinyLlama/TinyLlama-1.1B-Chat-v1.0 dynemo.router.completions
```
#### 2. Workers
......
......@@ -19,12 +19,12 @@ import asyncio
import uvloop
from triton_distributed.runtime import DistributedRuntime, triton_worker
from dynemo.runtime import DistributedRuntime, dynemo_worker
from .protocol import Request
@triton_worker()
@dynemo_worker()
async def worker(
runtime: DistributedRuntime,
component: str,
......@@ -38,7 +38,7 @@ async def worker(
"""
# create client
client = (
await runtime.namespace("triton-init")
await runtime.namespace("dynemo")
.component(component)
.endpoint("generate")
.client()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment