Commit 1af7433b authored by Neelay Shah, committed by GitHub

refactor: rename triton_distributed to dynemo (#22)


Co-authored-by: Graham King <grahamk@nvidia.com>
parent ee4ef06b
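
For orientation: the Python package `triton_distributed` becomes `dynemo`, the `triton_worker`/`triton_endpoint` decorators become `dynemo_worker`/`dynemo_endpoint`, the Rust crates `triton_distributed_runtime`/`triton_distributed_llm` become `dynemo_runtime`/`dynemo_llm`, and the example namespace string moves from "triton-init" to "dynemo". A minimal sketch of a worker entrypoint after the rename (not code from this commit; it only uses names that appear in the hunks below):

```python
# Old names removed by this commit:
#   from triton_distributed.runtime import DistributedRuntime, triton_worker
#   @triton_worker()
# New names added by this commit:
from dynemo.runtime import DistributedRuntime, dynemo_worker


@dynemo_worker()
async def worker(runtime: DistributedRuntime):
    # The examples also switch their namespace string from "triton-init" to "dynemo".
    namespace = runtime.namespace("dynemo")
```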
@@ -17,12 +17,12 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
-    await init(runtime, "triton-init")
+    await init(runtime, "dynemo")
 async def init(runtime: DistributedRuntime, ns: str):
......
@@ -21,7 +21,7 @@ import uvloop
 from client import init as client_init
 from server import init as server_init
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 def random_string(length=10):
@@ -29,7 +29,7 @@ def random_string(length=10):
     return "".join(random.choices(chars, k=length))
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     ns = random_string()
     task = asyncio.create_task(server_init(runtime, ns))
......
@@ -17,7 +17,7 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 class RequestHandler:
@@ -31,9 +31,9 @@ class RequestHandler:
             yield char
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
-    await init(runtime, "triton-init")
+    await init(runtime, "dynemo")
 async def init(runtime: DistributedRuntime, ns: str):
......
@@ -17,7 +17,7 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 uvloop.install()
@@ -29,7 +29,7 @@ class RequestHandler:
             yield char
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     component = runtime.namespace("examples/pipeline").component("backend")
     await component.create_service()
......
@@ -17,7 +17,7 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 uvloop.install()
@@ -32,7 +32,7 @@ class RequestHandler:
             yield output.get("data")
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     # client to the next component - in this case the middle component
     next = (
......
@@ -17,7 +17,7 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 uvloop.install()
@@ -32,7 +32,7 @@ class RequestHandler:
             yield output.get("data")
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     # client to backend
     backend = (
......
@@ -17,12 +17,12 @@ import asyncio
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 uvloop.install()
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     """
     # Pipeline Example
......
@@ -17,18 +17,16 @@ import asyncio
 from protocol import Request
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     """
     Instantiate a `backend` client and call the `generate` endpoint
     """
     # get endpoint
-    endpoint = (
-        runtime.namespace("triton-init").component("backend").endpoint("generate")
-    )
+    endpoint = runtime.namespace("dynemo").component("backend").endpoint("generate")
     # create client
     client = await endpoint.client()
......
@@ -19,11 +19,7 @@ import asyncio
 import uvloop
 from protocol import Request, Response
-from triton_distributed.runtime import (
-    DistributedRuntime,
-    triton_endpoint,
-    triton_worker,
-)
+from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
 uvloop.install()
@@ -33,19 +29,19 @@ class RequestHandler:
     Request handler for the generate endpoint
     """
-    @triton_endpoint(Request, Response)
+    @dynemo_endpoint(Request, Response)
     async def generate(self, request):
         for char in request.data:
             yield char
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     """
     Instantiate a `backend` component and serve the `generate` endpoint
     A `Component` can serve multiple endpoints
     """
-    component = runtime.namespace("triton-init").component("backend")
+    component = runtime.namespace("dynemo").component("backend")
     await component.create_service()
     endpoint = component.endpoint("generate")
......
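
Taken together, the client and server hunks above imply the following end-to-end sketch under the new names. This is an assembled illustration rather than code from the commit; the real examples keep the client and server in separate files, and the parts the diff elides (how the handler is attached to the endpoint and how the client issues requests) are left as comments rather than guessed.

```python
import uvloop
from dynemo.runtime import DistributedRuntime, dynemo_endpoint, dynemo_worker
from protocol import Request, Response  # local module shipped with the examples

uvloop.install()


class RequestHandler:
    """
    Request handler for the generate endpoint
    """

    @dynemo_endpoint(Request, Response)
    async def generate(self, request):
        # Stream the request payload back one character at a time.
        for char in request.data:
            yield char


@dynemo_worker()
async def server(runtime: DistributedRuntime):
    # Register a `backend` component in the `dynemo` namespace and expose `generate`.
    component = runtime.namespace("dynemo").component("backend")
    await component.create_service()
    endpoint = component.endpoint("generate")
    # ...serving `RequestHandler().generate` on this endpoint is elided in the diff...


@dynemo_worker()
async def client(runtime: DistributedRuntime):
    # Look up the same endpoint from the client side and build a client for it.
    endpoint = runtime.namespace("dynemo").component("backend").endpoint("generate")
    client = await endpoint.client()
    # ...issuing a request through `client` is elided in the diff...
```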
@@ -15,8 +15,7 @@
 use std::sync::Arc;
-pub use serde::{Deserialize, Serialize};
-pub use triton_distributed_runtime::{
+pub use dynemo_runtime::{
     error,
     pipeline::{
         async_trait, AsyncEngine, AsyncEngineContextProvider, Data, ManyOut, ResponseStream,
@@ -25,6 +24,7 @@ pub use triton_distributed_runtime::{
     protocols::annotated::Annotated,
     Error, Result,
 };
+pub use serde::{Deserialize, Serialize};
 use pyo3::prelude::*;
 use pyo3_async_runtimes::TaskLocals;
@@ -192,7 +192,6 @@ where
             // tell the python async generator to stop generating
            // right now, this is impossible as we are not passing the context to the python async generator
            // todo: add task-local context to the python async generator
-            // see: https://github.com/triton-inference-server/triton_distributed/issues/130
             ctx.stop_generating();
             let msg = format!("critical error: invalid response object from python async generator; application-logic-mismatch: {}", e);
             tracing::error!(request_id, "{}", msg);
......
@@ -25,14 +25,14 @@ use std::{fmt::Display, sync::Arc};
 use tokio::sync::Mutex;
 use tracing_subscriber::FmtSubscriber;
-use triton_distributed_runtime::{
+use dynemo_runtime::{
     self as rs,
     pipeline::{EngineStream, ManyOut, SingleIn},
     protocols::annotated::Annotated as RsAnnotated,
     traits::DistributedRuntimeProvider,
 };
-use triton_distributed_llm::{self as llm_rs};
+use dynemo_llm::{self as llm_rs};
 mod engine;
 mod llm;
......
@@ -19,7 +19,7 @@ use crate::llm::model_card::ModelDeploymentCard;
 use llm_rs::protocols::common::llm_backend::{BackendInput, BackendOutput};
 use llm_rs::types::Annotated;
-use triton_distributed_runtime::pipeline::{Operator, ServiceBackend, ServiceFrontend, Source};
+use dynemo_runtime::pipeline::{Operator, ServiceBackend, ServiceFrontend, Source};
 use crate::engine::PythonAsyncEngine;
......
@@ -27,9 +27,9 @@ use llm_rs::{
     },
 };
-use triton_distributed_runtime::pipeline::{Operator, ServiceFrontend, Source};
+use dynemo_runtime::pipeline::{Operator, ServiceFrontend, Source};
-use triton_distributed_runtime::pipeline::{ManyOut, SegmentSink, SingleIn};
+use dynemo_runtime::pipeline::{ManyOut, SegmentSink, SingleIn};
 #[pyclass]
 pub(crate) struct OAIChatPreprocessor {
......
@@ -26,7 +26,7 @@ RequestHandler = Callable[[JsonLike], AsyncGenerator[JsonLike, None]]
 class DistributedRuntime:
     """
-    The runtime object for a distributed NOVA applications
+    The runtime object for dynemo applications
     """
     ...
......
@@ -13,5 +13,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from triton_distributed._core import KvMetricsPublisher as KvMetricsPublisher
-from triton_distributed._core import KvRouter as KvRouter
+from dynemo._core import KvMetricsPublisher as KvMetricsPublisher
+from dynemo._core import KvRouter as KvRouter
@@ -19,7 +19,7 @@ import string
 import uvloop
-from triton_distributed.runtime import DistributedRuntime, triton_worker
+from dynemo.runtime import DistributedRuntime, dynemo_worker
 # Soak Test
 #
@@ -31,7 +31,7 @@ from triton_distributed.runtime import DistributedRuntime, triton_worker
 # could still eventually be a problem.
-@triton_worker()
+@dynemo_worker()
 async def worker(runtime: DistributedRuntime):
     ns = random_string()
     task = asyncio.create_task(server_init(runtime, ns))
......