chore: stragglers rename (#69)

Co-authored-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>

chore: stragglers rename (#69)
Co-authored-by: Harrison King Saturley-Hall <hsaturleyhal@nvidia.com>
dd31a322 · Neelay Shah · GitHub · efe82b86 · dd31a322 · dd31a322
Commit dd31a322, authored Mar 09, 2025 by Neelay Shah; committed by GitHub on Mar 09, 2025.
7 changed files
--- a/examples/python_rs/llm/vllm/scripts/disaggregated.sh
+++ b/examples/python_rs/llm/vllm/scripts/disaggregated.sh
@@ -99,7 +99,7 @@ PREFILL_CMD="VLLM_WORKER_MULTIPROC_METHOD=spawn CUDA_VISIBLE_DEVICES=0 \
    --max-model-len 1000 \
    --tensor-parallel-size 1 \
    --kv-transfer-config \
-    '{\"kv_connector\":\"TritonNcclConnector\",\"kv_role\":\"kv_producer\",\"kv_rank\":0,\"kv_parallel_size\":2}'"
+    '{\"kv_connector\":\"DynamoNcclConnector\",\"kv_role\":\"kv_producer\",\"kv_rank\":0,\"kv_parallel_size\":2}'"

 tmux select-pane -t 2
 tmux send-keys "$INIT_CMD && $PREFILL_CMD" C-m
@@ -115,7 +115,7 @@ DECODE_CMD="VLLM_WORKER_MULTIPROC_METHOD=spawn CUDA_VISIBLE_DEVICES=1 \
    --max-model-len 1000 \
    --tensor-parallel-size 1 \
    --kv-transfer-config \
-    '{\"kv_connector\":\"TritonNcclConnector\",\"kv_role\":\"kv_consumer\",\"kv_rank\":1,\"kv_parallel_size\":2}'"
+    '{\"kv_connector\":\"DynamoNcclConnector\",\"kv_role\":\"kv_consumer\",\"kv_rank\":1,\"kv_parallel_size\":2}'"

 tmux select-pane -t 3
 tmux send-keys "$INIT_CMD && $DECODE_CMD" C-m

--- a/examples/python_rs/llm/vllm_nixl/router/prefill_worker.py
+++ b/examples/python_rs/llm/vllm_nixl/router/prefill_worker.py
@@ -94,7 +94,7 @@ async def worker(runtime: DistributedRuntime, engine_args: AsyncEngineArgs):

        request_handler = RequestHandler(engine_client, metadata_store)

-        # TODO: integrate prefill_queue to an triton_distributed endpoint
+        # TODO: integrate prefill_queue to a dynamo endpoint
        async with PrefillQueue.get_instance(
            nats_server=prefill_queue_nats_server,
            stream_name=prefill_queue_stream_name,

--- a/examples/python_rs/llm/vllm_nixl/router/worker.py
+++ b/examples/python_rs/llm/vllm_nixl/router/worker.py
@@ -70,7 +70,7 @@ class RequestHandler:
        print("RequestHandler initialized")

    def get_remote_prefill_request_callback(self):
-        # TODO: integrate prefill_queue to an triton_distributed endpoint
+        # TODO: integrate prefill_queue to a dynamo endpoint
        async def callback(request: RemotePrefillRequest):
            async with PrefillQueue.get_instance(
                nats_server=self._prefill_queue_nats_server,

--- a/lib/bindings/cpp/nvllm-trt/include/nvidia/nvllm/nvllm_trt.h
+++ b/lib/bindings/cpp/nvllm-trt/include/nvidia/nvllm/nvllm_trt.h
@@ -33,7 +33,7 @@ nvllm_trt_engine_t nvllm_trt_engine_create(const char* config_proto);
 // Create a nvLLM TRT Engine from an instance of the engine
 // This requires the raw engine pointer to be an instantiated object at the exact same
 // commit version as the version of TRTLLM used to build the nvLLM C API.
-// This is a workaround to enable the Triton TensorRT LLM backend to use nvLLM.
+// This is a workaround to enable the Dynamo TensorRT LLM backend to use nvLLM.
 nvllm_trt_engine_t nvllm_trt_engine_unsafe_create_from_executor(void* engine);

 // Source: Enqueue a streaming request via a json message to the request queue

--- a/lib/bindings/python/uv.lock
+++ b/lib/bindings/python/uv.lock
@@ -100,7 +100,7 @@ wheels = [
 ]

 [[package]]
-name = "triton-distributed-rs"
+name = "dynamo"
 version = "0.1.3"
 source = { editable = "." }
 dependencies = [

--- a/lib/runtime/src/component/service.rs
+++ b/lib/runtime/src/component/service.rs
@@ -27,8 +27,7 @@ pub type StatsHandler =
 pub type EndpointStatsHandler =
    Box<dyn FnMut(endpoint::Stats) -> serde_json::Value + Send + Sync + 'static>;

-// TODO(rename) - pending rename of project
-pub const PROJECT_NAME: &str = "Triton";
+pub const PROJECT_NAME: &str = "Dynamo";

 #[derive(Educe, Builder, Dissolve)]
 #[educe(Debug)]

--- a/lib/runtime/src/lib.rs
+++ b/lib/runtime/src/lib.rs
@@ -13,7 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-//! Triton
+//! Dynamo

 #![allow(dead_code)]
 #![allow(unused_imports)]
@@ -49,7 +49,7 @@ pub use futures::stream;
 pub use tokio_util::sync::CancellationToken;
 pub use worker::Worker;

-/// Types of Tokio runtimes that can be used to construct a Triton [Runtime].
+/// Types of Tokio runtimes that can be used to construct a Dynamo [Runtime].
 #[derive(Clone)]
 enum RuntimeType {
    Shared(Arc<tokio::runtime::Runtime>),