Commit 602352ce authored by Neelay Shah's avatar Neelay Shah Committed by GitHub
Browse files

chore: rename dynamo (#44)


Co-authored-by: default avatarBiswa Panda <biswa.panda@gmail.com>
parent ecf53ce2
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import random
import string
import uvloop
from dynemo.runtime import DistributedRuntime, dynemo_worker
# Soak Test
#
# This was a failure case for the distributed runtime. If the Rust Tokio
# runtime is started with a small number of threads, the GIL + asyncio
# event loop can starve and time out the ingress handler.
#
# There may still be some blocking operations in the ingress handler that
# could still eventually be a problem.
@dynemo_worker()
async def worker(runtime: DistributedRuntime):
    """Launch the server in the background, drive load against it, then shut down."""
    namespace = random_string()
    server_task = asyncio.create_task(server_init(runtime, namespace))
    await client_init(runtime, namespace)
    runtime.shutdown()
    await server_task
async def client_init(runtime: DistributedRuntime, ns: str):
    """
    Instantiate a `backend` client and flood the `generate` endpoint.

    Args:
        runtime: distributed runtime used to resolve the endpoint.
        ns: namespace under which the `backend` component is registered.

    Raises:
        AssertionError: if any of the concurrent requests failed.
    """
    # get endpoint
    endpoint = runtime.namespace(ns).component("backend").endpoint("generate")
    # create client
    client = await endpoint.client()
    # wait for an endpoint to be ready
    await client.wait_for_endpoints()

    # Issue many concurrent requests to put load on the server;
    # each task issues one request and drains the response stream.
    tasks = [asyncio.create_task(do_one(client)) for _ in range(20000)]

    # BUG FIX: without return_exceptions=True, gather() re-raises the first
    # task failure, so the error-counting code below was unreachable on
    # failure. Collecting exceptions as results lets us count every error.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    error_count = sum(1 for r in results if isinstance(r, BaseException))
    assert error_count == 0, f"expected 0 errors, got {error_count}"
async def do_one(client):
    """Issue a single `generate` request and drain the full response stream."""
    response = await client.generate("hello world")
    async for _ in response:
        pass
async def server_init(runtime: DistributedRuntime, ns: str):
    """
    Register a `backend` component and serve its `generate` endpoint.

    A `Component` can serve multiple endpoints; this soak test needs one.
    """
    backend = runtime.namespace(ns).component("backend")
    await backend.create_service()
    generate_endpoint = backend.endpoint("generate")
    print("Started server instance")
    await generate_endpoint.serve_endpoint(RequestHandler().generate)
class RequestHandler:
    """
    Handler for the `generate` endpoint: echoes the request back one
    character at a time, with an artificial delay per character.
    """

    async def generate(self, request):
        # The per-character delay keeps many requests in flight at once,
        # which is the load pattern this soak test exercises.
        delay = 0.1
        for token in request:
            await asyncio.sleep(delay)
            yield token
def random_string(length=10):
    """Return a random alphanumeric string of the given length."""
    alphabet = string.ascii_letters + string.digits  # a-z, A-Z, 0-9
    return "".join(random.choices(alphabet, k=length))
if __name__ == "__main__":
    # Install uvloop as the asyncio event-loop policy before starting the
    # worker; the soak test depends on a responsive event loop.
    uvloop.install()
    asyncio.run(worker())
......@@ -20,7 +20,7 @@ pytestmark = pytest.mark.pre_merge
def test_bindings_install():
# Verify python bindings to rust can be imported
import dynemo.runtime as tdr
import dynamo.runtime as tdr
# Placeholder to avoid unused import errors or removal by linters
assert tdr
......@@ -15,7 +15,10 @@
import asyncio
from dynemo._core import DistributedRuntime
from dynamo._core import DistributedRuntime
# Todo add support for launching etcd
# pytestmark = pytest.mark.pre_merge
async def test_simple_put_get():
......
......@@ -24,8 +24,8 @@ from typing import List
import pytest
from dynemo.llm import KvIndexer, KvMetricsAggregator, KvMetricsPublisher
from dynemo.runtime import DistributedRuntime
from dynamo.llm import KvIndexer, KvMetricsAggregator, KvMetricsPublisher
from dynamo.runtime import DistributedRuntime
pytestmark = pytest.mark.pre_merge
......@@ -89,7 +89,7 @@ async def test_event_handler():
# KV events
class DynemoResult:
class DynamoResult:
OK = 0
ERR = 1
......@@ -101,13 +101,13 @@ class EventPublisher:
# load event publisher library
self.lib = ctypes.CDLL(os.environ["VLLM_KV_CAPI_PATH"])
self.lib.dynemo_llm_init.argtypes = [c_char_p, c_char_p, c_int64]
self.lib.dynemo_llm_init.restype = c_uint32
result = self.lib.dynemo_llm_init(
self.lib.dynamo_llm_init.argtypes = [c_char_p, c_char_p, c_int64]
self.lib.dynamo_llm_init.restype = c_uint32
result = self.lib.dynamo_llm_init(
namespace.encode(), component.encode(), worker_id
)
assert result == DynemoResult.OK
self.lib.dynemo_kv_event_publish_stored.argtypes = [
assert result == DynamoResult.OK
self.lib.dynamo_kv_event_publish_stored.argtypes = [
ctypes.c_uint64, # event_id
ctypes.POINTER(ctypes.c_uint32), # token_ids
ctypes.POINTER(ctypes.c_size_t), # num_block_tokens
......@@ -116,18 +116,18 @@ class EventPublisher:
ctypes.POINTER(ctypes.c_uint64), # parent_hash
ctypes.c_uint64, # lora_id
]
self.lib.dynemo_kv_event_publish_stored.restype = (
self.lib.dynamo_kv_event_publish_stored.restype = (
ctypes.c_uint32
) # dynemo_llm_result_t
) # dynamo_llm_result_t
self.lib.dynemo_kv_event_publish_removed.argtypes = [
self.lib.dynamo_kv_event_publish_removed.argtypes = [
ctypes.c_uint64, # event_id
ctypes.POINTER(ctypes.c_uint64), # block_ids
ctypes.c_size_t, # num_blocks
]
self.lib.dynemo_kv_event_publish_removed.restype = (
self.lib.dynamo_kv_event_publish_removed.restype = (
ctypes.c_uint32
) # dynemo_llm_result_t
) # dynamo_llm_result_t
def store_event(self, tokens, lora_id):
parent_hash = (
......@@ -135,7 +135,7 @@ class EventPublisher:
if self.event_id_counter > 0
else None
)
result = self.lib.dynemo_kv_event_publish_stored(
result = self.lib.dynamo_kv_event_publish_stored(
self.event_id_counter, # uint64_t event_id
(ctypes.c_uint32 * len(tokens))(*tokens), # const uint32_t *token_ids
(ctypes.c_size_t * 1)(len(tokens)), # const uintptr_t *num_block_tokens
......@@ -147,17 +147,17 @@ class EventPublisher:
self.block_ids.append(self.event_id_counter)
self.event_id_counter += 1
assert result == DynemoResult.OK
assert result == DynamoResult.OK
def remove_event(self):
result = self.lib.dynemo_kv_event_publish_removed(
result = self.lib.dynamo_kv_event_publish_removed(
self.event_id_counter, # uint64_t event_id
(ctypes.c_uint64 * 1)(self.block_ids[-1]), # const uint64_t *block_ids
1, # uintptr_t num_blocks
)
self.event_id_counter += 1
assert result == DynemoResult.OK
assert result == DynamoResult.OK
async def test_metrics_aggregator():
......
......@@ -1409,7 +1409,7 @@ dependencies = [
]
[[package]]
name = "dynemo-llm"
name = "dynamo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
......@@ -1425,7 +1425,7 @@ dependencies = [
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"dynamo-runtime",
"either",
"erased-serde",
"futures",
......@@ -1470,7 +1470,7 @@ dependencies = [
]
[[package]]
name = "dynemo-runtime"
name = "dynamo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
......
......@@ -22,7 +22,7 @@ homepage = "https://github.com/dynemo-ai/dynemo"
repository = "https://github.com/dynemo-ai/dynemo.git"
[package]
name = "dynemo-llm"
name = "dynamo-llm"
version.workspace = true
edition.workspace = true
authors.workspace = true
......@@ -44,7 +44,7 @@ vulkan = ["llama-cpp-2/vulkan"]
[workspace.dependencies]
# local or crates.io
dynemo-runtime = { version = "0.2.0", path = "../runtime" }
dynamo-runtime = { version = "0.2.0", path = "../runtime" }
# crates.io
anyhow = { version = "1" }
......@@ -67,7 +67,7 @@ strum = { version = "0.27", features = ["derive"] }
[dependencies]
# repo
dynemo-runtime = { workspace = true }
dynamo-runtime = { workspace = true }
# workspace
anyhow = { workspace = true }
......
......@@ -34,7 +34,7 @@ use futures::stream::{self, StreamExt};
use tracing as log;
use crate::model_card::model::{ModelDeploymentCard, TokenizerKind};
use dynemo_runtime::{
use dynamo_runtime::{
pipeline::{
async_trait, AsyncEngineContextProvider, ManyOut, Operator, ResponseStream,
ServerStreamingEngine, SingleIn,
......
......@@ -22,11 +22,11 @@ use std::{
use anyhow::Context;
use async_stream::stream;
use async_trait::async_trait;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::CancellationToken;
use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynamo_runtime::pipeline::error as pipeline_error;
use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::CancellationToken;
use llama_cpp_2::{
context::{params::LlamaContextParams, LlamaContext},
llama_backend::LlamaBackend,
......
......@@ -28,10 +28,10 @@ use mistralrs::{
};
use tokio::sync::mpsc::channel;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynamo_runtime::pipeline::error as pipeline_error;
use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynamo_runtime::protocols::annotated::Annotated;
use crate::protocols::openai::chat_completions::{
NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
......
......@@ -16,8 +16,8 @@
use std::ffi::CStr;
use std::{path::Path, sync::Arc};
use dynemo_runtime::pipeline::error as pipeline_error;
pub use dynemo_runtime::{
use dynamo_runtime::pipeline::error as pipeline_error;
pub use dynamo_runtime::{
error,
pipeline::{
async_trait, AsyncEngine, AsyncEngineContextProvider, Data, ManyOut, ResponseStream,
......
......@@ -17,8 +17,8 @@ use std::path::Path;
use std::sync::Arc;
use crate::backend::ExecutionContext;
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::CancellationToken;
use dynamo_runtime::pipeline::error as pipeline_error;
use dynamo_runtime::CancellationToken;
mod worker;
......
......@@ -19,10 +19,10 @@ use async_stream::stream;
use async_trait::async_trait;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::runtime::CancellationToken;
use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::runtime::CancellationToken;
use crate::engines::MultiNodeConfig;
......
......@@ -37,8 +37,8 @@ use tokio::sync::mpsc::Sender;
use tokio::{io::AsyncBufReadExt, sync::mpsc::error::SendError};
use tokio::{io::AsyncReadExt as _, task::JoinHandle};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::runtime::CancellationToken;
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::runtime::CancellationToken;
use crate::engines::sglang::MultiGPUConfig;
use crate::engines::MultiNodeConfig;
......
......@@ -16,7 +16,7 @@
use std::sync::Arc;
use crate::backend::ExecutionContext;
use dynemo_runtime::pipeline::error as pipeline_error;
use dynamo_runtime::pipeline::error as pipeline_error;
pub mod executor;
......
......@@ -15,9 +15,9 @@
use anyhow::{Error, Result};
use async_trait::async_trait;
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynamo_runtime::pipeline::{ManyOut, SingleIn};
use dynamo_runtime::protocols::annotated::Annotated;
use futures::stream;
use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken;
......
......@@ -19,8 +19,8 @@ use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use dynemo_runtime::pipeline::error as pipeline_error;
use dynemo_runtime::CancellationToken;
use dynamo_runtime::pipeline::error as pipeline_error;
use dynamo_runtime::CancellationToken;
use crate::backend::ExecutionContext;
use crate::engines::MultiNodeConfig;
......
......@@ -21,10 +21,10 @@ use async_trait::async_trait;
use crate::engines::vllm::worker;
use crate::engines::MultiNodeConfig;
use crate::protocols::common::llm_backend::{BackendInput, LLMEngineOutput};
use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::runtime::CancellationToken;
use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::runtime::CancellationToken;
pub struct VllmEngine {
cancel_token: CancellationToken,
......
......@@ -24,7 +24,7 @@ use tokio::select;
use tokio::time;
use tracing;
use dynemo_runtime::CancellationToken;
use dynamo_runtime::CancellationToken;
/// Default is 16 seconds, we make it a bit shorter
const RAY_STOP_TIMEOUT_SECS: u32 = 10;
......
......@@ -19,8 +19,8 @@ use std::{
};
use async_zmq::{SinkExt, StreamExt};
use dynemo_runtime::protocols::annotated::Annotated;
use dynemo_runtime::CancellationToken;
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::CancellationToken;
use pyo3::{
prelude::*,
types::{IntoPyDict, PyBytes, PyString},
......
......@@ -18,7 +18,7 @@ use std::sync::Arc;
use serde::{Deserialize, Serialize};
use tokio::sync::mpsc::Receiver;
use dynemo_runtime::{
use dynamo_runtime::{
protocols::{self, annotated::Annotated},
raise,
transports::etcd::{KeyValue, WatchEvent},
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment