chore: rename dynamo (#44)

Co-authored-by: Biswa Panda <biswa.panda@gmail.com>

chore: rename dynamo (#44)
Co-authored-by: Biswa Panda <biswa.panda@gmail.com>
602352ce · Neelay Shah · GitHub · ecf53ce2 · 602352ce · 602352ce
Commit 602352ce authored Mar 08, 2025 by Neelay Shah Committed by GitHub Mar 08, 2025
20 changed files
--- a/launch/dynemo-run/src/input/http.rs
+++ b/launch/dynemo-run/src/input/http.rs
@@ -15,7 +15,7 @@
 use std::sync::Arc;
-use dynemo_llm::{
+use dynamo_llm::{
    backend::Backend,
    http::service::{discovery, service_v2},
    model_type::ModelType,
@@ -27,7 +27,7 @@ use dynemo_llm::{
        Annotated,
    },
 };
-use dynemo_runtime::{
+use dynamo_runtime::{
    pipeline::{ManyOut, Operator, ServiceBackend, ServiceFrontend, SingleIn, Source},
    DistributedRuntime, Runtime,
 };

--- a/launch/dynemo-run/src/input/text.rs
+++ b/launch/dynemo-run/src/input/text.rs
@@ -13,7 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-use dynemo_llm::{
+use dynamo_llm::{
    backend::Backend,
    preprocessor::OpenAIPreprocessor,
    types::{
@@ -24,7 +24,7 @@ use dynemo_llm::{
        Annotated,
    },
 };
-use dynemo_runtime::{
+use dynamo_runtime::{
    pipeline::{Context, ManyOut, Operator, ServiceBackend, ServiceFrontend, SingleIn, Source},
    runtime::CancellationToken,
    DistributedRuntime, Runtime,

--- a/launch/dynemo-run/src/lib.rs
+++ b/launch/dynemo-run/src/lib.rs
@@ -16,11 +16,11 @@
 #[cfg(any(feature = "vllm", feature = "sglang"))]
 use std::{future::Future, pin::Pin};
-use dynemo_llm::{
+use dynamo_llm::{
    backend::ExecutionContext, model_card::model::ModelDeploymentCard,
    types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine,
 };
-use dynemo_runtime::protocols::Endpoint;
+use dynamo_runtime::protocols::Endpoint;
 mod flags;
 pub use flags::Flags;
@@ -67,7 +67,7 @@ pub enum EngineConfig {
 #[allow(unused_mut)]
 pub async fn run(
-    runtime: dynemo_runtime::Runtime,
+    runtime: dynamo_runtime::Runtime,
    mut in_opt: Input, // mut because vllm and sglang multi-node can change it
    out_opt: Output,
    flags: Flags,
@@ -152,12 +152,12 @@ pub async fn run(
            };
            EngineConfig::StaticFull {
                service_name: model_name,
-                engine: dynemo_llm::engines::mistralrs::make_engine(&model_path).await?,
+                engine: dynamo_llm::engines::mistralrs::make_engine(&model_path).await?,
            }
        }
        #[cfg(feature = "sglang")]
        Output::SgLang => {
-            use dynemo_llm::engines::sglang;
+            use dynamo_llm::engines::sglang;
            let Some(model_path) = model_path else {
                anyhow::bail!("out=sglang requires flag --model-path=<full-path-to-model-dir>");
            };
@@ -169,7 +169,7 @@ pub async fn run(
            let Some(sock_prefix) = zmq_socket_prefix else {
                anyhow::bail!("sglang requires zmq_socket_prefix");
            };
-            let node_conf = dynemo_llm::engines::MultiNodeConfig {
+            let node_conf = dynamo_llm::engines::MultiNodeConfig {
                num_nodes: flags.num_nodes,
                node_rank: flags.node_rank,
                leader_addr: flags.leader_addr.unwrap_or_default(),
@@ -207,7 +207,7 @@ pub async fn run(
        }
        #[cfg(feature = "vllm")]
        Output::Vllm => {
-            use dynemo_llm::engines::vllm;
+            use dynamo_llm::engines::vllm;
            if flags.base_gpu_id != 0 {
                anyhow::bail!("vllm does not support base_gpu_id. Set environment variable CUDA_VISIBLE_DEVICES instead.");
            }
@@ -231,7 +231,7 @@ pub async fn run(
            let Some(sock_prefix) = zmq_socket_prefix else {
                anyhow::bail!("vllm requires zmq_socket_prefix");
            };
-            let node_conf = dynemo_llm::engines::MultiNodeConfig {
+            let node_conf = dynamo_llm::engines::MultiNodeConfig {
                num_nodes: flags.num_nodes,
                node_rank: flags.node_rank,
                leader_addr: flags.leader_addr.unwrap_or_default(),
@@ -274,7 +274,7 @@ pub async fn run(
        }
        #[cfg(feature = "llamacpp")]
        Output::LlamaCpp => {
-            use dynemo_llm::engines::llamacpp;
+            use dynamo_llm::engines::llamacpp;
            let Some(model_path) = model_path else {
                anyhow::bail!("out=llamacpp requires flag --model-path=<full-path-to-model-gguf>");
            };
@@ -295,7 +295,7 @@ pub async fn run(
        }
        #[cfg(feature = "trtllm")]
        Output::TrtLLM => {
-            use dynemo_llm::engines::trtllm;
+            use dynamo_llm::engines::trtllm;
            let Some(model_path) = model_path else {
                anyhow::bail!("out=trtllm requires flag --model-path=<full-path-to-model-dir>");
            };
@@ -315,7 +315,7 @@ pub async fn run(
        }
        #[cfg(feature = "python")]
        Output::PythonStr(path_str) => {
-            use dynemo_llm::engines::python;
+            use dynamo_llm::engines::python;
            let Some(model_name) = model_name else {
                anyhow::bail!("Provide model service name as `--model-name <this>`");
            };
@@ -328,7 +328,7 @@ pub async fn run(
        }
        #[cfg(feature = "python")]
        Output::PythonTok(path_str) => {
-            use dynemo_llm::engines::python;
+            use dynamo_llm::engines::python;
            let Some(card) = maybe_card.clone() else {
                anyhow::bail!("Could not find tokenizer. Pass flag --model-path <path>");
            };

--- a/launch/dynemo-run/src/main.rs
+++ b/launch/dynemo-run/src/main.rs
@@ -17,17 +17,17 @@ use std::env;
 use clap::Parser;
-use dynemo_run::{Input, Output};
+use dynamo_run::{Input, Output};
-use dynemo_runtime::logging;
+use dynamo_runtime::logging;
 const HELP: &str = r#"
-dynemo-run is a single binary that wires together the various inputs (http, text, network) and workers (network, engine), that runs the services. It is the simplest way to use dynemo locally.
+dynamo-run is a single binary that wires together the various inputs (http, text, network) and workers (network, engine), that runs the services. It is the simplest way to use dynamo locally.
 Example:
 - cargo build --release --features mistralrs,cuda
 - cd target/release
- ./dynemo-run hf_checkouts/Llama-3.2-3B-Instruct/
+- ./dynamo-run hf_checkouts/Llama-3.2-3B-Instruct/
- OR: ./dynemo-run Llama-3.2-1B-Instruct-Q4_K_M.gguf
+- OR: ./dynamo-run Llama-3.2-1B-Instruct-Q4_K_M.gguf
 "#;
@@ -41,14 +41,14 @@ const DEFAULT_OUT: Output = Output::EchoFull;
 const ZMQ_SOCKET_PREFIX: &str = "dyn";
-const USAGE: &str = "USAGE: dynemo-run in=[http|text|dyn://<path>|none] out=[mistralrs|sglang|llamacpp|vllm|trtllm|echo_full|echo_core|pystr:<engine.py>|pytok:<engine.py>] [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--tensor-parallel-size=1] [--num-nodes=1] [--node-rank=0] [--leader-addr=127.0.0.1:9876] [--base-gpu-id=0]";
+const USAGE: &str = "USAGE: dynamo-run in=[http|text|dyn://<path>|none] out=[mistralrs|sglang|llamacpp|vllm|trtllm|echo_full|echo_core|pystr:<engine.py>|pytok:<engine.py>] [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--tensor-parallel-size=1] [--num-nodes=1] [--node-rank=0] [--leader-addr=127.0.0.1:9876] [--base-gpu-id=0]";
 fn main() -> anyhow::Result<()> {
    logging::init();
    // Call sub-processes before starting the Runtime machinery
    // For anything except sub-process starting try_parse_from will error.
-    if let Ok(flags) = dynemo_run::Flags::try_parse_from(env::args()) {
+    if let Ok(flags) = dynamo_run::Flags::try_parse_from(env::args()) {
        #[allow(unused_variables)]
        if let Some(sglang_flags) = flags.internal_sglang_process {
            let Some(model_path) = flags.model_path_flag.as_ref() else {
@@ -60,13 +60,13 @@ fn main() -> anyhow::Result<()> {
            if cfg!(feature = "sglang") {
                #[cfg(feature = "sglang")]
                {
-                    use dynemo_llm::engines::sglang;
+                    use dynamo_llm::engines::sglang;
                    let gpu_config = sglang::MultiGPUConfig {
                        tp_size: flags.tensor_parallel_size,
                        tp_rank: sglang_flags.tp_rank,
                        gpu_id: sglang_flags.gpu_id,
                    };
-                    let node_config = dynemo_llm::engines::MultiNodeConfig {
+                    let node_config = dynamo_llm::engines::MultiNodeConfig {
                        num_nodes: flags.num_nodes,
                        node_rank: flags.node_rank,
                        leader_addr: flags.leader_addr.unwrap_or_default(),
@@ -98,8 +98,8 @@ fn main() -> anyhow::Result<()> {
            if cfg!(feature = "vllm") {
                #[cfg(feature = "vllm")]
                {
-                    use dynemo_llm::engines::vllm;
+                    use dynamo_llm::engines::vllm;
-                    let node_config = dynemo_llm::engines::MultiNodeConfig {
+                    let node_config = dynamo_llm::engines::MultiNodeConfig {
                        num_nodes: flags.num_nodes,
                        node_rank: flags.node_rank,
                        leader_addr: flags.leader_addr.unwrap_or_default(),
@@ -119,15 +119,15 @@ fn main() -> anyhow::Result<()> {
    }
    // max_worker_threads and max_blocking_threads from env vars or config file.
-    let rt_config = dynemo_runtime::RuntimeConfig::from_settings()?;
+    let rt_config = dynamo_runtime::RuntimeConfig::from_settings()?;
    // One per process. Wraps a Runtime which holds two tokio runtimes.
-    let worker = dynemo_runtime::Worker::from_config(rt_config)?;
+    let worker = dynamo_runtime::Worker::from_config(rt_config)?;
    worker.execute(wrapper)
 }
-async fn wrapper(runtime: dynemo_runtime::Runtime) -> anyhow::Result<()> {
+async fn wrapper(runtime: dynamo_runtime::Runtime) -> anyhow::Result<()> {
    let mut in_opt = None;
    let mut out_opt = None;
    let args: Vec<String> = env::args().skip(1).collect();
@@ -171,13 +171,13 @@ async fn wrapper(runtime: dynemo_runtime::Runtime) -> anyhow::Result<()> {
    // Clap skips the first argument expecting it to be the binary name, so add it back
    // Note `--model-path` has index=1 (in lib.rs) so that doesn't need a flag.
-    let flags = dynemo_run::Flags::try_parse_from(
+    let flags = dynamo_run::Flags::try_parse_from(
-        ["dynemo-run".to_string()]
+        ["dynamo-run".to_string()]
            .into_iter()
            .chain(env::args().skip(non_flag_params)),
    )?;
-    dynemo_run::run(
+    dynamo_run::run(
        runtime,
        in_opt,
        out_opt,

--- a/launch/dynemo-run/src/net.rs
+++ b/launch/dynemo-run/src/net.rs
--- a/launch/dynemo-run/src/opt.rs
+++ b/launch/dynemo-run/src/opt.rs
--- a/launch/dynemo-run/src/output.rs
+++ b/launch/dynemo-run/src/output.rs
--- a/launch/dynemo-run/src/output/echo_core.rs
+++ b/launch/dynemo-run/src/output/echo_core.rs
@@ -18,12 +18,12 @@ use std::{sync::Arc, time::Duration};
 use async_stream::stream;
 use async_trait::async_trait;
-use dynemo_llm::backend::ExecutionContext;
+use dynamo_llm::backend::ExecutionContext;
-use dynemo_llm::preprocessor::BackendInput;
+use dynamo_llm::preprocessor::BackendInput;
-use dynemo_llm::protocols::common::llm_backend::LLMEngineOutput;
+use dynamo_llm::protocols::common::llm_backend::LLMEngineOutput;
-use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
+use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
-use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
+use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
-use dynemo_runtime::protocols::annotated::Annotated;
+use dynamo_runtime::protocols::annotated::Annotated;
 /// How long to sleep between echoed tokens.
 /// 50ms gives us 20 tok/s.

--- a/launch/dynemo-run/src/output/echo_full.rs
+++ b/launch/dynemo-run/src/output/echo_full.rs
@@ -18,13 +18,13 @@ use std::{sync::Arc, time::Duration};
 use async_stream::stream;
 use async_trait::async_trait;
-use dynemo_llm::protocols::openai::chat_completions::{
+use dynamo_llm::protocols::openai::chat_completions::{
    NvCreateChatCompletionRequest, NvCreateChatCompletionStreamResponse,
 };
-use dynemo_llm::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine;
+use dynamo_llm::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine;
-use dynemo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
+use dynamo_runtime::engine::{AsyncEngine, AsyncEngineContextProvider, ResponseStream};
-use dynemo_runtime::pipeline::{Error, ManyOut, SingleIn};
+use dynamo_runtime::pipeline::{Error, ManyOut, SingleIn};
-use dynemo_runtime::protocols::annotated::Annotated;
+use dynamo_runtime::protocols::annotated::Annotated;
 /// How long to sleep between echoed tokens.
 /// 50ms gives us 20 tok/s.

--- a/lib/bindings/c/Cargo.lock
+++ b/lib/bindings/c/Cargo.lock
@@ -955,7 +955,7 @@ dependencies = [
 ]
 [[package]]
-name = "dynemo-llm"
+name = "dynamo-llm"
 version = "0.2.1"
 dependencies = [
 "anyhow",
@@ -970,7 +970,7 @@ dependencies = [
 "chrono",
 "cmake",
 "derive_builder",
- "dynemo-runtime",
+ "dynamo-runtime",
 "either",
 "erased-serde",
 "futures",
@@ -1004,7 +1004,7 @@ dependencies = [
 ]
 [[package]]
-name = "dynemo-runtime"
+name = "dynamo-runtime"
 version = "0.2.1"
 dependencies = [
 "anyhow",
@@ -1947,14 +1947,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
 [[package]]
-name = "libdynemo-llm"
+name = "libdynamo_llm"
 version = "0.2.1"
 dependencies = [
 "anyhow",
 "async-once-cell",
 "cbindgen",
- "dynemo-llm",
+ "dynamo-llm",
- "dynemo-runtime",
+ "dynamo-runtime",
 "futures",
 "libc",
 "once_cell",

--- a/lib/bindings/c/Cargo.toml
+++ b/lib/bindings/c/Cargo.toml
@@ -14,7 +14,7 @@
 # limitations under the License.
 [package]
-name = "libdynemo-llm"
+name = "libdynamo_llm"
 version = "0.2.1"
 edition = "2021"
 authors = ["NVIDIA"]
@@ -23,15 +23,15 @@ homepage = "https://github.com/dynemo-ai/dynemo"
 repository = "https://github.com/dynemo-ai/dynemo.git"
 [lib]
-name = "dynemo_llm_capi"
+name = "dynamo_llm_capi"
 crate-type = ["cdylib"]
 [build-dependencies]
 cbindgen = "0.27"
 [dependencies]
-dynemo-llm = { path = "../../llm" }
+dynamo-llm = { path = "../../llm" }
-dynemo-runtime = { path = "../../runtime" }
+dynamo-runtime = { path = "../../runtime" }
 anyhow = { version = "1" }
 futures = "0.3"

--- a/lib/bindings/c/build.rs
+++ b/lib/bindings/c/build.rs
@@ -22,7 +22,7 @@ fn main() {
    let header_path = Path::new(&crate_dir)
        .join("include")
        .join("nvidia")
-        .join("dynemo_llm")
+        .join("dynamo_llm")
        .join("llm_engine.h");
    cbindgen::generate(crate_dir)

--- a/lib/bindings/c/cbindgen.toml
+++ b/lib/bindings/c/cbindgen.toml
@@ -25,7 +25,7 @@ enum_class = false
 [export]
-include = ["DynemoLlmResult", "dynemo_llm_init", "dynemo_llm_shutdown"]
+include = ["DynamoLlmResult", "dynamo_llm_init", "dynamo_llm_shutdown"]
 [export.rename]
-"DynemoLlmResult" = "dynemo_llm_result_t"
+"DynamoLlmResult" = "dynamo_llm_result_t"
--- a/lib/bindings/c/src/lib.rs
+++ b/lib/bindings/c/src/lib.rs
@@ -19,10 +19,10 @@ use once_cell::sync::OnceCell;
 use std::ffi::CStr;
 use std::sync::atomic::{AtomicU32, Ordering};
-use dynemo_llm::kv_router::{
+use dynamo_llm::kv_router::{
    indexer::compute_block_hash_for_seq, protocols::*, publisher::KvEventPublisher,
 };
-use dynemo_runtime::{DistributedRuntime, Worker};
+use dynamo_runtime::{DistributedRuntime, Worker};
 static WK: OnceCell<Worker> = OnceCell::new();
 static DRT: AsyncOnceCell<DistributedRuntime> = AsyncOnceCell::new();
 // [FIXME] shouldn't the publisher be instance passing between API calls?
@@ -41,7 +41,7 @@ fn initialize_tracing() {
 }
 #[repr(u32)]
-pub enum DynemoLlmResult {
+pub enum DynamoLlmResult {
    OK = 0,
    ERR = 1,
 }
@@ -49,17 +49,17 @@ pub enum DynemoLlmResult {
 /// # Safety
 /// the namespace_c_str and component_c_str are passed as pointers to C strings
 #[no_mangle]
-pub unsafe extern "C" fn dynemo_llm_init(
+pub unsafe extern "C" fn dynamo_llm_init(
    namespace_c_str: *const c_char,
    component_c_str: *const c_char,
    worker_id: i64,
-) -> DynemoLlmResult {
+) -> DynamoLlmResult {
    initialize_tracing();
    let wk = match WK.get_or_try_init(Worker::from_settings) {
        Ok(wk) => wk.clone(),
        Err(e) => {
            eprintln!("Failed to initialize runtime: {:?}", e);
-            return DynemoLlmResult::ERR;
+            return DynamoLlmResult::ERR;
        }
    };
    let rt = wk.runtime();
@@ -73,7 +73,7 @@ pub unsafe extern "C" fn dynemo_llm_init(
            Ok(_) => Ok(()),
            Err(e) => {
                eprintln!("Failed to initialize distributed runtime: {:?}", e);
-                Err(DynemoLlmResult::ERR)
+                Err(DynamoLlmResult::ERR)
            }
        }
    });
@@ -81,7 +81,7 @@ pub unsafe extern "C" fn dynemo_llm_init(
        Ok(s) => s.to_string(),
        Err(e) => {
            eprintln!("Failed to convert C string to Rust string: {:?}", e);
-            return DynemoLlmResult::ERR;
+            return DynamoLlmResult::ERR;
        }
    };
@@ -89,18 +89,18 @@ pub unsafe extern "C" fn dynemo_llm_init(
        Ok(s) => s.to_string(),
        Err(e) => {
            eprintln!("Failed to convert C string to Rust string: {:?}", e);
-            return DynemoLlmResult::ERR;
+            return DynamoLlmResult::ERR;
        }
    };
    match result {
        Ok(_) => match KV_PUB
-            .get_or_try_init(move || dynemo_create_kv_publisher(namespace, component, worker_id))
+            .get_or_try_init(move || dynamo_create_kv_publisher(namespace, component, worker_id))
        {
-            Ok(_) => DynemoLlmResult::OK,
+            Ok(_) => DynamoLlmResult::OK,
            Err(e) => {
                eprintln!("Failed to initialize distributed runtime: {:?}", e);
-                DynemoLlmResult::ERR
+                DynamoLlmResult::ERR
            }
        },
        Err(e) => e,
@@ -108,33 +108,33 @@ pub unsafe extern "C" fn dynemo_llm_init(
 }
 #[no_mangle]
-pub extern "C" fn dynemo_llm_shutdown() -> DynemoLlmResult {
+pub extern "C" fn dynamo_llm_shutdown() -> DynamoLlmResult {
    let wk = match WK.get() {
        Some(wk) => wk,
        None => {
            eprintln!("Runtime not initialized");
-            return DynemoLlmResult::ERR;
+            return DynamoLlmResult::ERR;
        }
    };
    wk.runtime().shutdown();
-    DynemoLlmResult::OK
+    DynamoLlmResult::OK
 }
 #[no_mangle]
-pub extern "C" fn dynemo_llm_load_publisher_create() -> DynemoLlmResult {
+pub extern "C" fn dynamo_llm_load_publisher_create() -> DynamoLlmResult {
-    DynemoLlmResult::OK
+    DynamoLlmResult::OK
 }
 // instantiate a kv publisher
 // this will bring up the task to publish and the channels to await publishing events
-// the [`dynemo_kv_publish_store_event`] call will use a handle to the publisher to send events
+// the [`dynamo_kv_publish_store_event`] call will use a handle to the publisher to send events
-// store and the [`dynemo_kv_event_create_removed`] will create remove events
+// store and the [`dynamo_kv_event_create_removed`] will create remove events
 // these calls must be driven by external c++ threads that are consuming the kv events from the
 // c++ executor api
-fn dynemo_create_kv_publisher(
+fn dynamo_create_kv_publisher(
    namespace: String,
    component: String,
    worker_id: i64,
@@ -238,7 +238,7 @@ fn kv_event_create_removed_from_parts(
 /// parent_hash is passed as pointer to indicate whether the blocks
 /// has a parent hash or not. nullptr is used to represent no parent hash
 #[no_mangle]
-pub unsafe extern "C" fn dynemo_kv_event_publish_stored(
+pub unsafe extern "C" fn dynamo_kv_event_publish_stored(
    event_id: u64,
    token_ids: *const u32,
    num_block_tokens: *const usize,
@@ -246,7 +246,7 @@ pub unsafe extern "C" fn dynemo_kv_event_publish_stored(
    num_blocks: usize,
    parent_hash: *const u64,
    lora_id: u64,
-) -> DynemoLlmResult {
+) -> DynamoLlmResult {
    let publisher = KV_PUB.get().unwrap();
    let parent_hash = {
        if parent_hash.is_null() {
@@ -265,40 +265,40 @@ pub unsafe extern "C" fn dynemo_kv_event_publish_stored(
        lora_id,
    );
    match publisher.publish(event) {
-        Ok(_) => DynemoLlmResult::OK,
+        Ok(_) => DynamoLlmResult::OK,
        Err(e) => {
            eprintln!("Error publishing stored kv event {:?}", e);
-            DynemoLlmResult::ERR
+            DynamoLlmResult::ERR
        }
    }
 }
 #[no_mangle]
-pub extern "C" fn dynemo_kv_event_publish_removed(
+pub extern "C" fn dynamo_kv_event_publish_removed(
    event_id: u64,
    block_ids: *const u64,
    num_blocks: usize,
-) -> DynemoLlmResult {
+) -> DynamoLlmResult {
    let publisher = KV_PUB.get().unwrap();
    let event = kv_event_create_removed_from_parts(event_id, block_ids, num_blocks);
    match publisher.publish(event) {
-        Ok(_) => DynemoLlmResult::OK,
+        Ok(_) => DynamoLlmResult::OK,
        Err(e) => {
            eprintln!("Error publishing removed kv event {:?}", e);
-            DynemoLlmResult::ERR
+            DynamoLlmResult::ERR
        }
    }
 }
 // #[no_mangle]
-// pub extern "C" fn dynemo_kv_publish_store_event(
+// pub extern "C" fn dynamo_kv_publish_store_event(
 //     event_id: u64,
 //     token_ids: *const u32,
 //     num_tokens: usize,
 //     lora_id: u64,
-// ) -> DynemoLlmResult {
+// ) -> DynamoLlmResult {
 //     // if event.is_null() || token_ids.is_null() {
-//     //     return dynemoKvErrorType::INVALID_TOKEN_IDS;
+//     //     return dynamoKvErrorType::INVALID_TOKEN_IDS;
 //     // }
 //     // let tokens = unsafe { std::slice::from_raw_parts(token_ids, num_tokens) }.to_vec();
@@ -311,15 +311,15 @@ pub extern "C" fn dynemo_kv_event_publish_removed(
 //     // unsafe { *event = Box::into_raw(new_event) };
-//     DynemoLlmResult::OK
+//     DynamoLlmResult::OK
 // }
 // #[no_mangle]
-// pub extern "C" fn dynemo_kv_event_create_removed(
+// pub extern "C" fn dynamo_kv_event_create_removed(
 //     event_id: u64,
 //     block_hashes: *const u64,
 //     num_hashes: usize,
-// ) -> DynemoLlmResult {
+// ) -> DynamoLlmResult {
 //     // if event.is_null() || block_hashes.is_null() {
 //     //     return -1;
 //     // }
@@ -334,19 +334,19 @@ pub extern "C" fn dynemo_kv_event_publish_removed(
 //     // unsafe { *event = Box::into_raw(new_event) };
 //     // 0
-//     DynemoLlmResult::OK
+//     DynamoLlmResult::OK
 // }
 // /// create load publisher object and return a handle
 // /// load publisher will instantiate the nats service and tie its stats handler to
 // /// a watch channel receiver.  the watch channel sender will be attach to the
-// /// handle and calls to [`dynemo_load_stats_publish`] issue the stats to the watch t
+// /// handle and calls to [`dynamo_load_stats_publish`] issue the stats to the watch t
-// pub extern "C" fn dynemo_load_publisher_create() -> *mut LoadPublisher {
+// pub extern "C" fn dynamo_load_publisher_create() -> *mut LoadPublisher {
 //     // let publisher = Box::new(LoadPublisher::new());
 //     // Box::into_raw(publisher)
 // }
-// pub extern "C" fn dynemo_load_stats_publish(
+// pub extern "C" fn dynamo_load_stats_publish(
 //     publisher: *mut LoadPublisher,
 //     active_slots: u64,
 //     total_slots: u64,

--- a/lib/bindings/python/.gitignore
+++ b/lib/bindings/python/.gitignore
 /target
-python/dynemo/.*.so
+python/dynamo/.*.so
--- a/lib/bindings/python/Cargo.lock
+++ b/lib/bindings/python/Cargo.lock
@@ -957,7 +957,7 @@ dependencies = [
 ]
 [[package]]
-name = "dynemo-llm"
+name = "dynamo-llm"
 version = "0.2.1"
 dependencies = [
 "anyhow",
@@ -972,7 +972,7 @@ dependencies = [
 "chrono",
 "cmake",
 "derive_builder",
- "dynemo-runtime",
+ "dynamo-runtime",
 "either",
 "erased-serde",
 "futures",
@@ -1008,11 +1008,11 @@ dependencies = [
 ]
 [[package]]
-name = "dynemo-py3"
+name = "dynamo-py3"
 version = "0.2.1"
 dependencies = [
- "dynemo-llm",
+ "dynamo-llm",
- "dynemo-runtime",
+ "dynamo-runtime",
 "futures",
 "once_cell",
 "pyo3",
@@ -1028,7 +1028,7 @@ dependencies = [
 ]
 [[package]]
-name = "dynemo-runtime"
+name = "dynamo-runtime"
 version = "0.2.1"
 dependencies = [
 "anyhow",

--- a/lib/bindings/python/Cargo.toml
+++ b/lib/bindings/python/Cargo.toml
@@ -14,7 +14,7 @@
 # limitations under the License.
 [package]
-name = "dynemo-py3"
+name = "dynamo-py3"
 version = "0.2.1"
 edition = "2021"
 authors = ["NVIDIA"]
@@ -30,8 +30,9 @@ crate-type = ["cdylib"]
 [dependencies]
-dynemo-llm = { path = "../../llm", features = ["python"] }
-dynemo-runtime = { path = "../../runtime" }
+dynamo-llm = { path = "../../llm", features = ["python"] }
+dynamo-runtime = { path = "../../runtime" }
 futures = "0.3"
 once_cell = "1.20.3"

--- a/lib/bindings/python/README.md
+++ b/lib/bindings/python/README.md
@@ -41,7 +41,7 @@ source .venv/bin/activate
 uv pip install maturin
 ```
-4. Build and install dynemo wheel
+4. Build and install dynamo wheel
 ```
 maturin develop --uv
 ```

--- a/lib/bindings/python/examples/bls/bar.py
+++ b/lib/bindings/python/examples/bls/bar.py
@@ -17,7 +17,7 @@ import asyncio
 import uvloop
-from dynemo.runtime import DistributedRuntime, dynemo_worker
+from dynamo.runtime import DistributedRuntime, dynamo_worker
 uvloop.install()
@@ -29,7 +29,7 @@ class RequestHandler:
            yield char
-@dynemo_worker()
+@dynamo_worker()
 async def worker(runtime: DistributedRuntime):
    component = runtime.namespace("examples/bls").component("bar")
    await component.create_service()

--- a/lib/bindings/python/examples/bls/bls.py
+++ b/lib/bindings/python/examples/bls/bls.py
@@ -17,12 +17,12 @@ import asyncio
 import uvloop
-from dynemo.runtime import DistributedRuntime, dynemo_worker
+from dynamo.runtime import DistributedRuntime, dynamo_worker
 uvloop.install()
-@dynemo_worker()
+@dynamo_worker()
 async def worker(runtime: DistributedRuntime):
    foo = (
        await runtime.namespace("examples/bls")