Commit 602352ce authored by Neelay Shah, committed by GitHub

chore: rename dynamo (#44)


Co-authored-by: Biswa Panda <biswa.panda@gmail.com>
parent ecf53ce2
@@ -33,8 +33,8 @@ repository = "https://github.com/dynemo-ai/dynemo.git"
[workspace.dependencies]
# local or crates.io
-dynemo-runtime = { path = "../../lib/runtime" }
-dynemo-llm = { path = "../../lib/llm" }
+dynamo-runtime = { path = "../../lib/runtime" }
+dynamo-llm = { path = "../../lib/llm" }
# crates.io
anyhow = { version = "1" }
@@ -22,6 +22,6 @@ license.workspace = true
homepage.workspace = true
[dependencies]
-dynemo-runtime = { workspace = true }
+dynamo-runtime = { workspace = true }
# third-party
@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use dynemo_runtime::{
+use dynamo_runtime::{
logging, protocols::annotated::Annotated, stream::StreamExt, DistributedRuntime, Result,
Runtime, Worker,
};
@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use dynemo_runtime::{
+use dynamo_runtime::{
logging,
pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
@@ -13,4 +13,4 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-pub const DEFAULT_NAMESPACE: &str = "dynemo";
+pub const DEFAULT_NAMESPACE: &str = "dynamo";
@@ -24,8 +24,8 @@ homepage.workspace = true
repository.workspace = true
[dependencies]
-dynemo-runtime = { workspace = true}
-dynemo-llm = { workspace = true}
+dynamo-runtime = { workspace = true}
+dynamo-llm = { workspace = true}
clap = { version = "4.5", features = ["derive"] }
serde = { workspace = true }
@@ -16,14 +16,14 @@
use clap::Parser;
use std::sync::Arc;
-use dynemo_llm::{
+use dynamo_llm::{
http::service::{
discovery::{model_watcher, ModelWatchState},
service_v2::HttpService,
},
model_type::ModelType,
};
-use dynemo_runtime::{
+use dynamo_runtime::{
logging, transports::etcd::PrefixWatcher, DistributedRuntime, Result, Runtime, Worker,
};
@@ -23,8 +23,8 @@ homepage.workspace = true
repository.workspace = true
[dependencies]
-dynemo-runtime = { workspace = true}
-dynemo-llm = { workspace = true}
+dynamo-runtime = { workspace = true}
+dynamo-llm = { workspace = true}
serde = { workspace = true }
serde_json = { workspace = true }
@@ -16,8 +16,8 @@
use clap::{Parser, Subcommand};
use tracing as log;
-use dynemo_llm::{http::service::discovery::ModelEntry, model_type::ModelType};
-use dynemo_runtime::{
+use dynamo_llm::{http::service::discovery::ModelEntry, model_type::ModelType};
+use dynamo_runtime::{
distributed::DistributedConfig, logging, protocols::Endpoint, raise, DistributedRuntime,
Result, Runtime, Worker,
};
@@ -23,7 +23,7 @@ homepage.workspace = true
repository.workspace = true
[dependencies]
-dynemo-runtime = { workspace = true }
+dynamo-runtime = { workspace = true }
# third-party
futures = { workspace = true }
@@ -16,7 +16,7 @@
use futures::StreamExt;
use service_metrics::DEFAULT_NAMESPACE;
-use dynemo_runtime::{
+use dynamo_runtime::{
logging, protocols::annotated::Annotated, utils::Duration, DistributedRuntime, Result, Runtime,
Worker,
};
@@ -15,7 +15,7 @@
use service_metrics::{MyStats, DEFAULT_NAMESPACE};
-use dynemo_runtime::{
+use dynamo_runtime::{
logging,
pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
@@ -15,7 +15,7 @@
use serde::{Deserialize, Serialize};
-pub const DEFAULT_NAMESPACE: &str = "dynemo";
+pub const DEFAULT_NAMESPACE: &str = "dynamo";
#[derive(Serialize, Deserialize)]
// Dummy Stats object to demonstrate how to attach a custom stats handler
@@ -1396,7 +1396,7 @@ dependencies = [
]
[[package]]
name = "dynemo-llm"
name = "dynamo-llm"
version = "0.2.1"
dependencies = [
"anyhow",
@@ -1412,7 +1412,7 @@ dependencies = [
"chrono",
"cmake",
"derive_builder",
"dynemo-runtime",
"dynamo-runtime",
"either",
"erased-serde",
"futures",
@@ -1450,7 +1450,7 @@ dependencies = [
]
[[package]]
name = "dynemo-run"
name = "dynamo-run"
version = "0.1.0"
dependencies = [
"anyhow",
@@ -1459,8 +1459,8 @@ dependencies = [
"async-trait",
"clap",
"dialoguer",
"dynemo-llm",
"dynemo-runtime",
"dynamo-llm",
"dynamo-runtime",
"futures",
"futures-util",
"libc",
@@ -1475,7 +1475,7 @@ dependencies = [
]
[[package]]
name = "dynemo-runtime"
name = "dynamo-runtime"
version = "0.2.1"
dependencies = [
"anyhow",
@@ -14,7 +14,7 @@
# limitations under the License.
[package]
name = "dynemo-run"
name = "dynamo-run"
version = "0.1.0"
edition = "2021"
authors = ["NVIDIA"]
@@ -22,14 +22,14 @@ homepage = "https://github.com/dynemo-ai/dynemo"
license = "Apache-2.0"
[features]
mistralrs = ["dynemo-llm/mistralrs"]
sglang = ["dynemo-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"]
vllm = ["dynemo-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"]
llamacpp = ["dynemo-llm/llamacpp"]
trtllm = ["dynemo-llm/trtllm"]
python = ["dynemo-llm/python"]
cuda = ["dynemo-llm/cuda"]
metal = ["dynemo-llm/metal"]
mistralrs = ["dynamo-llm/mistralrs"]
sglang = ["dynamo-llm/sglang", "dep:netlink-packet-route", "dep:rtnetlink"]
vllm = ["dynamo-llm/vllm", "dep:netlink-packet-route", "dep:rtnetlink"]
llamacpp = ["dynamo-llm/llamacpp"]
trtllm = ["dynamo-llm/trtllm"]
python = ["dynamo-llm/python"]
cuda = ["dynamo-llm/cuda"]
metal = ["dynamo-llm/metal"]
[dependencies]
anyhow = "1"
@@ -49,5 +49,5 @@ tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["codec", "net"] }
tracing = { version = "0.1" }
tracing-subscriber = { version = "0.3", features = ["env-filter", "local-time", "json"] }
-dynemo-runtime = { path = "../../lib/runtime" }
-dynemo-llm = { path = "../../lib/llm" }
+dynamo-runtime = { path = "../../lib/runtime" }
+dynamo-llm = { path = "../../lib/llm" }
-# Dynemo service runner
+# Dynamo service runner
-`dynemo-run` is a tool for exploring the dynemo components.
+`dynamo-run` is a tool for exploring the dynamo components.
## Setup
@@ -36,11 +36,11 @@ For example one of these should be fast and good quality on almost any machine:
*Text interface*
-`./target/release/dynemo-run Llama-3.2-1B-Instruct-Q4_K_M.gguf` or path to a Hugging Face repo checkout instead of the GGUF.
+`./target/release/dynamo-run Llama-3.2-1B-Instruct-Q4_K_M.gguf` or path to a Hugging Face repo checkout instead of the GGUF.
*HTTP interface*
-`./target/release/dynemo-run in=http --model-path Llama-3.2-1B-Instruct-Q4_K_M.gguf`
+`./target/release/dynamo-run in=http --model-path Llama-3.2-1B-Instruct-Q4_K_M.gguf`
List the models: `curl localhost:8080/v1/models`
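Since this HTTP interface is OpenAI-compatible, any standard client can call it. A minimal sketch (editor illustration, not part of this diff) using only the Python standard library; it assumes the server above is listening on the default `localhost:8080`, that the usual OpenAI `/v1/chat/completions` path applies (as in the curl example below), and a non-streaming response:

```
# Sketch: call the OpenAI-compatible HTTP endpoint served by `in=http`.
import json
import urllib.request

payload = {
    "model": "Llama-3.2-1B-Instruct-Q4_K_M",
    "max_tokens": 128,
    "messages": [{"role": "user", "content": "Say hello."}],
}
req = urllib.request.Request(
    "http://localhost:8080/v1/chat/completions",  # assumed OpenAI-style path
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.load(resp)
print(body["choices"][0]["message"]["content"])
```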
@@ -53,19 +53,19 @@ curl -d '{"model": "Llama-3.2-1B-Instruct-Q4_K_M", "max_tokens": 2049, "messages
Node 1:
```
-dynemo-run in=http out=dyn://llama3B_pool
+dynamo-run in=http out=dyn://llama3B_pool
```
Node 2:
```
-dynemo-run in=dyn://llama3B_pool out=mistralrs ~/llm_models/Llama-3.2-3B-Instruct
+dynamo-run in=dyn://llama3B_pool out=mistralrs ~/llm_models/Llama-3.2-3B-Instruct
```
This will use etcd to auto-discover the model and NATS to talk to it. You can run multiple workers on the same endpoint and it will pick one at random each time.
The `ns/backend/mistralrs` are purely symbolic, pick anything as long as it has three parts, and it matches the other node.
-Run `dynemo-run --help` for more options.
+Run `dynamo-run --help` for more options.
## sglang
@@ -91,26 +91,26 @@ Any example above using `out=sglang` will work, but our sglang backend is also m
Node 1:
```
-dynemo-run in=http out=sglang --model-path ~/llm_models/DeepSeek-R1-Distill-Llama-70B/ --tensor-parallel-size 8 --num-nodes 2 --node-rank 0 --dist-init-addr 10.217.98.122:9876
+dynamo-run in=http out=sglang --model-path ~/llm_models/DeepSeek-R1-Distill-Llama-70B/ --tensor-parallel-size 8 --num-nodes 2 --node-rank 0 --dist-init-addr 10.217.98.122:9876
```
Node 2:
```
-dynemo-run in=none out=sglang --model-path ~/llm_models/DeepSeek-R1-Distill-Llama-70B/ --tensor-parallel-size 8 --num-nodes 2 --node-rank 1 --dist-init-addr 10.217.98.122:9876
+dynamo-run in=none out=sglang --model-path ~/llm_models/DeepSeek-R1-Distill-Llama-70B/ --tensor-parallel-size 8 --num-nodes 2 --node-rank 1 --dist-init-addr 10.217.98.122:9876
```
## llama_cpp
- `cargo build --release --features llamacpp,cuda`
-- `dynemo-run out=llama_cpp --model-path ~/llm_models/Llama-3.2-3B-Instruct-Q6_K.gguf --model-config ~/llm_models/Llama-3.2-3B-Instruct/`
+- `dynamo-run out=llama_cpp --model-path ~/llm_models/Llama-3.2-3B-Instruct-Q6_K.gguf --model-config ~/llm_models/Llama-3.2-3B-Instruct/`
The extra `--model-config` flag is because:
- llama_cpp only runs GGUF
- We send it tokens, meaning we do the tokenization ourselves, so we need a tokenizer (see the sketch after this list)
- We don't yet read it out of the GGUF (TODO), so we need an HF repo with `tokenizer.json` et al
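As an aside (editor illustration, not part of this diff), "doing the tokenization ourselves" is what the repo's `tokenizer.json` enables. A minimal sketch using the Hugging Face `tokenizers` package; the model path is a placeholder matching the example above:

```
# Sketch: load the tokenizer.json that --model-config points at and turn a
# prompt into the token ids that get sent to llama_cpp.
from tokenizers import Tokenizer

tok = Tokenizer.from_file("Llama-3.2-3B-Instruct/tokenizer.json")  # placeholder path
ids = tok.encode("Hello, world").ids
print(ids)              # token ids handed to the engine
print(tok.decode(ids))  # and decoded back to text
```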
-If the build step also builds llama_cpp libraries into `target/release` ("libllama.so", "libggml.so", "libggml-base.so", "libggml-cpu.so", "libggml-cuda.so"), then `dynemo-run` will need to find those at runtime. Set `LD_LIBRARY_PATH`, and be sure to deploy them alongside the `dynemo-run` binary.
+If the build step also builds llama_cpp libraries into `target/release` ("libllama.so", "libggml.so", "libggml-base.so", "libggml-cpu.so", "libggml-cuda.so"), then `dynamo-run` will need to find those at runtime. Set `LD_LIBRARY_PATH`, and be sure to deploy them alongside the `dynamo-run` binary.
## vllm
@@ -135,25 +135,25 @@ cargo build --release --features vllm
Run (still inside that virtualenv) - HF repo:
```
-./target/release/dynemo-run in=http out=vllm --model-path ~/llm_models/Llama-3.2-3B-Instruct/
+./target/release/dynamo-run in=http out=vllm --model-path ~/llm_models/Llama-3.2-3B-Instruct/
```
Run (still inside that virtualenv) - GGUF:
```
-./target/release/dynemo-run in=http out=vllm --model-path ~/llm_models/Llama-3.2-3B-Instruct-Q6_K.gguf --model-config ~/llm_models/Llama-3.2-3B-Instruct/
+./target/release/dynamo-run in=http out=vllm --model-path ~/llm_models/Llama-3.2-3B-Instruct-Q6_K.gguf --model-config ~/llm_models/Llama-3.2-3B-Instruct/
```
+ Multi-node:
Node 1:
```
-dynemo-run in=text out=vllm ~/llm_models/Llama-3.2-3B-Instruct/ --tensor-parallel-size 8 --num-nodes 2 --leader-addr 10.217.98.122:6539 --node-rank 0
+dynamo-run in=text out=vllm ~/llm_models/Llama-3.2-3B-Instruct/ --tensor-parallel-size 8 --num-nodes 2 --leader-addr 10.217.98.122:6539 --node-rank 0
```
Node 2:
```
-dynemo-run in=none out=vllm ~/llm_models/Llama-3.2-3B-Instruct/ --num-nodes 2 --leader-addr 10.217.98.122:6539 --node-rank 1
+dynamo-run in=none out=vllm ~/llm_models/Llama-3.2-3B-Instruct/ --num-nodes 2 --leader-addr 10.217.98.122:6539 --node-rank 1
```
## Python bring-your-own-engine
@@ -170,7 +170,7 @@ Build: `cargo build --release --features python`
If the Python engine wants to receive and return strings - it will do the prompt templating and tokenization itself - run it like this:
```
-dynemo-run out=pystr:/home/user/my_python_engine.py --name <model-name>
+dynamo-run out=pystr:/home/user/my_python_engine.py --name <model-name>
```
- The `request` parameter is a map, an OpenAI compatible create chat completion request: https://platform.openai.com/docs/api-reference/chat/create
@@ -201,11 +201,11 @@ async def generate(request):
yield {"id":"1","choices":[{"index":0,"delta":{"content":"","role":"assistant"},"finish_reason":"stop"}],"created":1841762283,"model":"Llama-3.2-1B-Instruct","system_fingerprint":"local","object":"chat.completion.chunk"}
```
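Putting the pieces together (editor illustration, not part of this diff): a minimal `my_python_engine.py` for `out=pystr`. The README specifies that `generate` receives an OpenAI-style create-chat-completion request as a dict and yields `chat.completion.chunk` dicts in the shape shown above; the echo behavior here is a made-up placeholder:

```
# Hypothetical pystr engine sketch: streams the last user message back one
# word at a time, then a final chunk with finish_reason "stop".
import time


async def generate(request):
    text = request["messages"][-1]["content"]
    for i, word in enumerate(text.split()):
        yield {
            "id": str(i),
            "choices": [{
                "index": 0,
                "delta": {"content": word + " ", "role": "assistant"},
                "finish_reason": None,
            }],
            "created": int(time.time()),
            "model": request.get("model", "echo"),
            "system_fingerprint": "local",
            "object": "chat.completion.chunk",
        }
    # Final chunk mirrors the README example: empty delta, finish_reason "stop".
    yield {
        "id": "final",
        "choices": [{
            "index": 0,
            "delta": {"content": "", "role": "assistant"},
            "finish_reason": "stop",
        }],
        "created": int(time.time()),
        "model": request.get("model", "echo"),
        "system_fingerprint": "local",
        "object": "chat.completion.chunk",
    }
```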
-### Dynemo does the pre-processing
+### Dynamo does the pre-processing
If the Python engine wants to receive and return tokens - the prompt templating and tokenization is already done - run it like this:
```
-dynemo-run out=pytok:/home/user/my_python_engine.py --model-path <hf-repo-checkout>
+dynamo-run out=pytok:/home/user/my_python_engine.py --model-path <hf-repo-checkout>
```
- The request parameter is a map that looks like this:
@@ -251,7 +251,7 @@ cargo build --release --features trtllm
Run:
```
-dynemo-run in=text out=trtllm --model-path /app/trtllm_engine/ --model-config ~/llm_models/Llama-3.2-3B-Instruct/
+dynamo-run in=text out=trtllm --model-path /app/trtllm_engine/ --model-config ~/llm_models/Llama-3.2-3B-Instruct/
```
Note that TRT-LLM uses its own `.engine` format for weights. Repo models must be converted like so:
@@ -269,7 +269,7 @@ python convert_checkpoint.py --model_dir /tmp/model/ --output_dir ./converted --
trtllm-build --checkpoint_dir ./converted --output_dir ./final/trtllm_engine --use_paged_context_fmha enable --gemm_plugin auto
```
-The `--model-path` you give to `dynemo-run` must contain the `config.json` (TRT-LLM's, not the model's) and `rank0.engine` (plus other ranks if relevant).
+The `--model-path` you give to `dynamo-run` must contain the `config.json` (TRT-LLM's, not the model's) and `rank0.engine` (plus other ranks if relevant).
+ Execute
TRT-LLM is a C++ library that must have been previously built and installed. It needs a lot of memory to compile. Gitlab builds a container you can try:
@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-use dynemo_llm::{
+use dynamo_llm::{
backend::Backend,
http::service::discovery::ModelEntry,
model_type::ModelType,
@@ -25,10 +25,10 @@ use dynemo_llm::{
Annotated,
},
};
-use dynemo_runtime::pipeline::{
+use dynamo_runtime::pipeline::{
network::Ingress, ManyOut, Operator, SegmentSource, ServiceBackend, SingleIn, Source,
};
-use dynemo_runtime::{protocols::Endpoint, DistributedRuntime, Runtime};
+use dynamo_runtime::{protocols::Endpoint, DistributedRuntime, Runtime};
use crate::EngineConfig;