chore: Make llama.cpp a default engine (#1177)

b889948c · Graham King · GitHub · 0df6d462 · b889948c · b889948c
Unverified commit b889948c, authored May 29, 2025 by Graham King; committed by GitHub May 29, 2025
6 changed files
--- a/container/Dockerfile.none
+++ b/container/Dockerfile.none
@@ -32,7 +32,7 @@ ENV RUSTUP_HOME=/usr/local/rustup \
 RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \
    chmod +x rustup-init && \
-    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
+    ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME


--- a/container/Dockerfile.sglang
+++ b/container/Dockerfile.sglang
@@ -294,13 +294,11 @@ COPY components /workspace/components
 COPY launch /workspace/launch
 COPY deploy/sdk /workspace/deploy/sdk

-# Build Rust crate binaries packaged with the wheel
-RUN cargo build --release --locked --features mistralrs,python,dynamo-llm/block-manager \
-    -p dynamo-run \
-    -p llmctl \
-    # Multiple http named crates are present in dependencies, need to specify the path
-    -p file://$PWD/components/http \
-    -p metrics
+RUN cargo build \
+	--release \
+	--locked \
+	--features dynamo-llm/block-manager \
+	--workspace

 # Build dynamo wheel
 RUN uv build --wheel --out-dir /workspace/dist && \
@@ -322,21 +320,13 @@ ENV CARGO_TARGET_DIR=/workspace/target

 WORKDIR /workspace

-COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
-COPY --from=wheel_builder /workspace/target/ /workspace/target/
+COPY --from=wheel_builder /workspace /workspace
 COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
 # Copy Cargo cache to avoid re-downloading dependencies
 COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

-COPY . /workspace
-
-# Build rest of the crates
-# Need to figure out rust caching to avoid rebuilding and remove exclude flags
-RUN cargo build --release --locked --features block-manager --workspace \
-    --exclude dynamo-run \
-    --exclude llmctl \
-    --exclude file://$PWD/components/http \
-    --exclude metrics
+# Build C bindings, creates lib/bindings/c/include
+RUN cd /workspace/lib/bindings/c && cargo build --release --locked

 # Package the bindings
 RUN mkdir -p /opt/dynamo/bindings/wheels && \

--- a/container/Dockerfile.tensorrt_llm
+++ b/container/Dockerfile.tensorrt_llm
@@ -204,7 +204,7 @@ ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
 RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    # TODO: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
-    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
+    ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

@@ -273,13 +273,11 @@ COPY components /workspace/components
 COPY launch /workspace/launch
 COPY deploy/sdk /workspace/deploy/sdk

-# Build Rust crate binaries packaged with the wheel
-RUN cargo build --release --locked \
-    -p dynamo-run \
-    -p llmctl \
-    # Multiple http named crates are present in dependencies, need to specify the path
-    -p file://$PWD/components/http \
-    -p metrics
+RUN cargo build \
+	--release \
+	--locked \
+	--features dynamo-llm/block-manager \
+	--workspace

 # Build dynamo wheels
 RUN uv build --wheel --out-dir /workspace/dist && \
@@ -297,20 +295,13 @@ RUN uv build --wheel --out-dir /workspace/dist && \
 FROM build AS dev

 WORKDIR /workspace
-COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
-COPY --from=wheel_builder /workspace/target/ /workspace/target/
+
+COPY --from=wheel_builder /workspace /workspace
 # Copy Cargo cache to avoid re-downloading dependencies
 COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

-COPY . /workspace
-
-# Build rest of the crates
-# Need to figure out rust caching to avoid rebuilding and remove exclude flags
-RUN cargo build --release --locked --workspace \
-    --exclude dynamo-run \
-    --exclude llmctl \
-    --exclude file://$PWD/components/http \
-    --exclude metrics
+# Build C bindings, creates lib/bindings/c/include
+RUN cd /workspace/lib/bindings/c && cargo build --release --locked

 # Package the bindings
 RUN mkdir -p /opt/dynamo/bindings/wheels && \

--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -100,9 +100,9 @@ RUN cd /usr/local/src && \
    ldconfig

 ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH
-ENV CPATH=/usr/include:$CPATH
+ENV CPATH=/usr/include
 ENV PATH=/usr/bin:$PATH
-ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH
+ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig
 SHELL ["/bin/bash", "-c"]

 WORKDIR /workspace
@@ -259,7 +259,7 @@ ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
 RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    # TODO: Add SHA check back based on RUSTARCH
    chmod +x rustup-init && \
-    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
+    ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
    rm rustup-init && \
    chmod -R a+w $RUSTUP_HOME $CARGO_HOME

@@ -372,13 +372,11 @@ COPY components /workspace/components
 COPY launch /workspace/launch
 COPY deploy/sdk /workspace/deploy/sdk

-# Build Rust crate binaries packaged with the wheel
-RUN cargo build --release --locked --features dynamo-llm/block-manager \
-    -p dynamo-run \
-    -p llmctl \
-    # Multiple http named crates are present in dependencies, need to specify the path
-    -p file://$PWD/components/http \
-    -p metrics
+RUN cargo build \
+	--release \
+	--locked \
+	--features dynamo-llm/block-manager \
+	--workspace

 # Build dynamo wheel
 RUN uv build --wheel --out-dir /workspace/dist && \
@@ -400,21 +398,17 @@ ENV CARGO_TARGET_DIR=/workspace/target

 WORKDIR /workspace

-COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
-COPY --from=wheel_builder /workspace/target/ /workspace/target/
+COPY --from=wheel_builder /workspace /workspace
 COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
 # Copy Cargo cache to avoid re-downloading dependencies
 COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME

-COPY . /workspace
-
-# Build rest of the crates
-# Need to figure out rust caching to avoid rebuilding and remove exclude flags
-RUN cargo build --release --locked --features block-manager --workspace \
-    --exclude dynamo-run \
-    --exclude llmctl \
-    --exclude file://$PWD/components/http \
-    --exclude metrics
+# Build C bindings, creates lib/bindings/c/include
+#
+# TODO: In theory the 'cargo build' in the earlier stage already covers this; we "just" need to
+# copy across the `lib/bindings/c/include` folder that build.rs generated in that stage.
+# I couldn't get that to work, hence this TODO.
+RUN cd /workspace/lib/bindings/c && cargo build --release --locked

 # Package the bindings
 RUN mkdir -p /opt/dynamo/bindings/wheels && \

--- a/docs/guides/dynamo_run.md
+++ b/docs/guides/dynamo_run.md
@@ -305,10 +305,10 @@ If you have multiple GPUs, mistral.rs does automatic tensor parallelism. You do

 #### llamacpp

-Currently [llama.cpp](https://github.com/ggml-org/llama.cpp) is not included by default. Build it like this:
+[llama.cpp](https://github.com/ggml-org/llama.cpp) is built for CPU by default. For an optimized build, pass the appropriate feature flag (highly recommended):

 ```
-cargo build --features llamacpp[,cuda|metal|vulkan] -p dynamo-run
+cargo build --features cuda|metal|vulkan -p dynamo-run
 ```

 ```

--- a/launch/dynamo-run/Cargo.toml
+++ b/launch/dynamo-run/Cargo.toml
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.

 [package]
 name = "dynamo-run"
@@ -26,7 +14,7 @@ description = "Dynamo Run CLI"

 [features]
 # Build with `--no-default-features` to disable these defaults
-default = ["mistralrs"]
+default = ["mistralrs", "llamacpp"]
 mistralrs = ["dep:dynamo-engine-mistralrs"]
 llamacpp = ["dep:dynamo-engine-llamacpp"]