Unverified commit b889948c, authored by Graham King and committed by GitHub
Browse files

chore: Make llama.cpp a default engine (#1177)

parent 0df6d462
...@@ -32,7 +32,7 @@ ENV RUSTUP_HOME=/usr/local/rustup \ ...@@ -32,7 +32,7 @@ ENV RUSTUP_HOME=/usr/local/rustup \
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \ RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \ echo "a3339fb004c3d0bb9862ba0bce001861fe5cbde9c10d16591eb3f39ee6cd3e7f *rustup-init" | sha256sum -c - && \
chmod +x rustup-init && \ chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
rm rustup-init && \ rm rustup-init && \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME chmod -R a+w $RUSTUP_HOME $CARGO_HOME
......
...@@ -294,13 +294,11 @@ COPY components /workspace/components ...@@ -294,13 +294,11 @@ COPY components /workspace/components
COPY launch /workspace/launch COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk COPY deploy/sdk /workspace/deploy/sdk
# Build Rust crate binaries packaged with the wheel RUN cargo build \
RUN cargo build --release --locked --features mistralrs,python,dynamo-llm/block-manager \ --release \
-p dynamo-run \ --locked \
-p llmctl \ --features dynamo-llm/block-manager \
# Multiple http named crates are present in dependencies, need to specify the path --workspace
-p file://$PWD/components/http \
-p metrics
# Build dynamo wheel # Build dynamo wheel
RUN uv build --wheel --out-dir /workspace/dist && \ RUN uv build --wheel --out-dir /workspace/dist && \
...@@ -322,21 +320,13 @@ ENV CARGO_TARGET_DIR=/workspace/target ...@@ -322,21 +320,13 @@ ENV CARGO_TARGET_DIR=/workspace/target
WORKDIR /workspace WORKDIR /workspace
COPY --from=wheel_builder /workspace/dist/ /workspace/dist/ COPY --from=wheel_builder /workspace /workspace
COPY --from=wheel_builder /workspace/target/ /workspace/target/
COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
# Copy Cargo cache to avoid re-downloading dependencies # Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
COPY . /workspace # Build C bindings, creates lib/bindings/c/include
RUN cd /workspace/lib/bindings/c && cargo build --release --locked
# Build rest of the crates
# Need to figure out rust caching to avoid rebuilding and remove exclude flags
RUN cargo build --release --locked --features block-manager --workspace \
--exclude dynamo-run \
--exclude llmctl \
--exclude file://$PWD/components/http \
--exclude metrics
# Package the bindings # Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \ RUN mkdir -p /opt/dynamo/bindings/wheels && \
......
...@@ -204,7 +204,7 @@ ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu ...@@ -204,7 +204,7 @@ ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \ RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
# TODO: Add SHA check back based on RUSTARCH # TODO: Add SHA check back based on RUSTARCH
chmod +x rustup-init && \ chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
rm rustup-init && \ rm rustup-init && \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME chmod -R a+w $RUSTUP_HOME $CARGO_HOME
...@@ -273,13 +273,11 @@ COPY components /workspace/components ...@@ -273,13 +273,11 @@ COPY components /workspace/components
COPY launch /workspace/launch COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk COPY deploy/sdk /workspace/deploy/sdk
# Build Rust crate binaries packaged with the wheel RUN cargo build \
RUN cargo build --release --locked \ --release \
-p dynamo-run \ --locked \
-p llmctl \ --features dynamo-llm/block-manager \
# Multiple http named crates are present in dependencies, need to specify the path --workspace
-p file://$PWD/components/http \
-p metrics
# Build dynamo wheels # Build dynamo wheels
RUN uv build --wheel --out-dir /workspace/dist && \ RUN uv build --wheel --out-dir /workspace/dist && \
...@@ -297,20 +295,13 @@ RUN uv build --wheel --out-dir /workspace/dist && \ ...@@ -297,20 +295,13 @@ RUN uv build --wheel --out-dir /workspace/dist && \
FROM build AS dev FROM build AS dev
WORKDIR /workspace WORKDIR /workspace
COPY --from=wheel_builder /workspace/dist/ /workspace/dist/
COPY --from=wheel_builder /workspace/target/ /workspace/target/ COPY --from=wheel_builder /workspace /workspace
# Copy Cargo cache to avoid re-downloading dependencies # Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
COPY . /workspace # Build C bindings, creates lib/bindings/c/include
RUN cd /workspace/lib/bindings/c && cargo build --release --locked
# Build rest of the crates
# Need to figure out rust caching to avoid rebuilding and remove exclude flags
RUN cargo build --release --locked --workspace \
--exclude dynamo-run \
--exclude llmctl \
--exclude file://$PWD/components/http \
--exclude metrics
# Package the bindings # Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \ RUN mkdir -p /opt/dynamo/bindings/wheels && \
......
...@@ -100,9 +100,9 @@ RUN cd /usr/local/src && \ ...@@ -100,9 +100,9 @@ RUN cd /usr/local/src && \
ldconfig ldconfig
ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH ENV LD_LIBRARY_PATH=/usr/lib:/usr/local/ucx/lib:$LD_LIBRARY_PATH
ENV CPATH=/usr/include:$CPATH ENV CPATH=/usr/include
ENV PATH=/usr/bin:$PATH ENV PATH=/usr/bin:$PATH
ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig
SHELL ["/bin/bash", "-c"] SHELL ["/bin/bash", "-c"]
WORKDIR /workspace WORKDIR /workspace
...@@ -259,7 +259,7 @@ ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu ...@@ -259,7 +259,7 @@ ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \ RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
# TODO: Add SHA check back based on RUSTARCH # TODO: Add SHA check back based on RUSTARCH
chmod +x rustup-init && \ chmod +x rustup-init && \
./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \
rm rustup-init && \ rm rustup-init && \
chmod -R a+w $RUSTUP_HOME $CARGO_HOME chmod -R a+w $RUSTUP_HOME $CARGO_HOME
...@@ -372,13 +372,11 @@ COPY components /workspace/components ...@@ -372,13 +372,11 @@ COPY components /workspace/components
COPY launch /workspace/launch COPY launch /workspace/launch
COPY deploy/sdk /workspace/deploy/sdk COPY deploy/sdk /workspace/deploy/sdk
# Build Rust crate binaries packaged with the wheel RUN cargo build \
RUN cargo build --release --locked --features dynamo-llm/block-manager \ --release \
-p dynamo-run \ --locked \
-p llmctl \ --features dynamo-llm/block-manager \
# Multiple http named crates are present in dependencies, need to specify the path --workspace
-p file://$PWD/components/http \
-p metrics
# Build dynamo wheel # Build dynamo wheel
RUN uv build --wheel --out-dir /workspace/dist && \ RUN uv build --wheel --out-dir /workspace/dist && \
...@@ -400,21 +398,17 @@ ENV CARGO_TARGET_DIR=/workspace/target ...@@ -400,21 +398,17 @@ ENV CARGO_TARGET_DIR=/workspace/target
WORKDIR /workspace WORKDIR /workspace
COPY --from=wheel_builder /workspace/dist/ /workspace/dist/ COPY --from=wheel_builder /workspace /workspace
COPY --from=wheel_builder /workspace/target/ /workspace/target/
COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl COPY --from=wheel_builder /opt/nvidia/nvda_nixl /opt/nvidia/nvda_nixl
# Copy Cargo cache to avoid re-downloading dependencies # Copy Cargo cache to avoid re-downloading dependencies
COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME
COPY . /workspace # Build C bindings, creates lib/bindings/c/include
#
# Build rest of the crates # TODO: In theory the 'cargo build' in earlier stage covers this, we "just" need to copy the
# Need to figure out rust caching to avoid rebuilding and remove exclude flags # `lib/bindings/c/include` folder that build.rs generated across.
RUN cargo build --release --locked --features block-manager --workspace \ # I couldn't get that to work, hence TODO.
--exclude dynamo-run \ RUN cd /workspace/lib/bindings/c && cargo build --release --locked
--exclude llmctl \
--exclude file://$PWD/components/http \
--exclude metrics
# Package the bindings # Package the bindings
RUN mkdir -p /opt/dynamo/bindings/wheels && \ RUN mkdir -p /opt/dynamo/bindings/wheels && \
......
...@@ -305,10 +305,10 @@ If you have multiple GPUs, mistral.rs does automatic tensor parallelism. You do ...@@ -305,10 +305,10 @@ If you have multiple GPUs, mistral.rs does automatic tensor parallelism. You do
#### llamacpp #### llamacpp
Currently [llama.cpp](https://github.com/ggml-org/llama.cpp) is not included by default. Build it like this: [llama.cpp](https://github.com/ggml-org/llama.cpp) is built for CPU by default. For an optimized build, pass the appropriate feature flag (highly recommended):
``` ```
cargo build --features llamacpp[,cuda|metal|vulkan] -p dynamo-run cargo build --features cuda|metal|vulkan -p dynamo-run
``` ```
``` ```
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[package] [package]
name = "dynamo-run" name = "dynamo-run"
...@@ -26,7 +14,7 @@ description = "Dynamo Run CLI" ...@@ -26,7 +14,7 @@ description = "Dynamo Run CLI"
[features] [features]
# Build with `--no-default-features` to disable these defaults # Build with `--no-default-features` to disable these defaults
default = ["mistralrs"] default = ["mistralrs", "llamacpp"]
mistralrs = ["dep:dynamo-engine-mistralrs"] mistralrs = ["dep:dynamo-engine-mistralrs"]
llamacpp = ["dep:dynamo-engine-llamacpp"] llamacpp = ["dep:dynamo-engine-llamacpp"]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment