Commit e5db9e86 authored by Neelay Shah, committed by GitHub
Browse files

refactor: Move rust binaries out of examples, update nixl dockerfile (#89)


Co-authored-by: Meenakshi Sharma <163925564+nvda-mesharma@users.noreply.github.com>
parent e1a95dab
...@@ -37,3 +37,4 @@ ...@@ -37,3 +37,4 @@
**/*backup*/ **/*backup*/
.dockerignore .dockerignore
**/target/* **/target/*
**/*safetensors
\ No newline at end of file
...@@ -31,7 +31,7 @@ jobs: ...@@ -31,7 +31,7 @@ jobs:
matrix: matrix:
framework: framework:
- standard - standard
- vllm - vllm_nixl
name: Build and Test - ${{ matrix.framework }} name: Build and Test - ${{ matrix.framework }}
env: env:
CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }} CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }}
...@@ -67,6 +67,8 @@ jobs: ...@@ -67,6 +67,8 @@ jobs:
fi fi
echo "cache_from_location=--cache-from \"type=registry,ref=${{ steps.define_image_tag.outputs.image_tag }}\"" >> $GITHUB_OUTPUT echo "cache_from_location=--cache-from \"type=registry,ref=${{ steps.define_image_tag.outputs.image_tag }}\"" >> $GITHUB_OUTPUT
- name: Build image - name: Build image
env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
run: | run: |
./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --framework ${{ matrix.framework }} ${{ steps.which_caches.outputs.cache_from_location }} ${{ steps.which_caches.outputs.cache_to_location }} ./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --framework ${{ matrix.framework }} ${{ steps.which_caches.outputs.cache_from_location }} ${{ steps.which_caches.outputs.cache_to_location }}
- name: Run pytest - name: Run pytest
......
...@@ -31,7 +31,7 @@ on: ...@@ -31,7 +31,7 @@ on:
- 'lib/llm/**' - 'lib/llm/**'
- 'lib/bindings/**' - 'lib/bindings/**'
- 'launch/**' - 'launch/**'
- 'examples/rust/**' - 'components/**'
- '**.rs' - '**.rs'
- 'Cargo.toml' - 'Cargo.toml'
- 'Cargo.lock' - 'Cargo.lock'
...@@ -40,7 +40,7 @@ jobs: ...@@ -40,7 +40,7 @@ jobs:
pre-merge-rust: pre-merge-rust:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: { dir: ['lib/runtime', 'lib/llm', 'lib/bindings/c', 'lib/bindings/python', 'launch/dynamo-run', 'components/metrics', 'examples/rust'] } matrix: { dir: ['lib/runtime', 'lib/llm', 'lib/bindings/c', 'lib/bindings/python', 'launch/dynamo-run', 'launch/llmctl', 'components', 'lib/runtime/examples'] }
permissions: permissions:
contents: read contents: read
steps: steps:
......
...@@ -46,6 +46,7 @@ jobs: ...@@ -46,6 +46,7 @@ jobs:
filters: | filters: |
vllm: vllm:
- 'container/Dockerfile.vllm' - 'container/Dockerfile.vllm'
- 'container/Dockerfile.vllm_nixl'
- 'examples/python/llm/**' - 'examples/python/llm/**'
- 'examples/python_rs/llm/**' - 'examples/python_rs/llm/**'
- 'container/deps/requirements.vllm.txt' - 'container/deps/requirements.vllm.txt'
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Workspace root: selects the feature resolver and lists the member crates.
[workspace]
resolver = "2"
members = ["http", "metrics"]

# Package metadata declared once for the workspace; a member crate opts in
# per key with `<key>.workspace = true`.
[workspace.package]
authors = ["NVIDIA"]
edition = "2021"
homepage = "https://github.com/ai-dynamo/dynamo"
license = "Apache-2.0"
repository = "https://github.com/ai-dynamo/dynamo.git"
version = "0.1.0"

# Dependency versions shared across the workspace; a member crate pulls one
# in with `<name>.workspace = true`.
[workspace.dependencies]
# Local path dependencies (relative to this manifest)
dynamo-llm = { path = "../lib/llm" }
dynamo-runtime = { path = "../lib/runtime" }
# crates.io, kept in alphabetical order
anyhow = "1"
async-stream = "0.3"
async-trait = "0.1"
bytes = "1"
derive_builder = "0.20"
futures = "0.3"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
thiserror = "2.0.11"
tokio = { version = "1", features = ["full"] }
tokio-stream = "0.1"
tokio-util = { version = "0.7", features = ["codec", "net"] }
tracing = "0.1"
uuid = { version = "1", features = ["v4", "serde"] }
validator = { version = "0.20.0", features = ["derive"] }
...@@ -15,32 +15,34 @@ ...@@ -15,32 +15,34 @@
[package] [package]
name = "metrics" name = "metrics"
version = "0.1.0" version.workspace = true
edition = "2021" edition.workspace = true
license = "Apache-2.0" authors.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
[dependencies] [dependencies]
# local # local
dynamo-runtime.workspace = true
dynamo-runtime = { path = "../../lib/runtime" } dynamo-llm.workspace = true
dynamo-llm = { path = "../../lib/llm" }
# workspace - todo # workspace - todo
# crates.io # crates.io
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
tracing.workspace = true
futures.workspace = true
async-nats = { version = "0.38", features = ["service"] } async-nats = { version = "0.38", features = ["service"] }
clap = { version = "4.5", features = ["derive", "env"] } clap = { version = "4.5", features = ["derive", "env"] }
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1" }
tokio = { version = "1", features = ["full"] }
tracing = { version = "0.1" }
thiserror = "1.0" thiserror = "1.0"
opentelemetry = "0.20" opentelemetry = "0.20"
opentelemetry-prometheus = "0.13" opentelemetry-prometheus = "0.13"
prometheus = "0.13" prometheus = "0.13"
rand = "0.8" rand = "0.8"
axum = "0.6" axum = "0.6"
futures = "0.3"
[dev-dependencies] [dev-dependencies]
reqwest = { version = "0.11", features = ["blocking"] } reqwest = { version = "0.11", features = ["blocking"] }
...@@ -133,20 +133,25 @@ RUN cd lib/runtime && \ ...@@ -133,20 +133,25 @@ RUN cd lib/runtime && \
# Build OpenAI HTTP Service binaries # Build OpenAI HTTP Service binaries
COPY lib/llm /workspace/lib/llm COPY lib/llm /workspace/lib/llm
COPY examples/rust /workspace/examples/rust COPY components /workspace/components
RUN cd examples/rust && \ RUN cd components && \
cargo build --release && \ cargo build --release && \
cp target/release/http /usr/local/bin/ && \ cp target/release/http /usr/local/bin/
cp target/release/llmctl /usr/local/bin/
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
# Build Dynamo Run binaries
COPY launch /workspace/launch
RUN cd launch && \
cargo build --release --features mistralrs,sglang,vllm,python && \
cp target/release/dynamo-run /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/
# Generate C bindings. Note that this is required for TRTLLM backend re-build # Generate C bindings. Note that this is required for TRTLLM backend re-build
COPY lib/bindings /workspace/lib/bindings COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c/ && \ RUN cd lib/bindings/c/ && \
cargo build --release --locked && cargo doc --no-deps cargo build --release --locked && cargo doc --no-deps
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
# Install uv, create virtualenv for general use, and build dynamo wheel # Install uv, create virtualenv for general use, and build dynamo wheel
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \ RUN mkdir /opt/dynamo && \
......
...@@ -86,14 +86,22 @@ RUN cd lib/runtime && \ ...@@ -86,14 +86,22 @@ RUN cd lib/runtime && \
# Build OpenAI HTTP Service binaries # Build OpenAI HTTP Service binaries
COPY lib/llm /workspace/lib/llm COPY lib/llm /workspace/lib/llm
COPY examples/rust /workspace/examples/rust COPY components /workspace/components
RUN cd examples/rust && \ RUN cd components && \
cargo build --release && \ cargo build --release && \
cp target/release/http /usr/local/bin/ && \ cp target/release/http /usr/local/bin/
# Build Dynamo Run binaries
COPY launch /workspace/launch
RUN cd launch && \
cargo build --release --features mistralrs,sglang,vllm,python && \
cp target/release/dynamo-run /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/ cp target/release/llmctl /usr/local/bin/
# TODO: Build dynamo-run # Generate C bindings for kv cache routing in vLLM
# COPY applications/... COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c && \
cargo build --release --locked && cargo doc --no-deps
# Generate C bindings for kv cache routing in vLLM # Generate C bindings for kv cache routing in vLLM
COPY lib/bindings /workspace/lib/bindings COPY lib/bindings /workspace/lib/bindings
......
...@@ -216,14 +216,17 @@ RUN cd lib/runtime && \ ...@@ -216,14 +216,17 @@ RUN cd lib/runtime && \
# Build OpenAI HTTP Service binaries # Build OpenAI HTTP Service binaries
COPY lib/llm /workspace/lib/llm COPY lib/llm /workspace/lib/llm
COPY examples/rust /workspace/examples/rust COPY components /workspace/components
RUN cd examples/rust && \ RUN cd components && \
cargo build --release && \ cargo build --release && \
cp target/release/http /usr/local/bin/ && \ cp target/release/http /usr/local/bin/
cp target/release/llmctl /usr/local/bin/
# TODO: Build tio # Build Dynamo Run binaries
# COPY applications/... COPY launch /workspace/launch
RUN cd launch && \
cargo build --release --features mistralrs,sglang,vllm,python && \
cp target/release/dynamo-run /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/
# Generate C bindings for kv cache routing in vLLM # Generate C bindings for kv cache routing in vLLM
COPY lib/bindings /workspace/lib/bindings COPY lib/bindings /workspace/lib/bindings
...@@ -257,50 +260,5 @@ ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ...@@ -257,50 +260,5 @@ ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
### Lean Runtime Image Stage ### ### TODO Lean Runtime Image Stage ###
# FIXME: Separate build and runtime images
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS runtime
USER root
# Install tools for interactive convenience
RUN apt update -y && \
apt install -y curl tmux vim && \
echo "set -g mouse on" >> /root/.tmux.conf
# Set environment variables
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
# Copy binaries
COPY --from=dev /usr/local/bin/http /usr/local/bin/http
COPY --from=dev /usr/local/bin/llmctl /usr/local/bin/llmctl
COPY --from=dev /usr/local/bin/etcd/etcd /usr/local/bin/etcd
COPY --from=dev /usr/bin/nats-server /usr/local/bin/nats-server
COPY --from=dev /bin/uv /usr/local/bin/uv
COPY --from=dev /bin/uvx /usr/local/bin/uvx
# Copy venv with installed packages
RUN uv python install 3.12
COPY --from=dev /opt/vllm /opt/vllm
COPY --from=dev ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Copy minimal set of files for testing. May consider separate stage for testing
# if test dependencies start to negatively impact deployment environment/size.
COPY pyproject.toml /workspace/pyproject.toml
COPY container/deps/vllm /workspace/container/deps/vllm
# Add library for KV routing
COPY --from=dev ${VLLM_KV_CAPI_PATH} ${VLLM_KV_CAPI_PATH}
# Copy minimal set of files for deployment/examples
# FIXME: Use a more consolidated path after directory restructure
COPY examples/python_rs/llm/vllm_nixl /workspace/examples/python_rs/llm/vllm_nixl
WORKDIR /workspace
# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
...@@ -67,6 +67,9 @@ VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" ...@@ -67,6 +67,9 @@ VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
VLLM_NIXL_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" VLLM_NIXL_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
VLLM_NIXL_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" VLLM_NIXL_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
NIXL_COMMIT=3ce6a673b266b4f293909ceb17ca7975f1ba5cd7
NIXL_REPO=ai-dynamo/nixl.git
get_options() { get_options() {
while :; do while :; do
case $1 in case $1 in
...@@ -290,6 +293,31 @@ elif [[ $FRAMEWORK == "TENSORRTLLM" ]]; then ...@@ -290,6 +293,31 @@ elif [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.tensorrt_llm DOCKERFILE=${SOURCE_DIR}/Dockerfile.tensorrt_llm
fi fi
if [[ $FRAMEWORK == "VLLM_NIXL" ]]; then
TEMP_DIR=$(mktemp -d)
# Clean up temp directory on script exit
trap 'rm -rf "$TEMP_DIR"' EXIT
# Clone original NIXL to temp directory
if [ ! -z ${GITHUB_TOKEN} ]; then
git clone https://oauth2:${GITHUB_TOKEN}@github.com/${NIXL_REPO} "$TEMP_DIR/nixl_src"
else
# Try HTTPS first with credential prompting disabled, fall back to SSH if it fails
if ! GIT_TERMINAL_PROMPT=0 git clone https://github.com/${NIXL_REPO} "$TEMP_DIR/nixl_src"; then
echo "HTTPS clone failed, falling back to SSH..."
git clone git@github.com:${NIXL_REPO} "$TEMP_DIR/nixl_src"
fi
fi
cd "$TEMP_DIR/nixl_src"
git checkout ${NIXL_COMMIT}
BUILD_CONTEXT_ARG+=" --build-context nixl=$TEMP_DIR/nixl_src"
fi
# BUILD DEV IMAGE # BUILD DEV IMAGE
BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG --build-arg FRAMEWORK=$FRAMEWORK --build-arg ${FRAMEWORK}_FRAMEWORK=1 --build-arg VERSION=$VERSION --build-arg PYTHON_PACKAGE_VERSION=$PYTHON_PACKAGE_VERSION" BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG --build-arg FRAMEWORK=$FRAMEWORK --build-arg ${FRAMEWORK}_FRAMEWORK=1 --build-arg VERSION=$VERSION --build-arg PYTHON_PACKAGE_VERSION=$PYTHON_PACKAGE_VERSION"
......
This diff is collapsed.
...@@ -15,10 +15,9 @@ ...@@ -15,10 +15,9 @@
[workspace] [workspace]
members = [ members = [
"hello_world",
"http",
"llmctl", "llmctl",
"service_metrics", "dynamo-run"
] ]
resolver = "2" resolver = "2"
...@@ -33,8 +32,8 @@ repository = "https://github.com/ai-dynamo/dynamo.git" ...@@ -33,8 +32,8 @@ repository = "https://github.com/ai-dynamo/dynamo.git"
[workspace.dependencies] [workspace.dependencies]
# local or crates.io # local or crates.io
dynamo-runtime = { path = "../../lib/runtime" } dynamo-runtime = { path = "../lib/runtime" }
dynamo-llm = { path = "../../lib/llm" } dynamo-llm = { path = "../lib/llm" }
# crates.io # crates.io
anyhow = { version = "1" } anyhow = { version = "1" }
......
...@@ -15,21 +15,20 @@ ...@@ -15,21 +15,20 @@
[package] [package]
name = "llmctl" name = "llmctl"
version.workspace = true version = "0.1.0"
edition.workspace = true edition = "2021"
authors.workspace = true authors = ["NVIDIA"]
license.workspace = true license = "Apache-2.0"
homepage.workspace = true homepage = "https://github.com/ai-dynamo/dynamo"
repository.workspace = true repository = "https://github.com/ai-dynamo/dynamo.git"
[dependencies] [dependencies]
dynamo-runtime = { workspace = true} dynamo-runtime = { path = "../../lib/runtime" }
dynamo-llm = { workspace = true} dynamo-llm = { path = "../../lib/llm" }
serde = { workspace = true }
serde_json = { workspace = true }
tracing = { workspace = true }
tokio = { workspace = true }
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1" }
tracing = { version = "0.1" }
tokio = { version = "1", features = ["full"] }
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
tabled = "0.18" tabled = "0.18"
\ No newline at end of file
This diff is collapsed.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Workspace root: selects the feature resolver and lists the member crates.
[workspace]
resolver = "2"
members = ["hello_world", "service_metrics"]

# Package metadata declared once for the workspace; a member crate opts in
# per key with `<key>.workspace = true`.
[workspace.package]
authors = ["NVIDIA"]
edition = "2021"
homepage = "https://github.com/ai-dynamo/dynamo"
license = "Apache-2.0"
repository = "https://github.com/ai-dynamo/dynamo.git"
version = "0.1.0"

[workspace.dependencies]
# Local path dependency: resolved from the parent directory of this manifest.
dynamo-runtime = { path = "../" }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment