Commit e5db9e86 authored by Neelay Shah, committed by GitHub
Browse files

refactor: Move rust binaries out of examples, update nixl dockerfile (#89)


Co-authored-by: Meenakshi Sharma <163925564+nvda-mesharma@users.noreply.github.com>
parent e1a95dab
......@@ -36,4 +36,5 @@
**/.github
**/*backup*/
.dockerignore
**/target/*
\ No newline at end of file
**/target/*
**/*safetensors
\ No newline at end of file
......@@ -31,7 +31,7 @@ jobs:
matrix:
framework:
- standard
- vllm
- vllm_nixl
name: Build and Test - ${{ matrix.framework }}
env:
CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }}
......@@ -67,6 +67,8 @@ jobs:
fi
echo "cache_from_location=--cache-from \"type=registry,ref=${{ steps.define_image_tag.outputs.image_tag }}\"" >> $GITHUB_OUTPUT
- name: Build image
env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
run: |
./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --framework ${{ matrix.framework }} ${{ steps.which_caches.outputs.cache_from_location }} ${{ steps.which_caches.outputs.cache_to_location }}
- name: Run pytest
......@@ -96,4 +98,4 @@ jobs:
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
with:
name: Event File
path: ${{ github.event_path }}
\ No newline at end of file
path: ${{ github.event_path }}
......@@ -31,7 +31,7 @@ on:
- 'lib/llm/**'
- 'lib/bindings/**'
- 'launch/**'
- 'examples/rust/**'
- 'components/**'
- '**.rs'
- 'Cargo.toml'
- 'Cargo.lock'
......@@ -40,7 +40,7 @@ jobs:
pre-merge-rust:
runs-on: ubuntu-latest
strategy:
matrix: { dir: ['lib/runtime', 'lib/llm', 'lib/bindings/c', 'lib/bindings/python', 'launch/dynamo-run', 'components/metrics', 'examples/rust'] }
matrix: { dir: ['lib/runtime', 'lib/llm', 'lib/bindings/c', 'lib/bindings/python', 'launch/dynamo-run', 'launch/llmctl', 'components', 'lib/runtime/examples'] }
permissions:
contents: read
steps:
......
......@@ -46,6 +46,7 @@ jobs:
filters: |
vllm:
- 'container/Dockerfile.vllm'
- 'container/Dockerfile.vllm_nixl'
- 'examples/python/llm/**'
- 'examples/python_rs/llm/**'
- 'container/deps/requirements.vllm.txt'
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Cargo workspace rooted in this directory; `members` lists the crate
# directories (relative to this file) that belong to it.
[workspace]
members = ["http", "metrics"]
resolver = "2"

# Package metadata declared once at workspace level. A member crate can pick
# a field up with `<field>.workspace = true` in its own manifest.
[workspace.package]
version = "0.1.0"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/ai-dynamo/dynamo"
repository = "https://github.com/ai-dynamo/dynamo.git"

# Dependency requirements declared once at workspace level. A member crate
# can reference an entry with `<name>.workspace = true`.
[workspace.dependencies]
# local or crates.io
# Path dependencies are resolved relative to this manifest's directory.
dynamo-runtime = { path = "../lib/runtime" }
dynamo-llm = { path = "../lib/llm" }

# crates.io
# A bare version string is Cargo shorthand for `{ version = "..." }`.
anyhow = "1"
async-stream = "0.3"
async-trait = "0.1"
bytes = "1"
derive_builder = "0.20"
futures = "0.3"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
thiserror = "2.0.11"
tokio = { version = "1", features = ["full"] }
tokio-stream = "0.1"
tokio-util = { version = "0.7", features = ["codec", "net"] }
tracing = "0.1"
validator = { version = "0.20.0", features = ["derive"] }
uuid = { version = "1", features = ["v4", "serde"] }
......@@ -15,32 +15,34 @@
[package]
name = "metrics"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
[dependencies]
# local
dynamo-runtime = { path = "../../lib/runtime" }
dynamo-llm = { path = "../../lib/llm" }
dynamo-runtime.workspace = true
dynamo-llm.workspace = true
# workspace - todo
# crates.io
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
tracing.workspace = true
futures.workspace = true
async-nats = { version = "0.38", features = ["service"] }
clap = { version = "4.5", features = ["derive", "env"] }
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1" }
tokio = { version = "1", features = ["full"] }
tracing = { version = "0.1" }
thiserror = "1.0"
opentelemetry = "0.20"
opentelemetry-prometheus = "0.13"
prometheus = "0.13"
rand = "0.8"
axum = "0.6"
futures = "0.3"
[dev-dependencies]
reqwest = { version = "0.11", features = ["blocking"] }
......@@ -133,20 +133,25 @@ RUN cd lib/runtime && \
# Build OpenAI HTTP Service binaries
COPY lib/llm /workspace/lib/llm
COPY examples/rust /workspace/examples/rust
RUN cd examples/rust && \
COPY components /workspace/components
RUN cd components && \
cargo build --release && \
cp target/release/http /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
cp target/release/http /usr/local/bin/
# Build Dynamo Run binaries
COPY launch /workspace/launch
RUN cd launch && \
cargo build --release --features mistralrs,sglang,vllm,python && \
cp target/release/dynamo-run /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/
# Generate C bindings. Note that this is required for TRTLLM backend re-build
COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c/ && \
cargo build --release --locked && cargo doc --no-deps
COPY deploy/dynamo/sdk /workspace/deploy/dynamo/sdk
# Install uv, create virtualenv for general use, and build dynamo wheel
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
RUN mkdir /opt/dynamo && \
......
......@@ -86,14 +86,22 @@ RUN cd lib/runtime && \
# Build OpenAI HTTP Service binaries
COPY lib/llm /workspace/lib/llm
COPY examples/rust /workspace/examples/rust
RUN cd examples/rust && \
COPY components /workspace/components
RUN cd components && \
cargo build --release && \
cp target/release/http /usr/local/bin/ && \
cp target/release/http /usr/local/bin/
# Build Dynamo Run binaries
COPY launch /workspace/launch
RUN cd launch && \
cargo build --release --features mistralrs,sglang,vllm,python && \
cp target/release/dynamo-run /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/
# TODO: Build dynamo-run
# COPY applications/...
# Generate C bindings for kv cache routing in vLLM
COPY lib/bindings /workspace/lib/bindings
RUN cd lib/bindings/c && \
cargo build --release --locked && cargo doc --no-deps
# Generate C bindings for kv cache routing in vLLM
COPY lib/bindings /workspace/lib/bindings
......
......@@ -216,14 +216,17 @@ RUN cd lib/runtime && \
# Build OpenAI HTTP Service binaries
COPY lib/llm /workspace/lib/llm
COPY examples/rust /workspace/examples/rust
RUN cd examples/rust && \
COPY components /workspace/components
RUN cd components && \
cargo build --release && \
cp target/release/http /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/
cp target/release/http /usr/local/bin/
# TODO: Build tio
# COPY applications/...
# Build Dynamo Run binaries
COPY launch /workspace/launch
RUN cd launch && \
cargo build --release --features mistralrs,sglang,vllm,python && \
cp target/release/dynamo-run /usr/local/bin/ && \
cp target/release/llmctl /usr/local/bin/
# Generate C bindings for kv cache routing in vLLM
COPY lib/bindings /workspace/lib/bindings
......@@ -257,50 +260,5 @@ ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
### Lean Runtime Image Stage ###
# FIXME: Separate build and runtime images
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS runtime
USER root
# Install tools for interactive convenience
RUN apt update -y && \
apt install -y curl tmux vim && \
echo "set -g mouse on" >> /root/.tmux.conf
# Set environment variables
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
ENV RAPIDS_LIBUCX_PREFER_SYSTEM_LIBRARY=true
ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
# Copy binaries
COPY --from=dev /usr/local/bin/http /usr/local/bin/http
COPY --from=dev /usr/local/bin/llmctl /usr/local/bin/llmctl
COPY --from=dev /usr/local/bin/etcd/etcd /usr/local/bin/etcd
COPY --from=dev /usr/bin/nats-server /usr/local/bin/nats-server
COPY --from=dev /bin/uv /usr/local/bin/uv
COPY --from=dev /bin/uvx /usr/local/bin/uvx
# Copy venv with installed packages
RUN uv python install 3.12
COPY --from=dev /opt/vllm /opt/vllm
COPY --from=dev ${VIRTUAL_ENV} ${VIRTUAL_ENV}
# Copy minimal set of files for testing. May consider separate stage for testing
# if test dependencies start to negatively impact deployment environment/size.
COPY pyproject.toml /workspace/pyproject.toml
COPY container/deps/vllm /workspace/container/deps/vllm
# Add library for KV routing
COPY --from=dev ${VLLM_KV_CAPI_PATH} ${VLLM_KV_CAPI_PATH}
# Copy minimal set of files for deployment/examples
# FIXME: Use a more consolidated path after directory restructure
COPY examples/python_rs/llm/vllm_nixl /workspace/examples/python_rs/llm/vllm_nixl
WORKDIR /workspace
### TODO Lean Runtime Image Stage ###
# FIXME: May want a modification with dynamo banner on entry
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
......@@ -67,6 +67,9 @@ VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
VLLM_NIXL_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
VLLM_NIXL_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
NIXL_COMMIT=3ce6a673b266b4f293909ceb17ca7975f1ba5cd7
NIXL_REPO=ai-dynamo/nixl.git
get_options() {
while :; do
case $1 in
......@@ -290,6 +293,31 @@ elif [[ $FRAMEWORK == "TENSORRTLLM" ]]; then
DOCKERFILE=${SOURCE_DIR}/Dockerfile.tensorrt_llm
fi
if [[ $FRAMEWORK == "VLLM_NIXL" ]]; then
    # Stage a checkout of NIXL pinned to ${NIXL_COMMIT} in a throwaway
    # directory and pass it to docker as the named build context "nixl".
    TEMP_DIR=$(mktemp -d)
    NIXL_SRC_DIR="$TEMP_DIR/nixl_src"

    # Clean up temp directory on script exit
    trap 'rm -rf "$TEMP_DIR"' EXIT

    # Clone original NIXL to temp directory
    if [ -n "${GITHUB_TOKEN:-}" ]; then
        if ! git clone "https://oauth2:${GITHUB_TOKEN}@github.com/${NIXL_REPO}" "$NIXL_SRC_DIR"; then
            echo "Failed to clone ${NIXL_REPO} using GITHUB_TOKEN" >&2
            exit 1
        fi
        # git records the clone URL (token included) in .git/config. This
        # directory is handed to docker as a build context, so rewrite the
        # remote to a token-free URL before it can leak into an image.
        git -C "$NIXL_SRC_DIR" remote set-url origin "https://github.com/${NIXL_REPO}"
    else
        # Try HTTPS first with credential prompting disabled, fall back to SSH if it fails
        if ! GIT_TERMINAL_PROMPT=0 git clone "https://github.com/${NIXL_REPO}" "$NIXL_SRC_DIR"; then
            echo "HTTPS clone failed, falling back to SSH..."
            if ! git clone "git@github.com:${NIXL_REPO}" "$NIXL_SRC_DIR"; then
                echo "Failed to clone ${NIXL_REPO} over HTTPS and SSH" >&2
                exit 1
            fi
        fi
    fi

    # Use `git -C` rather than `cd` so the script's working directory is left
    # untouched for everything that runs after this block.
    if ! git -C "$NIXL_SRC_DIR" checkout "${NIXL_COMMIT}"; then
        echo "Failed to check out NIXL commit ${NIXL_COMMIT}" >&2
        exit 1
    fi

    BUILD_CONTEXT_ARG+=" --build-context nixl=$NIXL_SRC_DIR"
fi
# BUILD DEV IMAGE
BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG --build-arg FRAMEWORK=$FRAMEWORK --build-arg ${FRAMEWORK}_FRAMEWORK=1 --build-arg VERSION=$VERSION --build-arg PYTHON_PACKAGE_VERSION=$PYTHON_PACKAGE_VERSION"
......
This diff is collapsed.
......@@ -15,10 +15,9 @@
[workspace]
members = [
"hello_world",
"http",
"llmctl",
"service_metrics",
"dynamo-run"
]
resolver = "2"
......@@ -33,8 +32,8 @@ repository = "https://github.com/ai-dynamo/dynamo.git"
[workspace.dependencies]
# local or crates.io
dynamo-runtime = { path = "../../lib/runtime" }
dynamo-llm = { path = "../../lib/llm" }
dynamo-runtime = { path = "../lib/runtime" }
dynamo-llm = { path = "../lib/llm" }
# crates.io
anyhow = { version = "1" }
......
......@@ -15,21 +15,20 @@
[package]
name = "llmctl"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
homepage.workspace = true
repository.workspace = true
version = "0.1.0"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/ai-dynamo/dynamo"
repository = "https://github.com/ai-dynamo/dynamo.git"
[dependencies]
dynamo-runtime = { workspace = true}
dynamo-llm = { workspace = true}
serde = { workspace = true }
serde_json = { workspace = true }
tracing = { workspace = true }
tokio = { workspace = true }
dynamo-runtime = { path = "../../lib/runtime" }
dynamo-llm = { path = "../../lib/llm" }
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1" }
tracing = { version = "0.1" }
tokio = { version = "1", features = ["full"] }
clap = { version = "4.5", features = ["derive"] }
tabled = "0.18"
tabled = "0.18"
\ No newline at end of file
This diff is collapsed.
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Cargo workspace rooted in this directory; `members` lists the crate
# directories (relative to this file) that belong to it.
[workspace]
members = [
"hello_world",
"service_metrics",
]
resolver = "2"
# Package metadata declared once at workspace level. A member crate can pick
# a field up with `<field>.workspace = true` in its own manifest.
[workspace.package]
version = "0.1.0"
edition = "2021"
authors = ["NVIDIA"]
license = "Apache-2.0"
homepage = "https://github.com/ai-dynamo/dynamo"
repository = "https://github.com/ai-dynamo/dynamo.git"
# Dependency requirements declared once at workspace level.
[workspace.dependencies]
# local or crates.io
# Path is relative to this manifest, so dynamo-runtime resolves to the parent
# directory of this workspace.
dynamo-runtime = { path = "../" }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment