Unverified Commit ac020629 authored by Dillon Cullinan's avatar Dillon Cullinan Committed by GitHub
Browse files

feat: Dockerfile templating (#5633)


Signed-off-by: default avatarDillon Cullinan <dcullinan@nvidia.com>
parent 5755a8de
......@@ -143,23 +143,12 @@ Build the appropriate framework image (e.g., `dynamo:latest-vllm-local-dev`) fro
```bash
# Single command approach (recommended)
export FRAMEWORK=VLLM # Note: any of VLLM, SGLANG, TRTLLM can be used
./container/build.sh --framework $FRAMEWORK --target local-dev
python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-$(echo "$FRAMEWORK" | tr '[:upper:]' '[:lower:]')-local-dev .
# Now you've created both dynamo:latest-vllm and dynamo:latest-vllm-local-dev
```
Alternatively, you can build a development container, then build local-dev:
```bash
export FRAMEWORK=VLLM
./container/build.sh --framework $FRAMEWORK
# Now you have a development image dynamo:latest-vllm
./container/build.sh --dev-image dynamo:latest-${FRAMEWORK,,}
# Now you have a local-dev image dynamo:latest-vllm-local-dev
```
The local-dev image will give you local user permissions matching your host user and includes extra developer utilities (debugging tools, text editors, system monitors, etc.).
### Step 1: Choose Your Framework
......@@ -427,10 +416,8 @@ If you see errors like "container is not running" or "An error occurred setting
# If missing, build the dev image first, then build local-dev
export FRAMEWORK=VLLM # Replace with VLLM, SGLANG, or TRTLLM
./container/build.sh --framework $FRAMEWORK
# change to lower case portable way across shells
./container/build.sh --dev-image dynamo:latest-$(echo "$FRAMEWORK" | tr '[:upper:]' '[:lower:]') --framework "$FRAMEWORK"
# Now you have dynamo:latest-vllm-local-dev
python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-$(echo "$FRAMEWORK" | tr '[:upper:]' '[:lower:]')-local-dev .
```
2. **Container startup failure:**
......
......@@ -42,6 +42,7 @@
**/target/*
**/*safetensors
container/Dockerfile*
container/*.Dockerfile
.venv
.venv-docs
......
name: 'Docker Build'
description: 'Build Dynamo container images'
inputs:
# --- Common Docker Inputs
framework:
description: 'Framework to build'
required: true
......@@ -13,9 +14,14 @@ inputs:
description: 'Docker platform to build on, ie. linux/amd64'
required: false
default: 'linux/amd64'
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
image_tag:
description: 'Custom image tag (optional, defaults to framework:latest)'
required: false
# --- Secret Inputs
ci_token:
description: 'CI Token'
required: false
......@@ -34,21 +40,6 @@ inputs:
aws_secret_access_key:
description: 'AWS Secret Access Key'
required: false
base_image_tag:
description: 'Optional override for base image tag passed to build.sh'
required: false
runtime_image_tag:
description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
required: false
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
enable_kvbm:
description: 'Enable KVBM support (optional)'
required: false
dynamo_base_image:
description: 'Pre-built Dynamo base image to use instead of building from scratch'
required: false
outputs:
image_tag:
......@@ -70,6 +61,44 @@ runs:
shell: bash
run: |
docker system prune -af
# Install Python together with the packages container/render.py requires
# (jinja2, pyyaml).
- name: Set up Python
  uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
  with:
    python-version: '3.12'
    pip-install: jinja2 pyyaml
# Render ./container/rendered.Dockerfile for the requested target/framework.
# The "Build image" step passes that file to `docker buildx build -f`.
- name: Generate Dockerfile
  shell: bash
  # Action inputs are handed to the script as environment variables instead of
  # being expanded into the script text, so a value containing spaces or shell
  # metacharacters cannot change the command that runs.
  env:
    RENDER_TARGET: ${{ inputs.target }}
    RENDER_FRAMEWORK: ${{ inputs.framework }}
    RENDER_PLATFORM: ${{ inputs.platform }}
    RENDER_CUDA_VERSION: ${{ inputs.cuda_version }}
  run: |
    # Fold the renderer output into a collapsible group in the job log
    echo "::group::Generating Dockerfile"
    echo "Generating Dockerfile for target: ${RENDER_TARGET} and framework: ${RENDER_FRAMEWORK}"
    python ./container/render.py \
      --target="${RENDER_TARGET}" \
      --framework="${RENDER_FRAMEWORK}" \
      --platform="${RENDER_PLATFORM}" \
      --cuda-version="${RENDER_CUDA_VERSION}" \
      --show-result \
      --short-output
    echo "::endgroup::"
# Builds the Endpoint Picker (EPP) image. Runs only for the frontend target and
# exports the resulting image reference as EPP_IMAGE, which the "Build image"
# step forwards as a build argument.
- name: Build EPP image
  if: ${{ inputs.target == 'frontend' }}
  shell: bash
  env:
    ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
  run: |
    # Install system dependencies from apt
    sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
    # Install Rust (cargo + rustc)
    curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
    # Make cargo available to the rest of this script ...
    . "$HOME/.cargo/env"
    # ... and to later steps
    echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
    cargo install cbindgen
    # NOTE(review): presumably a Docker Hub mirror hosted in ECR - confirm
    # against the EPP Makefile's use of DOCKER_PROXY.
    DOCKER_PROXY="${ECR_HOSTNAME}/dockerhub/"
    pushd deploy/inference-gateway/epp
    make all "DOCKER_PROXY=${DOCKER_PROXY}"
    popd
    # Tag the EPP image from the current git state; fall back to "dev" when
    # `git describe` fails.
    EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
    EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
    echo "EPP_IMAGE=${EPP_IMAGE}" >> "$GITHUB_ENV"
- name: Build image
id: build
shell: bash
......@@ -107,50 +136,34 @@ runs:
echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> $GITHUB_ENV
echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}"
# Collect optional overrides provided by the workflow
# Set base cache args and set --cache-to if this is a main commit
# TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
EXTRA_ARGS=""
CACHE_ARGS=""
if [[ "${{ inputs.target }}" != "frontend" ]]; then
EXTRA_ARGS="--cache-to type=inline "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
CACHE_ARGS="--cache-to type=inline "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
# Release branches also use release cache
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
fi
fi
echo "$EXTRA_ARGS"
# Collect optional overrides provided by the workflow
if [ -n "${{ inputs.base_image_tag }}" ]; then
EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
fi
if [ -n "${{ inputs.runtime_image_tag }}" ]; then
EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
fi
if [ -n "${{ inputs.cuda_version }}" ]; then
EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
fi
if [ -n "${{ inputs.dynamo_base_image }}" ]; then
EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
fi
if [ -n "${{ inputs.enable_kvbm }}" ]; then
EXTRA_ARGS+=" --build-arg ENABLE_KVBM=${{ inputs.enable_kvbm }}"
EPP_IMAGE_ARG=""
if [[ ${{ inputs.target }} == "frontend" ]]; then
EPP_IMAGE_ARG="--build-arg EPP_IMAGE=${EPP_IMAGE}"
fi
# Execute build and capture output (show on console AND save to file)
./container/build.sh --tag "$IMAGE_TAG" \
--target ${{ inputs.target }} \
--vllm-max-jobs 10 \
--framework ${{ inputs.framework }} \
--platform ${{ inputs.platform }} \
--use-sccache \
--sccache-bucket "$SCCACHE_S3_BUCKET" \
--sccache-region "$AWS_DEFAULT_REGION" $EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
docker buildx build \
--progress=plain \
--tag "$IMAGE_TAG" \
--load \
-f ./container/rendered.Dockerfile \
$CACHE_ARGS \
$EPP_IMAGE_ARG . 2>&1 | tee "${BUILD_LOG_FILE}"
BUILD_EXIT_CODE=${PIPESTATUS[0]}
......
......@@ -13,6 +13,9 @@ inputs:
description: 'Docker platform to build on, ie. linux/amd64'
required: false
default: 'linux/amd64'
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
image_tag:
description: 'Custom image tag'
required: true
......@@ -34,21 +37,6 @@ inputs:
aws_secret_access_key:
description: 'AWS Secret Access Key'
required: false
base_image_tag:
description: 'Optional override for base image tag passed to build.sh'
required: false
runtime_image_tag:
description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
required: false
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
enable_kvbm:
description: 'Enable KVBM support (optional)'
required: false
dynamo_base_image:
description: 'Pre-built Dynamo base image to use instead of building from scratch'
required: false
no_cache:
description: 'Disable Docker build cache'
required: false
......@@ -117,21 +105,20 @@ runs:
# Collect optional overrides provided by the workflow
# Set base cache args and set --cache-to if this is a main commit
# TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
EXTRA_ARGS=""
CACHE_ARGS=""
if [[ "${{ inputs.target }}" != "frontend" ]]; then
EXTRA_ARGS="--cache-to type=inline "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
CACHE_ARGS="--cache-to type=inline "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
# Release branches also use release cache
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
fi
fi
echo "$EXTRA_ARGS"
echo "$CACHE_ARGS"
# Collect optional overrides provided by the workflow
if [[ "${{ inputs.ci }}" == "true" ]]; then
......@@ -139,21 +126,6 @@ runs:
EXTRA_ARGS+=" --ci"
fi
if [ -n "${{ inputs.base_image_tag }}" ]; then
EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
fi
if [ -n "${{ inputs.runtime_image_tag }}" ]; then
EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
fi
if [ -n "${{ inputs.cuda_version }}" ]; then
EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
fi
if [ -n "${{ inputs.dynamo_base_image }}" ]; then
EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
fi
if [ -n "${{ inputs.enable_kvbm }}" ]; then
EXTRA_ARGS+=" --build-arg ENABLE_KVBM=${{ inputs.enable_kvbm }}"
fi
if [ "${{ inputs.no_cache }}" == "true" ]; then
EXTRA_ARGS+=" --no-cache"
fi
......@@ -161,9 +133,9 @@ runs:
EXTRA_ARGS+=" --build-arg CARGO_BUILD_JOBS=4 --use-sccache"
fi
if [ "${{ inputs.push_image }}" == "true" ]; then
EXTRA_ARGS+=" --push --no-load"
elif [ "${{ inputs.no_load }}" == "true" ]; then
EXTRA_ARGS+=" --no-load"
EXTRA_ARGS+=" --push"
elif [ "${{ inputs.no_load }}" == "false" ]; then
EXTRA_ARGS+=" --load"
fi
# Add extra tags (each as a separate --tag argument)
......@@ -176,16 +148,14 @@ runs:
done <<< "$EXTRA_TAGS"
fi
# Execute build and capture output (show on console AND save to file)
./container/build.sh --tag "$IMAGE_TAG" \
--target ${{ inputs.target }} \
--vllm-max-jobs 10 \
--no-tag-latest \
--framework ${{ inputs.framework }} \
--platform ${{ inputs.platform }} \
--sccache-bucket "$SCCACHE_S3_BUCKET" \
--sccache-region "$AWS_DEFAULT_REGION" \
$EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
docker buildx build \
--progress=plain \
--tag "$IMAGE_TAG" \
--platform linux/${{ inputs.platform }} \
-f ./container/rendered.Dockerfile \
$CACHE_ARGS \
$EXTRA_ARGS \
$EPP_IMAGE_ARG . 2>&1 | tee "${BUILD_LOG_FILE}"
BUILD_EXIT_CODE=${PIPESTATUS[0]}
......
......@@ -63,6 +63,9 @@ ignore:
- 'container/run.sh'
- 'container/use-sccache.sh'
- 'container/dev/**'
- 'container/templates/aws.Dockerfile'
- 'container/templates/local_dev.Dockerfile'
- 'container/templates/dev.Dockerfile'
ci: &ci
- '.github/workflows/**'
......@@ -78,6 +81,12 @@ core:
- *ci
- 'container/build.sh'
- 'container/Dockerfile'
- 'container/render.py'
- 'container/Dockerfile.template'
- 'container/context.yaml'
- 'container/templates/args.Dockerfile'
- 'container/templates/dynamo_*'
- 'container/templates/wheel_builder.Dockerfile'
- '.dockerignore'
- 'container/deps/*'
- '.cargo/config.toml'
......@@ -120,6 +129,7 @@ vllm:
- 'container/deps/vllm/**'
- 'examples/backends/vllm/**'
- 'components/src/dynamo/vllm/**'
- 'container/templates/vllm_*'
sglang:
- '!**/*.md'
......@@ -127,6 +137,7 @@ sglang:
- 'container/Dockerfile.sglang'
- 'examples/backends/sglang/**'
- 'components/src/dynamo/sglang/**'
- 'container/templates/sglang_*'
trtllm:
- '!**/*.md'
......@@ -136,6 +147,7 @@ trtllm:
- 'examples/backends/trtllm/**'
- 'components/src/dynamo/trtllm/**'
- 'container/build_trtllm_wheel.sh'
- 'container/templates/trtllm_*'
frontend:
- '!**/*.md'
......@@ -153,6 +165,7 @@ frontend:
- 'components/src/dynamo/frontend/**'
- 'components/src/dynamo/common/**'
- 'deploy/inference-gateway/**'
- 'container/templates/frontend.Dockerfile'
rust:
- '.github/workflows/pre-merge.yml'
......
......@@ -68,21 +68,6 @@ jobs:
uses: actions/setup-go@v5
with:
go-version: '1.24'
- name: Install dependencies
shell: bash
run: |
set -euo pipefail
# Install system dependencies from apt
sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
# Install Rust (cargo + rustc)
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
# Make cargo available to later steps
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
- name: Install cbindgen
shell: bash
run: |
set -euo pipefail
cargo install cbindgen
- name: Docker Login
uses: ./.github/actions/docker-login
with:
......@@ -95,10 +80,10 @@ jobs:
id: build-image
uses: ./.github/actions/docker-build
env:
PLATFORMS: linux/${{ matrix.platform.arch }}
PLATFORMS: ${{ matrix.platform.arch }}
TARGETARCH: ${{ matrix.platform.arch }}
with:
framework: none
framework: dynamo
target: frontend
platform: ${{ env.PLATFORMS }}
ci_token: ${{ secrets.CI_TOKEN }}
......
......@@ -10,6 +10,10 @@ on:
description: 'Framework name (vllm, sglang, trtllm)'
required: true
type: string
target:
description: 'Target stage for Docker rendering'
required: true
type: string
platforms:
description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])'
required: true
......@@ -86,6 +90,7 @@ jobs:
with:
framework: ${{ inputs.framework }}
platform: ${{ matrix.platform }}
target: ${{ inputs.target }}
cuda_version: ${{ matrix.cuda_version }}
extra_tags: ${{ inputs.extra_tags }}
no_cache: ${{ inputs.no_cache }}
......
......@@ -10,6 +10,10 @@ on:
description: 'Framework name (vllm, sglang, trtllm)'
required: true
type: string
target:
description: 'Target stage for Docker rendering'
required: true
type: string
platform:
description: 'Platform to build (amd64 or arm64)'
required: true
......@@ -105,13 +109,17 @@ jobs:
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
lfs: true
# Install Python together with jinja2 and pyyaml; the "Generate Dockerfile"
# step later in this job runs container/render.py, which needs them.
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.12'
pip-install: jinja2 pyyaml
- name: Calculate extra tags with platform suffix # will get redundant upon multi arch builds support
id: extra-tags
shell: bash
env:
EXTRA_TAGS: ${{ inputs.extra_tags }}
PLATFORM: ${{ inputs.platform }}
PLATFORM: linux/${{ inputs.platform }}
run: |
if [ -n "$EXTRA_TAGS" ]; then
RESULT=""
......@@ -134,7 +142,6 @@ jobs:
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
- name: Calculate target tag
id: calculate-target-tag
shell: bash
......@@ -146,7 +153,6 @@ jobs:
echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
- name: Initialize Dynamo Builder
uses: ./.github/actions/init-dynamo-builder
with:
......@@ -154,28 +160,37 @@ jobs:
flavor: ${{ inputs.framework }}
arch: ${{ inputs.platform }}
cuda_version: ${{ inputs.cuda_version }}
- name: Print Build Container inputs
run: |
echo "=== Build Container Inputs ==="
echo "image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}"
echo "framework: ${{ inputs.framework }}"
echo "target: runtime"
echo "platform: linux/${{ inputs.platform }}"
echo "platform: ${{ inputs.platform }}"
echo "cuda_version: ${{ inputs.cuda_version }}"
echo "no_cache: ${{ inputs.no_cache }}"
echo "extra_tags: ${{ steps.extra-tags.outputs.tags }}"
echo "push_image: ${{ inputs.push_image }}"
echo "no_load: ${{ inputs.no_load }}"
# Render ./container/rendered.Dockerfile for the requested target/framework
# before the "Build Container" step runs.
- name: Generate Dockerfile
  shell: bash
  # Workflow inputs are handed to the script as environment variables instead
  # of being expanded into the script text, so a value containing spaces or
  # shell metacharacters cannot change the command that runs.
  env:
    RENDER_TARGET: ${{ inputs.target }}
    RENDER_FRAMEWORK: ${{ inputs.framework }}
    RENDER_PLATFORM: ${{ inputs.platform }}
    RENDER_CUDA_VERSION: ${{ inputs.cuda_version }}
  run: |
    echo "Generating Dockerfile for target: ${RENDER_TARGET} and framework: ${RENDER_FRAMEWORK}"
    python ./container/render.py \
      --target="${RENDER_TARGET}" \
      --framework="${RENDER_FRAMEWORK}" \
      --platform="${RENDER_PLATFORM}" \
      --cuda-version="${RENDER_CUDA_VERSION}" \
      --show-result \
      --short-output
- name: Build Container
id: build-image
uses: ./.github/actions/docker-remote-build
with:
image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}
framework: ${{ inputs.framework }}
target: runtime
platform: linux/${{ inputs.platform }}
target: ${{ inputs.target }}
platform: ${{ inputs.platform }}
cuda_version: ${{ inputs.cuda_version }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
......
......@@ -99,17 +99,15 @@ jobs:
with:
framework: ${{ matrix.framework }}
target: runtime
platform: linux/amd64
base_image_tag: ''
runtime_image_tag: ''
cuda_version: ''
platform: amd64
cuda_version: '12.9'
image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
- name: Tag and Push Runtime Images
uses: ./.github/actions/docker-tag-push
with:
......@@ -132,13 +130,9 @@ jobs:
matrix:
include:
- framework: vllm
base_image_tag: '25.06-cuda12.9-devel-ubuntu24.04'
runtime_image_tag: '12.9.0-runtime-ubuntu24.04'
cuda_version: '12.9'
- framework: sglang
base_image_tag: ''
runtime_image_tag: ''
cuda_version: ''
cuda_version: '12.9'
env:
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
steps:
......@@ -158,17 +152,15 @@ jobs:
with:
framework: ${{ matrix.framework }}
target: runtime
platform: linux/arm64
base_image_tag: ${{ matrix.base_image_tag }}
runtime_image_tag: ${{ matrix.runtime_image_tag }}
platform: arm64
cuda_version: ${{ matrix.cuda_version }}
image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
- name: Tag and Push Runtime Images
uses: ./.github/actions/docker-tag-push
with:
......@@ -210,17 +202,15 @@ jobs:
with:
framework: ${{ matrix.framework }}
target: runtime
platform: linux/amd64
base_image_tag: ''
runtime_image_tag: ''
platform: amd64
cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
- name: Tag and Push CUDA 13 Runtime Images
uses: ./.github/actions/docker-tag-push
with:
......@@ -262,17 +252,15 @@ jobs:
with:
framework: ${{ matrix.framework }}
target: runtime
platform: linux/arm64
base_image_tag: ''
runtime_image_tag: ''
platform: arm64
cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
- name: Tag and Push CUDA 13 Runtime Images
uses: ./.github/actions/docker-tag-push
with:
......
......@@ -66,11 +66,27 @@ jobs:
id: define_image_tag
run: |
echo "image_tag=dynamo:latest-dev" >> $GITHUB_OUTPUT
# Render ./container/rendered.Dockerfile for the standalone Dynamo dev image
# that the following "Build image" step builds.
- name: Generate Dockerfile
  shell: bash
  run: |
    # The target and framework are fixed for this job, so the log line states
    # the values actually passed to render.py rather than workflow inputs.
    echo "Generating Dockerfile for target: dev and framework: dynamo"
    python ./container/render.py \
      --target=dev \
      --framework=dynamo \
      --platform=amd64 \
      --show-result \
      --short-output
- name: Build image
env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
run: |
./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --target dev --framework none --enable-kvbm --enable-media-ffmpeg
docker buildx build \
--progress=plain \
--tag ${{ steps.define_image_tag.outputs.image_tag }} \
-f ./container/rendered.Dockerfile \
--build-arg ENABLE_MEDIA_FFMPEG=true \
--build-arg ENABLE_KVBM=true \
--load .
- name: Start services with docker-compose
working-directory: ./deploy
run: |
......
......@@ -177,6 +177,7 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: vllm
target: runtime
platforms: '["amd64", "arm64"]'
cuda_versions: '["12.9", "13.0"]'
extra_tags: |
......@@ -194,6 +195,7 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: sglang
target: runtime
platforms: '["amd64", "arm64"]'
cuda_versions: '["12.9", "13.0"]'
extra_tags: |
......@@ -211,11 +213,12 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with:
framework: trtllm
target: runtime
platforms: '["amd64", "arm64"]'
cuda_versions: '["13.1"]'
extra_tags: |
${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
${{ github.ref_name == 'main' && format('main-trt-llm-{0}', github.sha) || '' }}
${{ github.ref_name == 'main' && format('main-trtllm-{0}', github.sha) || '' }}
builder_name: ${{ needs.changed-files.outputs.builder_name }}
secrets: inherit
......
......@@ -2,6 +2,7 @@
.vs/
.vscode/
.helix
*rendered.Dockerfile
[Bb]inlog/
[Bb][Uu][Ii][Ll][Dd]/
[Oo][Bb][Jj]/
......
This diff is collapsed.
# syntax=docker/dockerfile:1.10.0-labs
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
{# Top-level template for the rendered Dockerfile. It assembles the stage
   fragments under templates/ according to three template variables:
   `framework`, `target` and `make_efa`. The args fragment is always included
   first. #}
{% include "templates/args.Dockerfile" %}
# --- Base Image Stages
{# Every framework other than "dynamo" gets dynamo_base and wheel_builder,
   whatever the target is. #}
{% if framework != "dynamo" %}
{% include "templates/dynamo_base.Dockerfile" %}
{% include "templates/wheel_builder.Dockerfile" %}
{# For framework "dynamo" the stages depend on the target. A target not listed
   in the branches below includes no base stages at all. #}
{% elif framework == "dynamo" %}
{% if target == "frontend" %}
{% include "templates/dynamo_base.Dockerfile" %}
{% include "templates/wheel_builder.Dockerfile" %}
{% include "templates/dynamo_runtime.Dockerfile" %}
{% include "templates/frontend.Dockerfile" %}
{% elif target == "runtime" or target == "dev" or target == "local-dev" %}
{% include "templates/dynamo_base.Dockerfile" %}
{% include "templates/wheel_builder.Dockerfile" %}
{% include "templates/dynamo_runtime.Dockerfile" %}
{% elif target == "wheel_builder" %}
{% include "templates/dynamo_base.Dockerfile" %}
{% include "templates/wheel_builder.Dockerfile" %}
{% elif target == "base" %}
{% include "templates/dynamo_base.Dockerfile" %}
{% endif %}
{% endif %}
# --- Framework Stages
{# The sglang branch is checked first and does not look at `target`:
   sglang_runtime is included whenever framework is "sglang". #}
{% if framework == "sglang" %}
# SGLang is the only framework without a `framework` target currently, needs special treatment
{% include "templates/sglang_runtime.Dockerfile" %}
{# Other non-dynamo frameworks: the file name is built from the framework name,
   i.e. templates/<framework>_framework.Dockerfile and
   templates/<framework>_runtime.Dockerfile. #}
{% elif target == "framework" and framework != "dynamo" %}
{% include "templates/" ~ framework ~ "_framework.Dockerfile" %}
{% elif ( target == "runtime" or target == "dev" or target == "local-dev" ) and framework != "dynamo" %}
{% include "templates/" ~ framework ~ "_framework.Dockerfile" %}
{% include "templates/" ~ framework ~ "_runtime.Dockerfile" %}
{% else %}
# --- No framework stages included
{% endif %}
{# The aws fragment is added when make_efa is true: here for the runtime
   target, and in the development section below for dev / local-dev. #}
{% if make_efa == true and target == "runtime" %}
{% include "templates/aws.Dockerfile" %}
{% endif %}
# --- Development Stages
{# dev and local-dev both include the dev fragment; local-dev additionally
   appends the local_dev fragment after the optional aws fragment. #}
{% if target == "dev" or target == "local-dev" %}
{% include "templates/dev.Dockerfile" %}
{% if make_efa == true %}
{% include "templates/aws.Dockerfile" %}
{% endif %}
{% if target == "local-dev" %}
{% include "templates/local_dev.Dockerfile" %}
{% endif %}
{% else %}
# --- No development stages included
{% endif %}
......@@ -6,22 +6,21 @@ The NVIDIA Dynamo project uses containerized development and deployment to maint
### Core Components
- **`build.sh`** - A Docker image builder that creates containers for different AI inference frameworks (vLLM, TensorRT-LLM, SGLang). It handles framework-specific dependencies, multi-stage builds, and development vs production configurations.
- **`render.py`** - A render script used to generate Dockerfiles for AI inference frameworks (vLLM, TensorRT-LLM, SGLang). The generated Dockerfile includes the multi-stage steps needed for development and production configurations.
- **`run.sh`** - A container runtime manager that launches Docker containers with proper GPU access, volume mounts, and environment configurations. It supports different development workflows from root-based legacy setups to user-based development environments.
- **Multiple Dockerfiles** - Framework-specific Dockerfiles that define the container images:
- `Dockerfile.vllm` - For vLLM inference backend
- `Dockerfile.trtllm` - For TensorRT-LLM inference backend
- `Dockerfile.sglang` - For SGLang inference backend
- `Dockerfile` - Base/standalone configuration
- `Dockerfile.epp` - For building the Endpoint Picker (EPP) image
## Rendering Requirements:
- Python
- Python Packages:
- pyyaml
- jinja2
### Stage Summary for Frameworks
<details>
<summary>Show Stage Summary Table</summary>
Dockerfile.${FRAMEWORK} General Structure
Dockerfile General Structure
Below is a summary of the general file structure for the framework Dockerfile stages. Some exceptions exist.
......@@ -80,14 +79,13 @@ The scripts in this directory abstract away the complexity of Docker commands wh
### Convenience Scripts vs Direct Docker Commands
The `build.sh` and `run.sh` scripts are convenience wrappers that simplify common Docker operations. They automatically handle:
- Framework-specific image selection and tagging
The `run.sh` script and the rendering scripts are convenience wrappers that simplify common Docker operations. They automatically handle:
- GPU access configuration and runtime selection
- Volume mount setup for development workflows
- Environment variable management
- Build argument construction for multi-stage builds
**You can always use Docker commands directly** if you prefer more control or want to customize beyond what the scripts provide. The scripts use `--dry-run` flags to show you the exact Docker commands they would execute, making it easy to understand and modify the underlying operations.
**You can always use Docker commands directly** if you prefer more control or want to customize beyond what the scripts provide. The `run.sh` script has a `--dry-run` flag that shows you the exact commands it would execute, making it easy to understand and modify the underlying operations.
## Development Targets Feature Matrix
......@@ -117,10 +115,11 @@ The `build.sh` and `run.sh` scripts are convenience wrappers that simplify commo
### 1. runtime target (runs as non-root dynamo user):
```bash
# Build runtime image
./build.sh --framework vllm --target runtime
python container/render.py --framework vllm --target runtime --short-output
docker build -t dynamo:latest-vllm-runtime -f container/rendered.Dockerfile .
# Run runtime container
./run.sh --image dynamo:latest-vllm-runtime -it
container/run.sh --image dynamo:latest-vllm-runtime -it
```
### 2. local-dev + `run.sh` (runs as dynamo user with matched host UID/GID):
......@@ -133,20 +132,20 @@ Use VS Code/Cursor Dev Container Extension with devcontainer.json configuration.
## Build and Run Scripts Overview
### build.sh - Docker Image Builder
### render.py - Dockerfile Generator
The `build.sh` script is responsible for building Docker images for different AI inference frameworks. It supports multiple frameworks and configurations:
The `render.py` script is responsible for generating Dockerfiles for different AI inference frameworks. It supports multiple frameworks and configurations:
**Purpose:**
- Builds Docker images for NVIDIA Dynamo with support for vLLM, TensorRT-LLM, SGLang, or standalone configurations
- Generates Dockerfiles for NVIDIA Dynamo with support for vLLM, TensorRT-LLM, SGLang, or standalone configurations
- Handles framework-specific dependencies and optimizations
- Manages build contexts, caching, and multi-stage builds
- Configures development vs production targets
**Key Features:**
- **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE
- **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE (standalone Dynamo)
- **Multi-stage Builds**: Build process with base images
- **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `build.sh`.
- **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `render.py`.
- **Build Caching**: Docker layer caching and sccache support
- **GPU Optimization**: CUDA, EFA, and NIXL support
......@@ -221,52 +220,49 @@ Current cache types (as mounted in various Dockerfiles):
Note: `uv` commands set `UV_CACHE_DIR` per `RUN` so `uv` always uses the same path as the cache mount (instead of relying on `$HOME`).
**How `dev` / `local-dev` builds work:**
- `dev` and `local-dev` targets are defined in `container/dev/Dockerfile.dev`.
- The framework Dockerfiles (`Dockerfile.vllm`, `Dockerfile.trtllm`, `Dockerfile.sglang`, `Dockerfile`) define shared stages used by `Dockerfile.dev` (e.g. `runtime`, `dynamo_base`, `wheel_builder`).
- To build a single coherent Dockerfile, `build.sh` generates a temporary Dockerfile that is a literal concatenation of:
- the selected framework Dockerfile, then
- `container/dev/Dockerfile.dev`
`build.sh` then continues building normally using the temp Dockerfile path.
**Requirements and debugging:**
- By default the temp Dockerfile is deleted at the end of `build.sh`. To keep it for inspection, set `KEEP_DEV_DOCKERFILE_TEMP=1`.
> **💡 Tip**: The `dev` and `local-dev` images have source code baked in, but **using `--mount-workspace` with `run.sh` is recommended for development** to bind mount your local workspace for live editing.
**Common Usage Examples:**
```bash
# Build vLLM dev image called dynamo:latest-vllm (default). This runs as root and is for development.
./build.sh
python container/render.py --framework=vllm --target=dev --short-output
docker build -t dynamo:latest-vllm -f container/rendered.Dockerfile .
# Build a local-dev image. The local-dev image will run as `dynamo` with UID/GID matched to your host user,
# which is useful when mounting partitions for development.
./build.sh --framework vllm --target local-dev
python container/render.py --framework=vllm --target=local-dev --short-output
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
# Build TensorRT-LLM runtime image called dynamo:latest-trtllm-runtime
./build.sh --framework trtllm
# Build with custom tag
./build.sh --framework sglang --tag my-custom-tag
# Dry run to see commands
./build.sh --dry-run
# Build with no cache
./build.sh --no-cache
# Build with build arguments
./build.sh --build-arg CUSTOM_ARG=value
python container/render.py --framework=trtllm --target=runtime --short-output
docker build -t dynamo:latest-trtllm-runtime -f container/rendered.Dockerfile .
```
### Building the Frontend Image
The frontend image is a specialized container that includes the Dynamo components (Dynamo, NIXL, etc) along with the Endpoint Picker (EPP) for Kubernetes Gateway API Inference Extension integration. This image is primarily used for inference gateway deployments.
**Build EPP Image**
```bash
sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
. "$HOME/.cargo/env"
cargo install cbindgen
pushd deploy/inference-gateway/epp
make all
popd
EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
```
**Build Frontend Image**
```bash
# Build the frontend image (pass the EPP image from the previous step via the EPP_IMAGE build arg)
./build.sh --framework none --target frontend
python container/render.py --framework=dynamo --target=frontend --short-output
docker build -t dynamo:frontend --build-arg EPP_IMAGE=${EPP_IMAGE} -f container/rendered.Dockerfile .
```
The build process automatically:
......@@ -313,34 +309,34 @@ The `run.sh` script launches Docker containers with the appropriate configuratio
```bash
# Basic container launch with dev image (runs as root by default, non-interactive)
./run.sh --image dynamo:latest-vllm -v $HOME/.cache:/root/.cache
container/run.sh --image dynamo:latest-vllm -v $HOME/.cache:/root/.cache
# Interactive development with workspace mounted using dev image (runs as root)
./run.sh --image dynamo:latest-vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Interactive development with local-dev image (runs as dynamo user with matched host UID/GID)
./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Use specific image and framework for development
./run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Interactive development shell with workspace mounted (local-dev)
./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it -- bash
container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it -- bash
# Development with custom environment variables
./run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Dry run to see docker command
./run.sh --dry-run
container/run.sh --dry-run
# Development with custom volume mounts
./run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Run runtime image as non-root dynamo user (for production)
./run.sh --image dynamo:latest-vllm-runtime -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm-runtime -v $HOME/.cache:/home/dynamo/.cache
# Run dev image as specific user (override default root)
./run.sh --image dynamo:latest-vllm --user dynamo -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm --user dynamo -v $HOME/.cache:/home/dynamo/.cache
```
### Network Configuration Options
......@@ -350,8 +346,8 @@ The `run.sh` script supports different networking modes via the `--network` flag
#### Host Networking (Default)
```bash
# Examples with dynamo user
./run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/dynamo/.cache
./run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/dynamo/.cache
```
**Use cases:**
- High-performance ML inference (default for GPU workloads)
......@@ -364,7 +360,7 @@ The `run.sh` script supports different networking modes via the `--network` flag
#### Bridge Networking (Isolated)
```bash
# CI/testing with isolated bridge networking and host cache sharing (no -it for automated CI)
./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache
```
**Use cases:**
- Secure isolation from host network
......@@ -377,10 +373,10 @@ The `run.sh` script supports different networking modes via the `--network` flag
#### No Networking ⚠️ **LIMITED FUNCTIONALITY**
```bash
# Complete network isolation - no external connectivity
./run.sh --image dynamo:latest-vllm --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Same with local-dev image (dynamo user with matched host UID/GID)
./run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
```
**⚠️ WARNING: `--network none` severely limits Dynamo functionality:**
- **No model downloads** - HuggingFace models cannot be downloaded
......@@ -427,11 +423,12 @@ See Docker documentation for custom network creation and management.
### Development Workflow
```bash
# 1. Build local-dev image (builds runtime, then dev as intermediate, then local-dev as final image)
./build.sh --framework vllm --target local-dev
python container/render.py --framework=vllm --target=local-dev --short-output
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
# 2. Run development container using the local-dev image
# RECOMMENDED: --mount-workspace for live editing in dev and local-dev images
./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it
container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it
# 3. Inside container, run inference (requires both frontend and backend)
# Start frontend
......@@ -444,19 +441,21 @@ python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 &
### Production Workflow
```bash
# 1. Build production runtime image (runs as non-root dynamo user)
./build.sh --framework vllm --target runtime
python container/render.py --framework=vllm --target=runtime --short-output
docker build -t dynamo:latest-vllm-runtime -f container/rendered.Dockerfile .
# 2. Run production container as non-root dynamo user
./run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/home/dynamo/.cache
container/run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/home/dynamo/.cache
```
### Testing Workflow
```bash
# 1. Build dev image
./build.sh --framework vllm --no-cache
python container/render.py --framework=vllm --target=dev --short-output
docker build -t dynamo:latest-vllm -f container/rendered.Dockerfile .
# 2. Run tests with network isolation for reproducible results (no -it needed for CI)
./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache -- python -m pytest tests/
container/run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache -- python -m pytest tests/
# 3. Inside the container with bridge networking, start services
# Note: Services are only accessible from the same container - no port conflicts with host
......
This diff is collapsed.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# This file holds the default ARG values for the Dockerfiles generated
# by render.py. These are the recommended default values for users and
# are the source of truth for the values used in our delivered images.
#
# Some ARGs have multiple valid values and can be changed for local testing.
# To do so, either edit this file locally or pass --build-arg to docker build
# when building.
# Defaults for the standalone `dynamo` framework. The Dockerfile template also
# reads the Python, NATS, etcd, FFmpeg and NIXL values from this section no
# matter which framework is being rendered.
dynamo:
base_image: nvcr.io/nvidia/cuda-dl-base
base_image_tag: 25.01-cuda12.8-devel-ubuntu24.04
# epp_image and frontend_image are only emitted when rendering the `frontend` target.
epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1
frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619
python_version: "3.12"
nats_version: v2.10.28
etcd_version: v3.5.21
nixl_ref: 0.9.0
nixl_ucx_ref: v1.20.0
nixl_gdrcopy_ref: v2.5.1
nixl_ucx_efa_ref: 9d2b88a1f67faf9876f267658bd077b379b8bb76
nixl_libfabric_ref: v2.3.0
# Feature toggles are kept as quoted strings so they are rendered into the
# ARG lines as the literal text "true"/"false".
enable_kvbm: "false"
enable_media_nixl: "false"
enable_media_ffmpeg: "false"
enable_gpu_memory_service: "false"
ffmpeg_version: "7.1"
# efa_version is only emitted when render.py is run with --make-efa.
efa_version: 1.45.1
# Defaults used when rendering with --framework=vllm.
vllm:
base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: nvcr.io/nvidia/cuda
# ${CUDA_VERSION} is not expanded by render.py; it is copied into the
# Dockerfile as-is, after ARG CUDA_VERSION has been declared
# (presumably resolved by docker build - confirm).
runtime_image_tag: ${CUDA_VERSION}.0-runtime-ubuntu24.04
vllm_ref: v0.14.1
flashinf_ref: v0.5.3
lmcache_ref: 0.3.12
max_jobs: "10"
enable_media_nixl: "true"
enable_media_ffmpeg: "true"
enable_gpu_memory_service: "true"
enable_kvbm: "true"
# Per-CUDA settings: the template looks up base_image_tag in the section
# named "cuda" + the --cuda-version value.
cuda12.9:
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
cuda13.0:
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
# Defaults used when rendering with --framework=sglang.
sglang:
base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: lmsysorg/sglang
# Per-CUDA settings: the template looks up both base_image_tag and
# runtime_image_tag in the section named "cuda" + the --cuda-version value.
cuda12.9:
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
runtime_image_tag: v0.5.7-runtime
cuda13.0:
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag: v0.5.8-cu130-runtime
enable_media_nixl: "true"
enable_media_ffmpeg: "true"
enable_gpu_memory_service: "true"
enable_kvbm: "false"
# Defaults used when rendering with --framework=trtllm. The image tags here
# are read directly from this section; no per-CUDA lookup is done for trtllm.
trtllm:
base_image: nvcr.io/nvidia/pytorch
base_image_tag: 25.12-py3
runtime_image: nvcr.io/nvidia/cuda-dl-base
runtime_image_tag: 25.10-cuda13.0-runtime-ubuntu24.04
enable_media_nixl: "true"
enable_media_ffmpeg: "true"
enable_gpu_memory_service: "false"
enable_kvbm: "true"
# Rendered as ARG TRTLLM_PYTHON_VERSION.
python_version: "3.12"
index_url: https://pypi.nvidia.com/
pip_wheel_dir: /tmp/trtllm_wheel/
pip_wheel: tensorrt-llm==1.3.0rc1
# Copied verbatim into ARG TRTLLM_WHEEL_IMAGE, which the template declares
# after ARG TENSORRTLLM_PIP_WHEEL. The `#*==` expansion presumably strips
# everything up to "==" so the wheel version becomes the image tag - TODO confirm.
trtllm_wheel_image: nvcr.io/nvidia/tensorrt-llm/release:${TENSORRTLLM_PIP_WHEEL#*==}
# NOTE(review): this differs from the pip_wheel version above
# (1.2.0rc6 vs 1.3.0rc1) - confirm that is intentional.
github_trtllm_commit: 1.2.0rc6
torch_version: 2.10.0a0+b4e4ee81d3.nv25.12
torch_tensorrt_version: 2.10.0a0
torchvision_version: 0.25.0a0+ca221243
torchao_ver: 0.15.0+git01374eb5
torchdata_ver: 0.11.0
torchtitan_ver: 0.2.0
jinja2_version: 3.1.6
sympy_version: 1.14.0
pytorch_triton_ver: 3.5.1+gitbfeb0668.nv25.12
flash_attn_version: 2.7.4.post1+25.12
flashinfer_python_ver: 0.6.1
has_trtllm_context: "0"
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import argparse
import re
import sys
from pathlib import Path
import yaml
from jinja2 import Environment, FileSystemLoader
def parse_args(argv=None):
    """Parse the command-line options accepted by render.py.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``framework``, ``target``,
        ``platform``, ``cuda_version``, ``make_efa``, ``short_output`` and
        ``show_result``.
    """
    parser = argparse.ArgumentParser(
        description="Renders dynamo Dockerfiles from templates"
    )
    parser.add_argument(
        "--framework",
        # The context keys and the template comparisons are lowercase, so
        # normalize inputs such as "VLLM" instead of failing in the template.
        type=str.lower,
        default="vllm",
        help="Dockerfile framework to use [dynamo, vllm, sglang, trtllm]",
    )
    parser.add_argument(
        "--target",
        type=str,
        default="runtime",
        help="Dockerfile target to use. Non-exhaustive examples: [runtime, dev, local-dev]",
    )
    parser.add_argument(
        "--platform",
        type=str,
        default="amd64",
        help="Dockerfile platform to use. [amd64, arm64]",
    )
    parser.add_argument(
        "--cuda-version",
        type=str,
        default="12.9",
        help="CUDA version to use. [12.9, 13.0]",
    )
    parser.add_argument("--make-efa", action="store_true", help="Enable AWS EFA")
    parser.add_argument(
        "--short-output",
        action="store_true",
        help="Output filename is just rendered.Dockerfile",
    )
    parser.add_argument(
        "--show-result",
        action="store_true",
        help="Prints the rendered Dockerfile to stdout.",
    )
    return parser.parse_args(argv)
def validate_args(args):
    """Placeholder hook for argument validation.

    No checks are implemented yet, so every namespace is accepted.

    Args:
        args: Parsed command-line namespace (currently not inspected).

    Returns:
        None.
    """
    # TODO: Add validation logic
    return None
def render(args, context, script_dir):
    """Render ``Dockerfile.template`` and write the result next to the script.

    Args:
        args: Parsed command-line namespace; its framework, target, platform,
            cuda_version and make_efa values are handed to the template, and
            short_output / show_result control the output.
        context: Data loaded from context.yaml, exposed to the template as
            ``context``.
        script_dir: Directory that is searched for the template and that
            receives the generated Dockerfile.

    Returns:
        None. The Dockerfile is written to disk and a summary line is printed.
    """
    jinja_env = Environment(
        loader=FileSystemLoader(script_dir),
        trim_blocks=False,
        lstrip_blocks=True,
    )
    template_vars = {
        "context": context,
        "framework": args.framework,
        "target": args.target,
        "platform": args.platform,
        "cuda_version": args.cuda_version,
        "make_efa": args.make_efa,
    }
    raw_output = jinja_env.get_template("Dockerfile.template").render(**template_vars)

    # Collapse any run of three or more newlines down to exactly two.
    cleaned = re.sub(r"\n{3,}", "\n\n", raw_output)

    # --short-output gives a fixed name; otherwise the name encodes the options.
    filename = (
        "rendered.Dockerfile"
        if args.short_output
        else f"{args.framework}-{args.target}-cuda{args.cuda_version}-{args.platform}-rendered.Dockerfile"
    )
    output_path = f"{script_dir}/{filename}"

    with open(output_path, "w") as out_file:
        out_file.write(cleaned)

    if args.show_result:
        banner = "##############"
        print(banner)
        print("# Dockerfile #")
        print(banner)
        print(cleaned)
        print(banner)

    print(f"INFO: Generated Dockerfile written to {output_path}")
    return
def main():
    """Entry point: parse the options, load context.yaml and render the Dockerfile.

    context.yaml and the template are looked up in the directory that holds
    this script. When the ``local-dev`` target is rendered, a reminder about
    the USER_UID / USER_GID build args is printed.
    """
    args = parse_args()
    validate_args(args)

    # Locate sibling files from this module's own path. sys.argv[0] is not
    # reliable when the script is started through a wrapper or imported.
    script_dir = Path(__file__).parent
    with open(script_dir / "context.yaml", "r", encoding="utf-8") as f:
        context = yaml.safe_load(f)

    render(args, context, script_dir)

    if args.target == "local-dev":
        print(
            "INFO: Remember to add --build-arg values for USER_UID and USER_GID when building a local-dev image!"
        )
        print(
            "      Recommendation: --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g)"
        )


if __name__ == "__main__":
    main()
{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
##########################
#### Build Arguments #####
##########################
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
# --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
# --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#TODO OPS-592: Leverage uname -m to determine ARCH instead of passing it as an arg
ARG ARCH={{ platform }}
ARG ARCH_ALT={{ "x86_64" if platform == "amd64" else "aarch64" }}
# Python/CUDA configuration
ARG PYTHON_VERSION={{ context.dynamo.python_version }}
ARG CUDA_VERSION={{ cuda_version }}
ARG CUDA_MAJOR=${CUDA_VERSION%%.*}
{% if framework == "vllm" or framework == "sglang" -%}
{% set cuda_context_key = "cuda" + cuda_version %}
# Base image configuration (tag selected per CUDA version)
ARG BASE_IMAGE={{ context[framework].base_image }}
ARG BASE_IMAGE_TAG={{ context[framework][cuda_context_key].base_image_tag }}
{% else -%}
# Base image configuration
ARG BASE_IMAGE={{ context[framework].base_image }}
ARG BASE_IMAGE_TAG={{ context[framework].base_image_tag }}
{%- endif %}
{% if framework == "sglang" -%}
{% set cuda_context_key = "cuda" + cuda_version %}
# Runtime image configuration (tag selected per CUDA version)
ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
ARG RUNTIME_IMAGE_TAG={{ context[framework][cuda_context_key].runtime_image_tag }}
{% elif framework != "dynamo" -%}
# Runtime image configuration
ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
ARG RUNTIME_IMAGE_TAG={{ context[framework].runtime_image_tag }}
{%- endif %}
# Build configuration
ARG ENABLE_KVBM={{ context[framework].enable_kvbm }}
ARG CARGO_BUILD_JOBS
ARG NATS_VERSION={{ context.dynamo.nats_version }}
ARG ETCD_VERSION={{ context.dynamo.etcd_version }}
ARG ENABLE_MEDIA_NIXL={{ context[framework].enable_media_nixl }}
ARG ENABLE_MEDIA_FFMPEG={{ context[framework].enable_media_ffmpeg }}
ARG FFMPEG_VERSION={{ context.dynamo.ffmpeg_version }}
ARG ENABLE_GPU_MEMORY_SERVICE={{ context[framework].enable_gpu_memory_service }}
# SCCACHE configuration
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""
# NIXL configuration
ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
ARG NIXL_REF={{ context.dynamo.nixl_ref }}
ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
{% if target == "dev" or target == "local-dev" %}
ARG FRAMEWORK={{ framework }}
{% endif %}
{% if target == "frontend" %}
ARG EPP_IMAGE={{ context.dynamo.epp_image }}
ARG FRONTEND_IMAGE={{ context.dynamo.frontend_image }}
{% endif %}
{% if framework == "vllm" -%}
# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF={{ context.vllm.vllm_ref }}
ARG MAX_JOBS={{ context.vllm.max_jobs }}
# FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds
ARG FLASHINF_REF={{ context.vllm.flashinf_ref }}
ARG LMCACHE_REF={{ context.vllm.lmcache_ref }}
# If left blank, then we will fallback to vLLM defaults
ARG DEEPGEMM_REF=""
{%- endif -%}
{% if framework == "trtllm" %}
# TensorRT-LLM specific configuration
ARG HAS_TRTLLM_CONTEXT={{ context.trtllm.has_trtllm_context }}
ARG TENSORRTLLM_PIP_WHEEL={{ context.trtllm.pip_wheel }}
ARG TENSORRTLLM_INDEX_URL={{ context.trtllm.index_url }}
ARG GITHUB_TRTLLM_COMMIT={{ context.trtllm.github_trtllm_commit }}
ARG TRTLLM_WHEEL_IMAGE={{ context.trtllm.trtllm_wheel_image }}
# Copy pytorch installation from NGC PyTorch
ARG FLASHINFER_PYTHON_VER={{ context.trtllm.flashinfer_python_ver }}
ARG PYTORCH_TRITON_VER={{ context.trtllm.pytorch_triton_ver }}
ARG TORCHAO_VER={{ context.trtllm.torchao_ver }}
ARG TORCHDATA_VER={{ context.trtllm.torchdata_ver }}
ARG TORCHTITAN_VER={{ context.trtllm.torchtitan_ver }}
ARG TORCH_VER={{ context.trtllm.torch_version }}
ARG TORCH_TENSORRT_VER={{ context.trtllm.torch_tensorrt_version }}
ARG TORCHVISION_VER={{ context.trtllm.torchvision_version }}
ARG JINJA2_VER={{ context.trtllm.jinja2_version }}
ARG SYMPY_VER={{ context.trtllm.sympy_version }}
ARG FLASH_ATTN_VER={{ context.trtllm.flash_attn_version }}
# Python configuration
ARG TRTLLM_PYTHON_VERSION={{ context[framework].python_version }}
{%- endif -%}
{% if make_efa == true %}
# AWS EFA configuration (only rendered when render.py is run with --make-efa).
# EFA_BASE_IMAGE is "runtime" for the runtime target and "dev" for every other target.
ARG EFA_VERSION={{ context.dynamo.efa_version }}
ARG EFA_BASE_IMAGE={{ "runtime" if target=="runtime" else "dev" }}
{%- endif -%}
\ No newline at end of file
# syntax=docker/dockerfile:1.10.0
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
#############################
########## AWS EFA ##########
#############################
#
# PURPOSE: AWS EFA support layer
#
# This Dockerfile adds AWS EFA (Elastic Fabric Adapter) support on top of
# the runtime or dev stages from framework Dockerfiles (vllm, sglang, trtllm).
#
# Usage (via build.sh with --make-efa flag):
# ./build.sh --framework vllm --target runtime --make-efa
# ./build.sh --framework vllm --target local-dev --make-efa
ARG BASE_IMAGE
ARG EFA_VERSION
###########################################################
########## Runtime with AWS EFA ##########################
###########################################################
#
# This stage extends the runtime stage with AWS EFA installer
# This stage extends the runtime/dev stage with AWS EFA installer
# which includes: libfabric and aws-ofi-nccl plugin
#
# Use this stage when deploying on AWS infrastructure with EFA support
FROM ${BASE_IMAGE} AS runtime-aws
FROM ${EFA_BASE_IMAGE} AS aws
ARG EFA_VERSION
{% if target == "runtime" %}
USER root
{% endif %}
# Install AWS EFA installer with bundled libfabric and aws-ofi-nccl
# Flags explanation:
......@@ -48,43 +38,9 @@ RUN mkdir -p /tmp/efa && \
ENV EFA_VERSION="${EFA_VERSION}"
{% if target == "runtime" %}
USER dynamo
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
########################################################################
########## Development with AWS EFA (run.sh, runs as root user) ########
########################################################################
#
# PURPOSE: Development environment with AWS EFA support
#
# This stage extends dev stages with development tools for building and
# debugging on EFA-enabled AWS instances.
FROM ${BASE_IMAGE} AS dev-aws
ARG EFA_VERSION
# Dev stage runs as root, no USER switch needed
# Install AWS EFA installer with bundled libfabric and aws-ofi-nccl
# Flags explanation:
# --skip-kmod: Skip kernel module installation (handled by host)
# --skip-limit-conf: Skip ulimit configuration (handled by container runtime)
# --no-verify: Skip GPG verification (optional, can be removed if verification is needed)
RUN mkdir -p /tmp/efa && \
cd /tmp/efa && \
curl --retry 3 --retry-delay 2 -fsSL -o aws-efa-installer-${EFA_VERSION}.tar.gz \
https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_VERSION}.tar.gz && \
tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz && \
cd aws-efa-installer && \
apt-get update && \
./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify && \
rm -rf /tmp/efa && \
rm -rf /opt/amazon/aws-ofi-nccl && \
ldconfig
ENV EFA_VERSION="${EFA_VERSION}"
{% endif %}
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment