Unverified commit ac020629 authored by Dillon Cullinan, committed by GitHub
Browse files

feat: Dockerfile templating (#5633)


Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
parent 5755a8de
...@@ -143,23 +143,12 @@ Build the appropriate framework image (e.g., `dynamo:latest-vllm-local-dev`) fro ...@@ -143,23 +143,12 @@ Build the appropriate framework image (e.g., `dynamo:latest-vllm-local-dev`) fro
```bash ```bash
# Single command approach (recommended) # Single command approach (recommended)
export FRAMEWORK=VLLM # Note: any of VLLM, SGLANG, TRTLLM can be used export FRAMEWORK=VLLM # Note: any of VLLM, SGLANG, TRTLLM can be used
./container/build.sh --framework $FRAMEWORK --target local-dev python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile .
# Now you've created both dynamo:latest-vllm and dynamo:latest-vllm-local-dev # Now you've created both dynamo:latest-vllm and dynamo:latest-vllm-local-dev
``` ```
Alternatively, you can build a development container, then build local-dev:
```bash
export FRAMEWORK=VLLM
./container/build.sh --framework $FRAMEWORK
# Now you have a development image dynamo:latest-vllm
./container/build.sh --dev-image dynamo:latest-${FRAMEWORK,,}
# Now you have a local-dev image dynamo:latest-vllm-local-dev
```
The local-dev image will give you local user permissions matching your host user and includes extra developer utilities (debugging tools, text editors, system monitors, etc.). The local-dev image will give you local user permissions matching your host user and includes extra developer utilities (debugging tools, text editors, system monitors, etc.).
### Step 1: Choose Your Framework ### Step 1: Choose Your Framework
...@@ -427,10 +416,8 @@ If you see errors like "container is not running" or "An error occurred setting ...@@ -427,10 +416,8 @@ If you see errors like "container is not running" or "An error occurred setting
# If missing, build the dev image first, then build local-dev # If missing, build the dev image first, then build local-dev
export FRAMEWORK=VLLM # Replace with VLLM, SGLANG, or TRTLLM export FRAMEWORK=VLLM # Replace with VLLM, SGLANG, or TRTLLM
./container/build.sh --framework $FRAMEWORK python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
# change to lower case portable way across shells docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile .
./container/build.sh --dev-image dynamo:latest-$(echo "$FRAMEWORK" | tr '[:upper:]' '[:lower:]') --framework "$FRAMEWORK"
# Now you have dynamo:latest-vllm-local-dev
``` ```
2. **Container startup failure:** 2. **Container startup failure:**
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
**/target/* **/target/*
**/*safetensors **/*safetensors
container/Dockerfile* container/Dockerfile*
container/*.Dockerfile
.venv .venv
.venv-docs .venv-docs
......
name: 'Docker Build' name: 'Docker Build'
description: 'Build Dynamo container images' description: 'Build Dynamo container images'
inputs: inputs:
# --- Common Docker Inputs
framework: framework:
description: 'Framework to build' description: 'Framework to build'
required: true required: true
...@@ -13,9 +14,14 @@ inputs: ...@@ -13,9 +14,14 @@ inputs:
description: 'Docker platform to build on, ie. linux/amd64' description: 'Docker platform to build on, ie. linux/amd64'
required: false required: false
default: 'linux/amd64' default: 'linux/amd64'
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
image_tag: image_tag:
description: 'Custom image tag (optional, defaults to framework:latest)' description: 'Custom image tag (optional, defaults to framework:latest)'
required: false required: false
# --- Secret Inputs
ci_token: ci_token:
description: 'CI Token' description: 'CI Token'
required: false required: false
...@@ -34,21 +40,6 @@ inputs: ...@@ -34,21 +40,6 @@ inputs:
aws_secret_access_key: aws_secret_access_key:
description: 'AWS Secret Access Key' description: 'AWS Secret Access Key'
required: false required: false
base_image_tag:
description: 'Optional override for base image tag passed to build.sh'
required: false
runtime_image_tag:
description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
required: false
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
enable_kvbm:
description: 'Enable KVBM support (optional)'
required: false
dynamo_base_image:
description: 'Pre-built Dynamo base image to use instead of building from scratch'
required: false
outputs: outputs:
image_tag: image_tag:
...@@ -70,6 +61,44 @@ runs: ...@@ -70,6 +61,44 @@ runs:
shell: bash shell: bash
run: | run: |
docker system prune -af docker system prune -af
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0
with:
python-version: '3.12'
pip-install: jinja2 pyyaml
- name: Generate Dockerfile
shell: bash
run: |
echo "::group::Generating Dockerfile"
echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
python ./container/render.py \
--target=${{ inputs.target }} \
--framework=${{ inputs.framework }} \
--platform=${{ inputs.platform }} \
--cuda-version=${{ inputs.cuda_version }} \
--show-result \
--short-output
echo "::endgroup::"
- name: Build EPP image
if: ${{ inputs.target == 'frontend' }}
shell: bash
env:
ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
run: |
sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
. "$HOME/.cargo/env"
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
cargo install cbindgen
DOCKER_PROXY="${ECR_HOSTNAME}/dockerhub/"
pushd deploy/inference-gateway/epp
make all DOCKER_PROXY=${DOCKER_PROXY}
popd
EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
echo "EPP_IMAGE=${EPP_IMAGE}" >> $GITHUB_ENV
- name: Build image - name: Build image
id: build id: build
shell: bash shell: bash
...@@ -107,50 +136,34 @@ runs: ...@@ -107,50 +136,34 @@ runs:
echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> $GITHUB_ENV echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> $GITHUB_ENV
echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}" echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}"
# Collect optional overrides provided by the workflow
# Set base cache args and set --cache-to if this is a main commit # Set base cache args and set --cache-to if this is a main commit
# TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export # TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
EXTRA_ARGS="" CACHE_ARGS=""
if [[ "${{ inputs.target }}" != "frontend" ]]; then if [[ "${{ inputs.target }}" != "frontend" ]]; then
EXTRA_ARGS="--cache-to type=inline " CACHE_ARGS="--cache-to type=inline "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
# Release branches also use release cache # Release branches also use release cache
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max " CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
elif [[ "$GITHUB_REF_NAME" == "main" ]]; then elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max " CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
fi fi
fi fi
echo "$EXTRA_ARGS" EPP_IMAGE_ARG=""
# Collect optional overrides provided by the workflow if [[ ${{ inputs.target }} == "frontend" ]]; then
if [ -n "${{ inputs.base_image_tag }}" ]; then EPP_IMAGE_ARG="--build-arg EPP_IMAGE=${EPP_IMAGE}"
EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
fi
if [ -n "${{ inputs.runtime_image_tag }}" ]; then
EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
fi
if [ -n "${{ inputs.cuda_version }}" ]; then
EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
fi
if [ -n "${{ inputs.dynamo_base_image }}" ]; then
EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
fi
if [ -n "${{ inputs.enable_kvbm }}" ]; then
EXTRA_ARGS+=" --build-arg ENABLE_KVBM=${{ inputs.enable_kvbm }}"
fi fi
# Execute build and capture output (show on console AND save to file) docker buildx build \
./container/build.sh --tag "$IMAGE_TAG" \ --progress=plain \
--target ${{ inputs.target }} \ --tag "$IMAGE_TAG" \
--vllm-max-jobs 10 \ --load \
--framework ${{ inputs.framework }} \ -f ./container/rendered.Dockerfile \
--platform ${{ inputs.platform }} \ $CACHE_ARGS \
--use-sccache \ $EPP_IMAGE_ARG . 2>&1 | tee "${BUILD_LOG_FILE}"
--sccache-bucket "$SCCACHE_S3_BUCKET" \
--sccache-region "$AWS_DEFAULT_REGION" $EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
BUILD_EXIT_CODE=${PIPESTATUS[0]} BUILD_EXIT_CODE=${PIPESTATUS[0]}
......
...@@ -13,6 +13,9 @@ inputs: ...@@ -13,6 +13,9 @@ inputs:
description: 'Docker platform to build on, ie. linux/amd64' description: 'Docker platform to build on, ie. linux/amd64'
required: false required: false
default: 'linux/amd64' default: 'linux/amd64'
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
image_tag: image_tag:
description: 'Custom image tag' description: 'Custom image tag'
required: true required: true
...@@ -34,21 +37,6 @@ inputs: ...@@ -34,21 +37,6 @@ inputs:
aws_secret_access_key: aws_secret_access_key:
description: 'AWS Secret Access Key' description: 'AWS Secret Access Key'
required: false required: false
base_image_tag:
description: 'Optional override for base image tag passed to build.sh'
required: false
runtime_image_tag:
description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
required: false
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
enable_kvbm:
description: 'Enable KVBM support (optional)'
required: false
dynamo_base_image:
description: 'Pre-built Dynamo base image to use instead of building from scratch'
required: false
no_cache: no_cache:
description: 'Disable Docker build cache' description: 'Disable Docker build cache'
required: false required: false
...@@ -117,21 +105,20 @@ runs: ...@@ -117,21 +105,20 @@ runs:
# Collect optional overrides provided by the workflow # Collect optional overrides provided by the workflow
# Set base cache args and set --cache-to if this is a main commit # Set base cache args and set --cache-to if this is a main commit
# TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export # TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
EXTRA_ARGS="" CACHE_ARGS=""
if [[ "${{ inputs.target }}" != "frontend" ]]; then if [[ "${{ inputs.target }}" != "frontend" ]]; then
EXTRA_ARGS="--cache-to type=inline " CACHE_ARGS="--cache-to type=inline "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
# Release branches also use release cache # Release branches also use release cache
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max " CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
elif [[ "$GITHUB_REF_NAME" == "main" ]]; then elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max " CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
fi fi
fi fi
echo "$CACHE_ARGS"
echo "$EXTRA_ARGS"
# Collect optional overrides provided by the workflow # Collect optional overrides provided by the workflow
if [[ "${{ inputs.ci }}" == "true" ]]; then if [[ "${{ inputs.ci }}" == "true" ]]; then
...@@ -139,21 +126,6 @@ runs: ...@@ -139,21 +126,6 @@ runs:
EXTRA_ARGS+=" --ci" EXTRA_ARGS+=" --ci"
fi fi
if [ -n "${{ inputs.base_image_tag }}" ]; then
EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
fi
if [ -n "${{ inputs.runtime_image_tag }}" ]; then
EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
fi
if [ -n "${{ inputs.cuda_version }}" ]; then
EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
fi
if [ -n "${{ inputs.dynamo_base_image }}" ]; then
EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
fi
if [ -n "${{ inputs.enable_kvbm }}" ]; then
EXTRA_ARGS+=" --build-arg ENABLE_KVBM=${{ inputs.enable_kvbm }}"
fi
if [ "${{ inputs.no_cache }}" == "true" ]; then if [ "${{ inputs.no_cache }}" == "true" ]; then
EXTRA_ARGS+=" --no-cache" EXTRA_ARGS+=" --no-cache"
fi fi
...@@ -161,9 +133,9 @@ runs: ...@@ -161,9 +133,9 @@ runs:
EXTRA_ARGS+=" --build-arg CARGO_BUILD_JOBS=4 --use-sccache" EXTRA_ARGS+=" --build-arg CARGO_BUILD_JOBS=4 --use-sccache"
fi fi
if [ "${{ inputs.push_image }}" == "true" ]; then if [ "${{ inputs.push_image }}" == "true" ]; then
EXTRA_ARGS+=" --push --no-load" EXTRA_ARGS+=" --push"
elif [ "${{ inputs.no_load }}" == "true" ]; then elif [ "${{ inputs.no_load }}" == "false" ]; then
EXTRA_ARGS+=" --no-load" EXTRA_ARGS+=" --load"
fi fi
# Add extra tags (each as a separate --tag argument) # Add extra tags (each as a separate --tag argument)
...@@ -176,16 +148,14 @@ runs: ...@@ -176,16 +148,14 @@ runs:
done <<< "$EXTRA_TAGS" done <<< "$EXTRA_TAGS"
fi fi
# Execute build and capture output (show on console AND save to file) docker buildx build \
./container/build.sh --tag "$IMAGE_TAG" \ --progress=plain \
--target ${{ inputs.target }} \ --tag "$IMAGE_TAG" \
--vllm-max-jobs 10 \ --platform linux/${{ inputs.platform }} \
--no-tag-latest \ -f ./container/rendered.Dockerfile \
--framework ${{ inputs.framework }} \ $CACHE_ARGS \
--platform ${{ inputs.platform }} \ $EXTRA_ARGS \
--sccache-bucket "$SCCACHE_S3_BUCKET" \ $EPP_IMAGE_ARG . 2>&1 | tee "${BUILD_LOG_FILE}"
--sccache-region "$AWS_DEFAULT_REGION" \
$EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
BUILD_EXIT_CODE=${PIPESTATUS[0]} BUILD_EXIT_CODE=${PIPESTATUS[0]}
......
...@@ -63,6 +63,9 @@ ignore: ...@@ -63,6 +63,9 @@ ignore:
- 'container/run.sh' - 'container/run.sh'
- 'container/use-sccache.sh' - 'container/use-sccache.sh'
- 'container/dev/**' - 'container/dev/**'
- 'container/templates/aws.Dockerfile'
- 'container/templates/local_dev.Dockerfile'
- 'container/templates/dev.Dockerfile'
ci: &ci ci: &ci
- '.github/workflows/**' - '.github/workflows/**'
...@@ -78,6 +81,12 @@ core: ...@@ -78,6 +81,12 @@ core:
- *ci - *ci
- 'container/build.sh' - 'container/build.sh'
- 'container/Dockerfile' - 'container/Dockerfile'
- 'container/render.py'
- 'container/Dockerfile.template'
- 'container/context.yaml'
- 'container/templates/args.Dockerfile'
- 'container/templates/dynamo_*'
- 'container/templates/wheel_builder.Dockerfile'
- '.dockerignore' - '.dockerignore'
- 'container/deps/*' - 'container/deps/*'
- '.cargo/config.toml' - '.cargo/config.toml'
...@@ -120,6 +129,7 @@ vllm: ...@@ -120,6 +129,7 @@ vllm:
- 'container/deps/vllm/**' - 'container/deps/vllm/**'
- 'examples/backends/vllm/**' - 'examples/backends/vllm/**'
- 'components/src/dynamo/vllm/**' - 'components/src/dynamo/vllm/**'
- 'container/templates/vllm_*'
sglang: sglang:
- '!**/*.md' - '!**/*.md'
...@@ -127,6 +137,7 @@ sglang: ...@@ -127,6 +137,7 @@ sglang:
- 'container/Dockerfile.sglang' - 'container/Dockerfile.sglang'
- 'examples/backends/sglang/**' - 'examples/backends/sglang/**'
- 'components/src/dynamo/sglang/**' - 'components/src/dynamo/sglang/**'
- 'container/templates/sglang_*'
trtllm: trtllm:
- '!**/*.md' - '!**/*.md'
...@@ -136,6 +147,7 @@ trtllm: ...@@ -136,6 +147,7 @@ trtllm:
- 'examples/backends/trtllm/**' - 'examples/backends/trtllm/**'
- 'components/src/dynamo/trtllm/**' - 'components/src/dynamo/trtllm/**'
- 'container/build_trtllm_wheel.sh' - 'container/build_trtllm_wheel.sh'
- 'container/templates/trtllm_*'
frontend: frontend:
- '!**/*.md' - '!**/*.md'
...@@ -153,6 +165,7 @@ frontend: ...@@ -153,6 +165,7 @@ frontend:
- 'components/src/dynamo/frontend/**' - 'components/src/dynamo/frontend/**'
- 'components/src/dynamo/common/**' - 'components/src/dynamo/common/**'
- 'deploy/inference-gateway/**' - 'deploy/inference-gateway/**'
- 'container/templates/frontend.Dockerfile'
rust: rust:
- '.github/workflows/pre-merge.yml' - '.github/workflows/pre-merge.yml'
......
...@@ -68,21 +68,6 @@ jobs: ...@@ -68,21 +68,6 @@ jobs:
uses: actions/setup-go@v5 uses: actions/setup-go@v5
with: with:
go-version: '1.24' go-version: '1.24'
- name: Install dependencies
shell: bash
run: |
set -euo pipefail
# Install system dependencies from apt
sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
# Install Rust (cargo + rustc)
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
# Make cargo available to later steps
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
- name: Install cbindgen
shell: bash
run: |
set -euo pipefail
cargo install cbindgen
- name: Docker Login - name: Docker Login
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
with: with:
...@@ -95,10 +80,10 @@ jobs: ...@@ -95,10 +80,10 @@ jobs:
id: build-image id: build-image
uses: ./.github/actions/docker-build uses: ./.github/actions/docker-build
env: env:
PLATFORMS: linux/${{ matrix.platform.arch }} PLATFORMS: ${{ matrix.platform.arch }}
TARGETARCH: ${{ matrix.platform.arch }} TARGETARCH: ${{ matrix.platform.arch }}
with: with:
framework: none framework: dynamo
target: frontend target: frontend
platform: ${{ env.PLATFORMS }} platform: ${{ env.PLATFORMS }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
......
...@@ -10,6 +10,10 @@ on: ...@@ -10,6 +10,10 @@ on:
description: 'Framework name (vllm, sglang, trtllm)' description: 'Framework name (vllm, sglang, trtllm)'
required: true required: true
type: string type: string
target:
description: 'Target stage for Docker rendering'
required: true
type: string
platforms: platforms:
description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])' description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])'
required: true required: true
...@@ -86,6 +90,7 @@ jobs: ...@@ -86,6 +90,7 @@ jobs:
with: with:
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
platform: ${{ matrix.platform }} platform: ${{ matrix.platform }}
target: ${{ inputs.target }}
cuda_version: ${{ matrix.cuda_version }} cuda_version: ${{ matrix.cuda_version }}
extra_tags: ${{ inputs.extra_tags }} extra_tags: ${{ inputs.extra_tags }}
no_cache: ${{ inputs.no_cache }} no_cache: ${{ inputs.no_cache }}
......
...@@ -10,6 +10,10 @@ on: ...@@ -10,6 +10,10 @@ on:
description: 'Framework name (vllm, sglang, trtllm)' description: 'Framework name (vllm, sglang, trtllm)'
required: true required: true
type: string type: string
target:
description: 'Target stage for Docker rendering'
required: true
type: string
platform: platform:
description: 'Platform to build (amd64 or arm64)' description: 'Platform to build (amd64 or arm64)'
required: true required: true
...@@ -105,13 +109,17 @@ jobs: ...@@ -105,13 +109,17 @@ jobs:
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with: with:
lfs: true lfs: true
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0
with:
python-version: '3.12'
pip-install: jinja2 pyyaml
- name: Calculate extra tags with platform suffix # will get redundant upon multi arch builds support - name: Calculate extra tags with platform suffix # will get redundant upon multi arch builds support
id: extra-tags id: extra-tags
shell: bash shell: bash
env: env:
EXTRA_TAGS: ${{ inputs.extra_tags }} EXTRA_TAGS: ${{ inputs.extra_tags }}
PLATFORM: ${{ inputs.platform }} PLATFORM: linux/${{ inputs.platform }}
run: | run: |
if [ -n "$EXTRA_TAGS" ]; then if [ -n "$EXTRA_TAGS" ]; then
RESULT="" RESULT=""
...@@ -134,7 +142,6 @@ jobs: ...@@ -134,7 +142,6 @@ jobs:
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }} azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
azure_acr_user: ${{ secrets.AZURE_ACR_USER }} azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }} azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
- name: Calculate target tag - name: Calculate target tag
id: calculate-target-tag id: calculate-target-tag
shell: bash shell: bash
...@@ -146,7 +153,6 @@ jobs: ...@@ -146,7 +153,6 @@ jobs:
echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
- name: Initialize Dynamo Builder - name: Initialize Dynamo Builder
uses: ./.github/actions/init-dynamo-builder uses: ./.github/actions/init-dynamo-builder
with: with:
...@@ -154,28 +160,37 @@ jobs: ...@@ -154,28 +160,37 @@ jobs:
flavor: ${{ inputs.framework }} flavor: ${{ inputs.framework }}
arch: ${{ inputs.platform }} arch: ${{ inputs.platform }}
cuda_version: ${{ inputs.cuda_version }} cuda_version: ${{ inputs.cuda_version }}
- name: Print Build Container inputs - name: Print Build Container inputs
run: | run: |
echo "=== Build Container Inputs ===" echo "=== Build Container Inputs ==="
echo "image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}" echo "image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}"
echo "framework: ${{ inputs.framework }}" echo "framework: ${{ inputs.framework }}"
echo "target: runtime" echo "target: runtime"
echo "platform: linux/${{ inputs.platform }}" echo "platform: ${{ inputs.platform }}"
echo "cuda_version: ${{ inputs.cuda_version }}" echo "cuda_version: ${{ inputs.cuda_version }}"
echo "no_cache: ${{ inputs.no_cache }}" echo "no_cache: ${{ inputs.no_cache }}"
echo "extra_tags: ${{ steps.extra-tags.outputs.tags }}" echo "extra_tags: ${{ steps.extra-tags.outputs.tags }}"
echo "push_image: ${{ inputs.push_image }}" echo "push_image: ${{ inputs.push_image }}"
echo "no_load: ${{ inputs.no_load }}" echo "no_load: ${{ inputs.no_load }}"
- name: Generate Dockerfile
shell: bash
run: |
echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
python ./container/render.py \
--target=${{ inputs.target }} \
--framework=${{ inputs.framework }} \
--platform=${{ inputs.platform }} \
--cuda-version=${{ inputs.cuda_version }} \
--show-result \
--short-output
- name: Build Container - name: Build Container
id: build-image id: build-image
uses: ./.github/actions/docker-remote-build uses: ./.github/actions/docker-remote-build
with: with:
image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }} image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
target: runtime target: ${{ inputs.target }}
platform: linux/${{ inputs.platform }} platform: ${{ inputs.platform }}
cuda_version: ${{ inputs.cuda_version }} cuda_version: ${{ inputs.cuda_version }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
......
...@@ -99,17 +99,15 @@ jobs: ...@@ -99,17 +99,15 @@ jobs:
with: with:
framework: ${{ matrix.framework }} framework: ${{ matrix.framework }}
target: runtime target: runtime
platform: linux/amd64 platform: amd64
base_image_tag: '' cuda_version: '12.9'
runtime_image_tag: '' image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
cuda_version: ''
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }} sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
- name: Tag and Push Runtime Images - name: Tag and Push Runtime Images
uses: ./.github/actions/docker-tag-push uses: ./.github/actions/docker-tag-push
with: with:
...@@ -132,13 +130,9 @@ jobs: ...@@ -132,13 +130,9 @@ jobs:
matrix: matrix:
include: include:
- framework: vllm - framework: vllm
base_image_tag: '25.06-cuda12.9-devel-ubuntu24.04'
runtime_image_tag: '12.9.0-runtime-ubuntu24.04'
cuda_version: '12.9' cuda_version: '12.9'
- framework: sglang - framework: sglang
base_image_tag: '' cuda_version: '12.9'
runtime_image_tag: ''
cuda_version: ''
env: env:
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
steps: steps:
...@@ -158,17 +152,15 @@ jobs: ...@@ -158,17 +152,15 @@ jobs:
with: with:
framework: ${{ matrix.framework }} framework: ${{ matrix.framework }}
target: runtime target: runtime
platform: linux/arm64 platform: arm64
base_image_tag: ${{ matrix.base_image_tag }}
runtime_image_tag: ${{ matrix.runtime_image_tag }}
cuda_version: ${{ matrix.cuda_version }} cuda_version: ${{ matrix.cuda_version }}
image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }} sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
- name: Tag and Push Runtime Images - name: Tag and Push Runtime Images
uses: ./.github/actions/docker-tag-push uses: ./.github/actions/docker-tag-push
with: with:
...@@ -210,17 +202,15 @@ jobs: ...@@ -210,17 +202,15 @@ jobs:
with: with:
framework: ${{ matrix.framework }} framework: ${{ matrix.framework }}
target: runtime target: runtime
platform: linux/amd64 platform: amd64
base_image_tag: ''
runtime_image_tag: ''
cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }} cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }} sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
- name: Tag and Push CUDA 13 Runtime Images - name: Tag and Push CUDA 13 Runtime Images
uses: ./.github/actions/docker-tag-push uses: ./.github/actions/docker-tag-push
with: with:
...@@ -262,17 +252,15 @@ jobs: ...@@ -262,17 +252,15 @@ jobs:
with: with:
framework: ${{ matrix.framework }} framework: ${{ matrix.framework }}
target: runtime target: runtime
platform: linux/arm64 platform: arm64
base_image_tag: ''
runtime_image_tag: ''
cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }} cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }} sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
- name: Tag and Push CUDA 13 Runtime Images - name: Tag and Push CUDA 13 Runtime Images
uses: ./.github/actions/docker-tag-push uses: ./.github/actions/docker-tag-push
with: with:
......
...@@ -66,11 +66,27 @@ jobs: ...@@ -66,11 +66,27 @@ jobs:
id: define_image_tag id: define_image_tag
run: | run: |
echo "image_tag=dynamo:latest-dev" >> $GITHUB_OUTPUT echo "image_tag=dynamo:latest-dev" >> $GITHUB_OUTPUT
# Render container/rendered.Dockerfile, which the following "Build image" step
# passes to `docker buildx build -f`. The render arguments are fixed for this
# job, so the log line reports those literal values instead of workflow inputs
# (which do not drive the command below).
- name: Generate Dockerfile
  shell: bash
  run: |
    echo "Generating Dockerfile for target: dev and framework: dynamo (platform: amd64)"
    python ./container/render.py \
      --target=dev \
      --framework=dynamo \
      --platform=amd64 \
      --show-result \
      --short-output
- name: Build image - name: Build image
env: env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }} GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
run: | run: |
./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --target dev --framework none --enable-kvbm --enable-media-ffmpeg docker buildx build \
--progress=plain \
--tag ${{ steps.define_image_tag.outputs.image_tag }} \
-f ./container/rendered.Dockerfile \
--build-arg ENABLE_MEDIA_FFMPEG=true \
--build-arg ENABLE_KVBM=true \
--load .
- name: Start services with docker-compose - name: Start services with docker-compose
working-directory: ./deploy working-directory: ./deploy
run: | run: |
......
...@@ -177,6 +177,7 @@ jobs: ...@@ -177,6 +177,7 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with: with:
framework: vllm framework: vllm
target: runtime
platforms: '["amd64", "arm64"]' platforms: '["amd64", "arm64"]'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
extra_tags: | extra_tags: |
...@@ -194,6 +195,7 @@ jobs: ...@@ -194,6 +195,7 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with: with:
framework: sglang framework: sglang
target: runtime
platforms: '["amd64", "arm64"]' platforms: '["amd64", "arm64"]'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
extra_tags: | extra_tags: |
...@@ -211,11 +213,12 @@ jobs: ...@@ -211,11 +213,12 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with: with:
framework: trtllm framework: trtllm
target: runtime
platforms: '["amd64", "arm64"]' platforms: '["amd64", "arm64"]'
cuda_versions: '["13.1"]' cuda_versions: '["13.1"]'
extra_tags: | extra_tags: |
${{ github.ref_name == 'main' && 'main-trtllm' || '' }} ${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
${{ github.ref_name == 'main' && format('main-trt-llm-{0}', github.sha) || '' }} ${{ github.ref_name == 'main' && format('main-trtllm-{0}', github.sha) || '' }}
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
secrets: inherit secrets: inherit
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
.vs/ .vs/
.vscode/ .vscode/
.helix .helix
*rendered.Dockerfile
[Bb]inlog/ [Bb]inlog/
[Bb][Uu][Ii][Ll][Dd]/ [Bb][Uu][Ii][Ll][Dd]/
[Oo][Bb][Jj]/ [Oo][Bb][Jj]/
......
# syntax=docker/dockerfile:1.10.0
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# NOTE FOR dynamo_base AND wheel_builder STAGES:
#
# All changes to dynamo_base and wheel_builder stages should be replicated across
# Dockerfile and Dockerfile.<framework> images.:
# - Dockerfile
# - Dockerfile.vllm
# - Dockerfile.sglang
# - Dockerfile.trtllm
# This duplication was introduced purposely to quickly enable Docker layer caching and
# deduplication. Please ensure these stages stay in sync until the duplication can be
# addressed.
##################################
########## Build Arguments ########
##################################
# This section contains build arguments that are common and shared across various
# Dockerfile.<frameworks>, so they should NOT have a default. The source of truth is from build.sh.
# ARGs declared here, before the first FROM, are global: FROM lines can interpolate them,
# but a build stage must redeclare one (a bare `ARG <name>`) before its own instructions can read it.
ARG BASE_IMAGE
ARG BASE_IMAGE_TAG
# Image used for the `epp` stage further down (FROM ${EPP_IMAGE} AS epp).
ARG EPP_IMAGE="us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1"
ARG PYTHON_VERSION
# Feature toggles; the build steps compare them against the literal string "true".
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
ARG ENABLE_MEDIA_NIXL
ARG ENABLE_MEDIA_FFMPEG
ARG CARGO_BUILD_JOBS
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
# --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
# --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#TODO OPS-592: Leverage uname -m to determine ARCH instead of passing it as an arg
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
# SCCACHE configuration
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""
# NIXL configuration
# Git refs checked out when UCX, NIXL, gdrcopy and libfabric are built in the wheel_builder stage.
ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF
ARG NIXL_LIBFABRIC_REF
##################################
########## Base Image ############
##################################
# Stage `base`: built on the caller-supplied ${BASE_IMAGE}:${BASE_IMAGE_TAG}.
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Bring the global architecture ARGs into this stage's scope.
ARG ARCH
ARG ARCH_ALT
USER root
WORKDIR /opt/dynamo
# Install uv package manager
# NOTE(review): the `latest` tag is unpinned, so the uv version can differ between builds.
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
# NATS server: download the release .deb matching ARCH, install it with dpkg,
# then remove the downloaded package file.
ENV NATS_VERSION="v2.10.28"
RUN --mount=type=cache,target=/var/cache/apt \
    nats_deb="nats-server-${NATS_VERSION}-${ARCH}.deb" && \
    wget --tries=3 --waitretry=5 "https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/${nats_deb}" && \
    dpkg -i "${nats_deb}" && \
    rm "${nats_deb}"
# etcd: fetch the release tarball matching ARCH, unpack it (top-level directory
# stripped) into /usr/local/bin/etcd, delete the tarball and expose the directory on PATH.
ENV ETCD_VERSION="v3.5.21"
RUN etcd_tarball=/tmp/etcd.tar.gz && \
    etcd_dir=/usr/local/bin/etcd && \
    wget --tries=3 --waitretry=5 -O "${etcd_tarball}" \
        "https://github.com/etcd-io/etcd/releases/download/${ETCD_VERSION}/etcd-${ETCD_VERSION}-linux-${ARCH}.tar.gz" && \
    mkdir -p "${etcd_dir}" && \
    tar -xvf "${etcd_tarball}" -C "${etcd_dir}" --strip-components=1 && \
    rm "${etcd_tarball}"
ENV PATH=/usr/local/bin/etcd/:$PATH
# Rust toolchain
# Shared install locations for rustup and cargo, plus the pinned toolchain version.
ENV RUSTUP_HOME=/usr/local/rustup \
    CARGO_HOME=/usr/local/cargo \
    RUST_VERSION=1.90.0 \
    PATH=/usr/local/cargo/bin:$PATH
# Rust host triple derived from ARCH_ALT (e.g. x86_64-unknown-linux-gnu)
ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu
# Download rustup-init for that triple, install the minimal profile of RUST_VERSION,
# then make both homes writable by every user.
RUN wget --tries=3 --waitretry=5 \
        "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \
    chmod +x rustup-init && \
    ./rustup-init -y \
        --no-modify-path \
        --profile minimal \
        --default-toolchain "${RUST_VERSION}" \
        --default-host "${RUSTARCH}" && \
    rm rustup-init && \
    chmod -R a+w "${RUSTUP_HOME}" "${CARGO_HOME}"
##################################
##### Wheel Build Image ##########
##################################
# NOTE(review): this ARG still belongs to the `base` stage above. Under Docker's ARG
# scoping rules the FROM below interpolates ${ARCH_ALT} from the global declaration made
# before the first FROM, so this redeclaration looks redundant — confirm before removing.
ARG ARCH_ALT
# Stage `wheel_builder`: manylinux_2_28 image for the target architecture.
FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
# Redeclare ARGs for this stage
ARG ARCH
ARG ARCH_ALT
ARG CARGO_BUILD_JOBS
ARG ENABLE_MEDIA_FFMPEG
WORKDIR /workspace
# Copy CUDA from base stage
COPY --from=base /usr/local/cuda /usr/local/cuda
# Also reuse the base image's HPC-X dynamic linker configuration.
COPY --from=base /etc/ld.so.conf.d/hpcx.conf /etc/ld.so.conf.d/hpcx.conf
# Set environment variables first so they can be used in COPY commands
# CARGO_BUILD_JOBS falls back to 16 when the build arg is unset or empty.
ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
RUSTUP_HOME=/usr/local/rustup \
CARGO_HOME=/usr/local/cargo \
CARGO_TARGET_DIR=/opt/dynamo/target \
PATH=/usr/local/cargo/bin:$PATH
# Copy artifacts from base stage
# (the Rust toolchain installed there, at the same paths)
COPY --from=base $RUSTUP_HOME $RUSTUP_HOME
COPY --from=base $CARGO_HOME $CARGO_HOME
# Install system dependencies
# Everything is installed in a single layer; the dnf caches are removed at the end of
# the same RUN so they do not persist in the image.
RUN dnf install -y almalinux-release-synergy && \
dnf config-manager --set-enabled powertools && \
dnf install -y \
# Autotools (required for UCX, libfabric ./autogen.sh and ./configure)
autoconf \
automake \
libtool \
make \
# RPM build tools (required for gdrcopy's build-rpm-packages.sh)
rpm-build \
rpm-sign \
# Build tools
cmake \
ninja-build \
clang-devel \
# Install GCC toolset 14 (CUDA compatible, max version 14)
gcc-toolset-14-gcc \
gcc-toolset-14-gcc-c++ \
gcc-toolset-14-binutils \
flex \
wget \
# Kernel module build dependencies
dkms \
# Protobuf support
protobuf-compiler \
# RDMA/InfiniBand support (required for UCX build with --with-verbs)
libibverbs \
libibverbs-devel \
rdma-core \
rdma-core-devel \
libibumad \
libibumad-devel \
librdmacm-devel \
numactl-devel \
# Libfabric support
hwloc \
hwloc-devel && \
dnf clean all && rm -rf /var/cache/dnf/
# Set GCC toolset 14 as the default compiler (CUDA requires GCC <= 14)
# The toolset's bin directory is prepended to PATH and CC/CXX point at it explicitly,
# so later build steps in this stage inherit these compiler settings.
ENV PATH="/opt/rh/gcc-toolset-14/root/usr/bin:${PATH}" \
LD_LIBRARY_PATH="/opt/rh/gcc-toolset-14/root/usr/lib64:${LD_LIBRARY_PATH}" \
CC="/opt/rh/gcc-toolset-14/root/usr/bin/gcc" \
CXX="/opt/rh/gcc-toolset-14/root/usr/bin/g++"
# Install a pinned protoc release (needed for --experimental_allow_proto3_optional).
# The release archive name depends on ARCH_ALT; any other architecture aborts the build.
# Existing protoc binaries are removed first, and /usr/bin/protoc is symlinked to the new one.
RUN set -eux; \
    PROTOC_VERSION=25.3; \
    if [ "${ARCH_ALT}" = "x86_64" ]; then \
        PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-x86_64.zip"; \
    elif [ "${ARCH_ALT}" = "aarch64" ]; then \
        PROTOC_ZIP="protoc-${PROTOC_VERSION}-linux-aarch_64.zip"; \
    else \
        echo "Unsupported architecture: ${ARCH_ALT}" >&2; \
        exit 1; \
    fi; \
    PROTOC_URL="https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/${PROTOC_ZIP}"; \
    wget --tries=3 --waitretry=5 -O /tmp/protoc.zip "${PROTOC_URL}"; \
    rm -f /usr/local/bin/protoc /usr/bin/protoc; \
    unzip -o /tmp/protoc.zip -d /usr/local bin/protoc include/*; \
    chmod +x /usr/local/bin/protoc; \
    ln -s /usr/local/bin/protoc /usr/bin/protoc; \
    protoc --version
# Make build tools use the freshly installed protoc explicitly
ENV PROTOC=/usr/local/bin/protoc
# Expose the CUDA toolkit copied from the base stage to compilers and the dynamic linker.
ENV CUDA_PATH=/usr/local/cuda \
PATH=/usr/local/cuda/bin:$PATH \
LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH \
NVIDIA_DRIVER_CAPABILITIES=video,compute,utility
# Create virtual environment for building wheels
ARG PYTHON_VERSION
ENV VIRTUAL_ENV=/workspace/.venv
# Cache uv downloads; uv handles its own locking for this cache.
# The venv is created for PYTHON_VERSION and receives the Python-side build tooling.
RUN --mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION && \
uv pip install --upgrade meson pybind11 patchelf maturin[patchelf] tomlkit
# Bring the NIXL-related refs into this stage's scope.
ARG NIXL_UCX_REF
ARG NIXL_REF
ARG NIXL_GDRCOPY_REF
# Build and install gdrcopy
# Shallow-clones the NIXL_GDRCOPY_REF branch/tag, builds RPMs against /usr/local/cuda,
# and installs the kmod, library and devel packages.
RUN git clone --depth 1 --branch ${NIXL_GDRCOPY_REF} https://github.com/NVIDIA/gdrcopy.git && \
cd gdrcopy/packages && \
CUDA=/usr/local/cuda ./build-rpm-packages.sh && \
rpm -Uvh gdrcopy-kmod-*.el8.noarch.rpm && \
rpm -Uvh gdrcopy-*.el8.${ARCH_ALT}.rpm && \
rpm -Uvh gdrcopy-devel-*.el8.noarch.rpm
# Install SCCACHE if requested
ARG USE_SCCACHE
ARG SCCACHE_BUCKET
ARG SCCACHE_REGION
# The helper script is copied unconditionally: later build steps call
# `/tmp/use-sccache.sh show-stats` outside of any USE_SCCACHE check.
COPY container/use-sccache.sh /tmp/use-sccache.sh
RUN if [ "$USE_SCCACHE" = "true" ]; then \
/tmp/use-sccache.sh install; \
fi
# Set SCCACHE environment variables
# ${USE_SCCACHE:+value} expands to `value` whenever USE_SCCACHE is set to any non-empty
# string and to nothing otherwise.
# NOTE(review): a non-empty value other than "true" (e.g. "false") would still set
# RUSTC_WRAPPER=sccache even though the install step above is skipped — confirm callers
# only pass USE_SCCACHE when enabling it.
ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \
SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \
RUSTC_WRAPPER=${USE_SCCACHE:+sccache}
# Build FFmpeg from source
# Do not delete the source tarball for legal reasons
# The whole step is skipped unless ENABLE_MEDIA_FFMPEG is "true". When it runs, shared
# libraries are installed under /usr/local, and the tarball plus the extracted source tree
# are moved to /usr/local/src/ffmpeg rather than removed.
ARG FFMPEG_VERSION=7.1
# AWS credentials are exposed to this RUN step only, through BuildKit secret mounts.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
# Default the sccache key prefix to the target architecture when none is provided.
export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export RUSTC_WRAPPER="sccache"; \
fi && \
dnf install -y pkg-config && \
cd /tmp && \
curl -LO https://ffmpeg.org/releases/ffmpeg-${FFMPEG_VERSION}.tar.xz && \
tar xf ffmpeg-${FFMPEG_VERSION}.tar.xz && \
cd ffmpeg-${FFMPEG_VERSION} && \
./configure \
--prefix=/usr/local \
--disable-gpl \
--disable-nonfree \
--disable-programs \
--disable-doc \
--disable-static \
--disable-x86asm \
--disable-postproc \
--disable-network \
--disable-encoders \
--disable-muxers \
--disable-bsfs \
--disable-devices \
--disable-libdrm \
--enable-shared && \
make -j$(nproc) && \
make install && \
/tmp/use-sccache.sh show-stats "FFMPEG" && \
ldconfig && \
mkdir -p /usr/local/src/ffmpeg && \
mv /tmp/ffmpeg-${FFMPEG_VERSION}* /usr/local/src/ffmpeg/; \
fi
# Build and install UCX
# Clones openucx/ucx, checks out NIXL_UCX_REF, configures a release build with CUDA,
# verbs, gdrcopy and EFA support, installs it (stripped) under /usr/local/ucx and
# registers its library directories with the dynamic linker.
# AWS credentials are exposed to this RUN step only, through BuildKit secret mounts.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    # Default the sccache key prefix to the target architecture when none is provided.
    export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
    fi && \
    cd /usr/local/src && \
    git clone https://github.com/openucx/ucx.git && \
    cd ucx && \
    git checkout $NIXL_UCX_REF && \
    ./autogen.sh && \
    ./contrib/configure-release \
        --prefix=/usr/local/ucx \
        --enable-shared \
        --disable-static \
        --disable-doxygen-doc \
        --enable-optimizations \
        --enable-cma \
        --enable-devel-headers \
        --with-cuda=/usr/local/cuda \
        --with-verbs \
        --with-dm \
        --with-gdrcopy=/usr/local \
        --with-efa \
        --enable-mt && \
    # Cap parallelism at the CPU count: a bare `make -j` places no limit on concurrent
    # jobs and can exhaust memory on the builder. This matches the FFmpeg and libfabric steps.
    make -j$(nproc) && \
    make -j$(nproc) install-strip && \
    /tmp/use-sccache.sh show-stats "UCX" && \
    echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \
    echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \
    ldconfig
# Build and install libfabric
# Clones ofiwg/libfabric, checks out NIXL_LIBFABRIC_REF and installs an EFA-enabled build
# (verbs, psm3, opx, usnic and rstream providers disabled) under /usr/local/libfabric,
# then registers its lib directory with the dynamic linker.
ARG NIXL_LIBFABRIC_REF
# AWS credentials are exposed to this RUN step only, through BuildKit secret mounts.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
cd /usr/local/src && \
git clone https://github.com/ofiwg/libfabric.git && \
cd libfabric && \
git checkout $NIXL_LIBFABRIC_REF && \
./autogen.sh && \
./configure --prefix="/usr/local/libfabric" \
--disable-verbs \
--disable-psm3 \
--disable-opx \
--disable-usnic \
--disable-rstream \
--enable-efa \
--with-cuda=/usr/local/cuda \
--enable-cuda-dlopen \
--with-gdrcopy \
--enable-gdrcopy-dlopen && \
make -j$(nproc) && \
make install && \
/tmp/use-sccache.sh show-stats "LIBFABRIC" && \
echo "/usr/local/libfabric/lib" > /etc/ld.so.conf.d/libfabric.conf && \
ldconfig
# build and install nixl
# Runs in WORKDIR (/workspace) with the build venv activated: clones ai-dynamo/nixl,
# checks out NIXL_REF, renames the wheel to nixl-cu<CUDA major>, then builds and installs
# with meson/ninja into /opt/nvidia/nvda_nixl against the UCX and libfabric built above.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
source ${VIRTUAL_ENV}/bin/activate && \
git clone "https://github.com/ai-dynamo/nixl.git" && \
cd nixl && \
git checkout ${NIXL_REF} && \
# Extract the major version from the "release X.Y" token printed by `nvcc --version`.
CUDA_MAJOR=$(nvcc --version | grep -Eo 'release [0-9]+\.[0-9]+' | cut -d' ' -f2 | cut -d'.' -f1) && \
# Only CUDA 12 and 13 are accepted; anything else fails the build.
if [ "$CUDA_MAJOR" -ne 12 ] && [ "$CUDA_MAJOR" -ne 13 ]; then \
echo "Invalid CUDA_MAJOR: '$CUDA_MAJOR'" && \
exit 1; \
fi && \
PKG_NAME="nixl-cu${CUDA_MAJOR}" && \
./contrib/tomlutil.py --wheel-name $PKG_NAME pyproject.toml && \
mkdir build && \
meson setup build/ --prefix=/opt/nvidia/nvda_nixl --buildtype=release \
-Dcudapath_lib="/usr/local/cuda/lib64" \
-Dcudapath_inc="/usr/local/cuda/include" \
-Ducx_path="/usr/local/ucx" \
-Dlibfabric_path="/usr/local/libfabric" && \
cd build && \
ninja && \
ninja install && \
/tmp/use-sccache.sh show-stats "NIXL"
# NIXL install locations inside this stage (lib64 layout).
ENV NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib64 \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib64/plugins \
NIXL_PREFIX=/opt/nvidia/nvda_nixl
ENV LD_LIBRARY_PATH=${NIXL_LIB_DIR}:${NIXL_PLUGIN_DIR}:/usr/local/ucx/lib:/usr/local/ucx/lib/ucx:${LD_LIBRARY_PATH}
# Register the NIXL library and plugin directories with the dynamic linker.
RUN echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \
echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \
ldconfig
# Build the NIXL Python wheel from the checkout in /workspace/nixl for PYTHON_VERSION
# and write it to /opt/dynamo/dist/nixl.
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
--mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
--mount=type=cache,target=/root/.cache/uv \
export UV_CACHE_DIR=/root/.cache/uv && \
export SCCACHE_S3_KEY_PREFIX="${SCCACHE_S3_KEY_PREFIX:-${ARCH}}" && \
if [ "$USE_SCCACHE" = "true" ]; then \
export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
export CMAKE_CUDA_COMPILER_LAUNCHER="sccache"; \
fi && \
cd /workspace/nixl && \
uv build . --wheel --out-dir /opt/dynamo/dist/nixl --python $PYTHON_VERSION
# Copy source code (order matters for layer caching)
COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/
COPY launch/ /opt/dynamo/launch/
COPY lib/ /opt/dynamo/lib/
COPY components/ /opt/dynamo/components/
# Build dynamo wheels. The caches do not need the "shared" lock because Cargo has its own locking mechanism.
#
# Steps, all in one layer:
#   1. build the pure-Python wheel from /opt/dynamo;
#   2. build the Python bindings with maturin, enabling the optional media features
#      selected by ENABLE_MEDIA_NIXL / ENABLE_MEDIA_FFMPEG;
#   3. when ENABLE_KVBM is "true", build the kvbm bindings and repair the wheel with
#      auditwheel (shared libraries are excluded from bundling);
#   4. print sccache statistics.
ARG ENABLE_KVBM
# ENABLE_MEDIA_NIXL is only declared globally (before the first FROM); it has to be
# redeclared in this stage, otherwise it is empty in the RUN below and the
# dynamo-llm/media-nixl feature can never be enabled.
ARG ENABLE_MEDIA_NIXL
RUN --mount=type=secret,id=aws-key-id,env=AWS_ACCESS_KEY_ID \
    --mount=type=secret,id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY \
    --mount=type=cache,target=/root/.cargo/registry \
    --mount=type=cache,target=/root/.cargo/git \
    --mount=type=cache,target=/root/.cache/uv \
    export UV_CACHE_DIR=/root/.cache/uv && \
    export SCCACHE_S3_KEY_PREFIX=${SCCACHE_S3_KEY_PREFIX:-${ARCH}} && \
    if [ "$USE_SCCACHE" = "true" ]; then \
        export CMAKE_C_COMPILER_LAUNCHER="sccache" && \
        export CMAKE_CXX_COMPILER_LAUNCHER="sccache" && \
        export RUSTC_WRAPPER="sccache"; \
    fi && \
    source ${VIRTUAL_ENV}/bin/activate && \
    cd /opt/dynamo && \
    uv build --wheel --out-dir /opt/dynamo/dist && \
    cd /opt/dynamo/lib/bindings/python && \
    # Every top-level command is chained with `&&` (not `;`) so that a failure in
    # any earlier step, such as `uv build` or `cd`, fails the layer instead of being ignored.
    FEATURES="" && \
    if [ "$ENABLE_MEDIA_NIXL" = "true" ]; then \
        FEATURES="$FEATURES dynamo-llm/media-nixl"; \
    fi && \
    if [ "$ENABLE_MEDIA_FFMPEG" = "true" ]; then \
        FEATURES="$FEATURES media-ffmpeg"; \
    fi && \
    if [ -n "$FEATURES" ]; then \
        maturin build --release --features "$FEATURES" --out /opt/dynamo/dist; \
    else \
        maturin build --release --out /opt/dynamo/dist; \
    fi && \
    if [ "$ENABLE_KVBM" = "true" ]; then \
        cd /opt/dynamo/lib/bindings/kvbm && \
        maturin build --release --out target/wheels && \
        auditwheel repair \
            --exclude libnixl.so \
            --exclude libnixl_build.so \
            --exclude libnixl_common.so \
            --exclude 'lib*.so*' \
            --plat manylinux_2_28_${ARCH_ALT} \
            --wheel-dir /opt/dynamo/dist \
            target/wheels/*.whl; \
    fi && \
    /tmp/use-sccache.sh show-stats "Dynamo"
# Optional gpu_memory_service wheel (a C++ extension that only needs Python headers,
# no CUDA/torch). Nothing is built unless ENABLE_GPU_MEMORY_SERVICE is "true".
ARG ENABLE_GPU_MEMORY_SERVICE
RUN [ "$ENABLE_GPU_MEMORY_SERVICE" != "true" ] || { \
        source ${VIRTUAL_ENV}/bin/activate && \
        uv build --wheel --out-dir /opt/dynamo/dist /opt/dynamo/lib/gpu_memory_service; \
    }
##############################################
########## Runtime image ##############
##############################################
# Stage `runtime`: continues from `base` and receives the artifacts built in `wheel_builder`.
FROM base AS runtime
ARG ARCH_ALT
ARG PYTHON_VERSION
# Create dynamo user with group 0 for OpenShift compatibility
# Removing the `ubuntu` user is best effort (errors ignored); the build then asserts
# that the new `dynamo` user received UID 1000 and fails otherwise.
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo \
# Non-recursive chown - only the directories themselves, not contents
# NOTE(review): /workspace is chowned but not created here — presumably it already
# exists in the base image; confirm.
&& chown dynamo:0 /home/dynamo /home/dynamo/.cache /opt/dynamo /workspace \
# No chmod needed: umask 002 handles new files, COPY --chmod handles copied content
# Set umask globally for all subsequent RUN commands (must be done as root before USER dynamo)
# NOTE: Setting ENV UMASK=002 does NOT work - umask is a shell builtin, not an environment variable
&& mkdir -p /etc/profile.d && echo 'umask 002' > /etc/profile.d/00-umask.sh
# NIXL environment variables
# In this stage the library directory is lib/${ARCH_ALT}-linux-gnu, whereas the
# wheel_builder stage installed into lib64; the lib64 contents are copied across below.
ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl \
NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \
NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins \
CARGO_TARGET_DIR=/opt/dynamo/target
# Copy ucx and nixl libs
COPY --chown=dynamo: --from=wheel_builder /usr/local/ucx/ /usr/local/ucx/
COPY --chown=dynamo: --from=wheel_builder ${NIXL_PREFIX}/ ${NIXL_PREFIX}/
# Place the builder's lib64 contents at this stage's NIXL_LIB_DIR.
COPY --chown=dynamo: --from=wheel_builder /opt/nvidia/nvda_nixl/lib64/. ${NIXL_LIB_DIR}/
# NIXL wheels go into the wheelhouse so they can be installed alongside the dynamo wheels.
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/nixl/ /opt/dynamo/wheelhouse/nixl/
COPY --chown=dynamo: --from=wheel_builder /workspace/nixl/build/src/bindings/python/nixl-meta/nixl-*.whl /opt/dynamo/wheelhouse/nixl/
# Copy ffmpeg
# The builder's /usr/local is bind-mounted read-only at /tmp/usr/local for this step.
# Commands are separated by `;` and the step ends with `true`, so it is deliberately
# best effort: when FFmpeg was not built the cp commands fail and the layer still succeeds.
# cp -n does not overwrite existing files; -L copies symlink targets rather than the links.
RUN --mount=type=bind,from=wheel_builder,source=/usr/local/,target=/tmp/usr/local/ \
cp -rnL /tmp/usr/local/include/libav* /tmp/usr/local/include/libsw* /usr/local/include/; \
cp -nL /tmp/usr/local/lib/libav*.so /tmp/usr/local/lib/libsw*.so /usr/local/lib/; \
cp -nL /tmp/usr/local/lib/pkgconfig/libav*.pc /tmp/usr/local/lib/pkgconfig/libsw*.pc /usr/lib/pkgconfig/; \
cp -r /tmp/usr/local/src/ffmpeg /usr/local/src/; \
true # in case ffmpeg not enabled
# Copy built artifacts
# (the cargo target directory and every wheel produced in wheel_builder)
COPY --chown=dynamo: --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR
COPY --chown=dynamo: --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/
# Install Python for framework=none runtime (cuda-dl-base doesn't include Python)
# This is needed to create venv and install dynamo packages
# (PYTHON_VERSION was already declared at the top of this stage; repeating it is harmless.)
ARG PYTHON_VERSION
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
# NOTE(review): `apt-get clean` below empties the package cache, which may defeat the
# purpose of the cache mount — confirm whether that is intended.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
python${PYTHON_VERSION}-dev \
python${PYTHON_VERSION}-venv && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
# Point python3 at the interpreter version that was just installed.
ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python3
# From here on, run as the unprivileged dynamo user.
USER dynamo
ENV HOME=/home/dynamo
# RUN commands use a login bash shell so /etc/profile.d/00-umask.sh applies
# (umask 002 -> group-writable files); pipefail is enabled as well.
SHELL ["/bin/bash", "-l", "-o", "pipefail", "-c"]
# Create the virtual environment. The uv cache is a BuildKit cache mount owned by
# uid 1000 / gid 0; uv does its own locking on it.
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    UV_CACHE_DIR=/home/dynamo/.cache/uv \
    uv venv /opt/dynamo/venv --python ${PYTHON_VERSION}
# Activate the virtual environment for all later instructions and at container runtime.
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="/opt/dynamo/venv/bin:${PATH}"
# Install the dynamo wheels from the wheelhouse (runtime packages only, no test
# dependencies). The optional gpu_memory_service and kvbm wheels are installed when
# their toggle is "true"; a missing wheel in that case is a hard error.
# uv does its own locking on the cache mount, so sharing=locked is not needed.
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
    uv pip install \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
    if [ "$ENABLE_GPU_MEMORY_SERVICE" = "true" ]; then \
        GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
        if [ -n "$GMS_WHEEL" ]; then \
            uv pip install "$GMS_WHEEL"; \
        else \
            echo "ERROR: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse" >&2; \
            exit 1; \
        fi; \
    fi && \
    if [ "$ENABLE_KVBM" = "true" ]; then \
        KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
        if [ -n "$KVBM_WHEEL" ]; then \
            uv pip install "$KVBM_WHEEL"; \
        else \
            echo "ERROR: ENABLE_KVBM is true but no KVBM wheel found in wheelhouse" >&2; \
            exit 1; \
        fi; \
    fi
# Record the source commit in the image environment.
ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
##############################################
########## Frontend entrypoint image #########
##############################################
# Stage `epp`: exists only so that the /epp binary can be copied out of EPP_IMAGE below.
FROM ${EPP_IMAGE} AS epp
# Stage `frontend`: Ubuntu-based image that runs /epp and carries the dynamo Python packages.
FROM nvcr.io/nvidia/base/ubuntu:noble-20250619 AS frontend
ARG PYTHON_VERSION
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt-get update -y \
&& apt-get install -y --no-install-recommends \
# required for EPP
ca-certificates \
libstdc++6 \
# required for verification of GPG keys
gnupg2 \
# required for installing dependencies from git repositories
git \
git-lfs \
# Python runtime - required for virtual environment to work
python${PYTHON_VERSION}-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Create dynamo user with group 0 for OpenShift compatibility
# Removing the `ubuntu` user is best effort; the build asserts that `dynamo` got UID 1000.
# Unlike the runtime stage, the directories are created here and ownership and group
# write permission are applied recursively.
RUN userdel -r ubuntu > /dev/null 2>&1 || true \
&& useradd -m -s /bin/bash -g 0 dynamo \
&& [ `id -u dynamo` -eq 1000 ] \
&& mkdir -p /home/dynamo/.cache /opt/dynamo /workspace \
&& chown -R dynamo: /opt/dynamo /home/dynamo/.cache /workspace \
&& chmod -R g+w /opt/dynamo /home/dynamo/.cache /workspace
# Set HOME so ModelExpress can find the cache directory
ENV HOME=/home/dynamo
# Switch to dynamo user
USER dynamo
ENV DYNAMO_HOME=/opt/dynamo
WORKDIR /
# The EPP binary comes from the `epp` stage; the launch message is printed by the
# bash.bashrc hook added at the end of this stage.
COPY --chown=dynamo: --from=epp /epp /epp
COPY --chown=dynamo: container/launch_message/frontend.txt /opt/dynamo/.launch_screen
# Copy tests, benchmarks, deploy and components with correct ownership
COPY --chown=dynamo: tests /workspace/tests
COPY --chown=dynamo: examples /workspace/examples
COPY --chown=dynamo: benchmarks /workspace/benchmarks
COPY --chown=dynamo: deploy /workspace/deploy
COPY --chown=dynamo: components/ /workspace/components/
COPY --chown=dynamo: recipes/ /workspace/recipes/
# Copy attribution files with correct ownership
COPY --chown=dynamo: ATTRIBUTION* LICENSE /workspace/
# Virtual environment location; the venv itself is created in a later step.
ENV VIRTUAL_ENV=/opt/dynamo/venv
ENV PATH="/opt/dynamo/venv/bin:$PATH"
# Copy uv and wheelhouse from runtime stage
COPY --chown=dynamo: --from=runtime /bin/uv /bin/uvx /bin/
COPY --chown=dynamo: --from=runtime /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/
# Virtual environment for the frontend image, created for PYTHON_VERSION.
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    mkdir -p /opt/dynamo/venv && \
    UV_CACHE_DIR=/home/dynamo/.cache/uv \
    uv venv /opt/dynamo/venv --python $PYTHON_VERSION
# Common and test dependencies. The requirement files are bind-mounted from the build
# context instead of being copied into the image. Longer HTTP timeout and retries are
# set for uv; ideally a PyPI mirror would make these downloads more reliable.
RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
    --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \
    --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    UV_CACHE_DIR=/home/dynamo/.cache/uv \
    UV_GIT_LFS=1 \
    UV_HTTP_TIMEOUT=300 \
    UV_HTTP_RETRIES=5 \
    uv pip install \
        --requirement /tmp/requirements.txt \
        --requirement /tmp/requirements.test.txt
# Install the dynamo wheels copied from the runtime stage, then the benchmarks package.
# The optional gpu_memory_service and kvbm wheels are installed when their toggle is
# "true"; a missing wheel in that case is a hard error.
# Ideally a PyPI mirror would make these downloads more reliable.
ARG ENABLE_KVBM
ARG ENABLE_GPU_MEMORY_SERVICE
RUN --mount=type=cache,target=/home/dynamo/.cache/uv,uid=1000,gid=0,mode=0775 \
    export UV_CACHE_DIR=/home/dynamo/.cache/uv && \
    uv pip install \
        /opt/dynamo/wheelhouse/ai_dynamo_runtime*.whl \
        /opt/dynamo/wheelhouse/ai_dynamo*any.whl \
        /opt/dynamo/wheelhouse/nixl/nixl*.whl && \
    if [ "$ENABLE_GPU_MEMORY_SERVICE" = "true" ]; then \
        GMS_WHEEL=$(ls /opt/dynamo/wheelhouse/gpu_memory_service*.whl 2>/dev/null | head -1); \
        if [ -n "$GMS_WHEEL" ]; then \
            uv pip install "$GMS_WHEEL"; \
        else \
            echo "ERROR: ENABLE_GPU_MEMORY_SERVICE is true but no gpu_memory_service wheel found in wheelhouse" >&2; \
            exit 1; \
        fi; \
    fi && \
    if [ "$ENABLE_KVBM" = "true" ]; then \
        KVBM_WHEEL=$(ls /opt/dynamo/wheelhouse/kvbm*.whl 2>/dev/null | head -1); \
        if [ -n "$KVBM_WHEEL" ]; then \
            uv pip install "$KVBM_WHEEL"; \
        else \
            echo "ERROR: ENABLE_KVBM is true but no KVBM wheel found in wheelhouse" >&2; \
            exit 1; \
        fi; \
    fi && \
    cd /workspace/benchmarks && \
    export UV_GIT_LFS=1 UV_HTTP_TIMEOUT=300 UV_HTTP_RETRIES=5 && \
    uv pip install .
# Setup environment for all users
USER root
RUN chmod 755 /opt/dynamo/.launch_screen && \
echo 'source /opt/dynamo/venv/bin/activate' >> /etc/bash.bashrc && \
echo 'cat /opt/dynamo/.launch_screen' >> /etc/bash.bashrc
USER dynamo
ENTRYPOINT ["/epp"]
CMD ["/bin/bash"]
# syntax=docker/dockerfile:1.10.0-labs
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
{% include "templates/args.Dockerfile" %}
# --- Base Image Stages
{# dynamo_base: every framework build, plus every recognised standalone ("dynamo") target. #}
{% if framework != "dynamo" or target in ["frontend", "runtime", "dev", "local-dev", "wheel_builder", "base"] %}
{% include "templates/dynamo_base.Dockerfile" %}
{% endif %}
{# wheel_builder: as above, except that the standalone "base" target stops at dynamo_base. #}
{% if framework != "dynamo" or target in ["frontend", "runtime", "dev", "local-dev", "wheel_builder"] %}
{% include "templates/wheel_builder.Dockerfile" %}
{% endif %}
{# dynamo_runtime and frontend are only emitted for standalone builds. #}
{% if framework == "dynamo" and target in ["frontend", "runtime", "dev", "local-dev"] %}
{% include "templates/dynamo_runtime.Dockerfile" %}
{% if target == "frontend" %}
{% include "templates/frontend.Dockerfile" %}
{% endif %}
{% endif %}
# --- Framework Stages
{% if framework == "sglang" %}
# SGLang is the only framework without a `framework` target currently, needs special treatment
{% include "templates/sglang_runtime.Dockerfile" %}
{% elif framework != "dynamo" and target in ["framework", "runtime", "dev", "local-dev"] %}
{# The framework stage is always needed; the runtime stage is added for everything past "framework". #}
{% include "templates/" ~ framework ~ "_framework.Dockerfile" %}
{% if target != "framework" %}
{% include "templates/" ~ framework ~ "_runtime.Dockerfile" %}
{% endif %}
{% else %}
# --- No framework stages included
{% endif %}
{# For the runtime target the AWS/EFA stage is appended here; dev targets append it below, after dev. #}
{% if make_efa == true and target == "runtime" %}
{% include "templates/aws.Dockerfile" %}
{% endif %}
# --- Development Stages
{% if target in ["dev", "local-dev"] %}
{% include "templates/dev.Dockerfile" %}
{% if make_efa == true %}
{% include "templates/aws.Dockerfile" %}
{% endif %}
{% if target == "local-dev" %}
{% include "templates/local_dev.Dockerfile" %}
{% endif %}
{% else %}
# --- No development stages included
{% endif %}
...@@ -6,22 +6,21 @@ The NVIDIA Dynamo project uses containerized development and deployment to maint ...@@ -6,22 +6,21 @@ The NVIDIA Dynamo project uses containerized development and deployment to maint
### Core Components ### Core Components
- **`build.sh`** - A Docker image builder that creates containers for different AI inference frameworks (vLLM, TensorRT-LLM, SGLang). It handles framework-specific dependencies, multi-stage builds, and development vs production configurations. - **`render.py`** - A render script used to generate Dockerfiles for AI inference frameworks (vLLM, TensorRT-LLM, SGLang). The generated Dockerfile includes the needed multi-stage steps for development vs production configurations
- **`run.sh`** - A container runtime manager that launches Docker containers with proper GPU access, volume mounts, and environment configurations. It supports different development workflows from root-based legacy setups to user-based development environments. - **`run.sh`** - A container runtime manager that launches Docker containers with proper GPU access, volume mounts, and environment configurations. It supports different development workflows from root-based legacy setups to user-based development environments.
- **Multiple Dockerfiles** - Framework-specific Dockerfiles that define the container images: ## Rendering Requirements:
- `Dockerfile.vllm` - For vLLM inference backend - Python
- `Dockerfile.trtllm` - For TensorRT-LLM inference backend - Python Packages:
- `Dockerfile.sglang` - For SGLang inference backend - pyyaml
- `Dockerfile` - Base/standalone configuration - jinja2
- `Dockerfile.epp` - For building the Endpoint Picker (EPP) image
### Stage Summary for Frameworks ### Stage Summary for Frameworks
<details> <details>
<summary>Show Stage Summary Table</summary> <summary>Show Stage Summary Table</summary>
Dockerfile.${FRAMEWORK} General Structure Dockerfile General Structure
Below is a summary of the general file structure for the framework Dockerfile stages. Some exceptions exist. Below is a summary of the general file structure for the framework Dockerfile stages. Some exceptions exist.
...@@ -80,14 +79,13 @@ The scripts in this directory abstract away the complexity of Docker commands wh ...@@ -80,14 +79,13 @@ The scripts in this directory abstract away the complexity of Docker commands wh
### Convenience Scripts vs Direct Docker Commands ### Convenience Scripts vs Direct Docker Commands
The `build.sh` and `run.sh` scripts are convenience wrappers that simplify common Docker operations. They automatically handle: The `run.sh` script and the rendering scripts are convenience wrappers that simplify common Docker operations. They automatically handle:
- Framework-specific image selection and tagging
- GPU access configuration and runtime selection - GPU access configuration and runtime selection
- Volume mount setup for development workflows - Volume mount setup for development workflows
- Environment variable management - Environment variable management
- Build argument construction for multi-stage builds - Build argument construction for multi-stage builds
**You can always use Docker commands directly** if you prefer more control or want to customize beyond what the scripts provide. The scripts use `--dry-run` flags to show you the exact Docker commands they would execute, making it easy to understand and modify the underlying operations. **You can always use Docker commands directly** if you prefer more control or want to customize beyond what the scripts provide. The `run.sh` script provides a `--dry-run` flag that shows the exact commands it would execute, making it easy to understand and modify the underlying operations.
## Development Targets Feature Matrix ## Development Targets Feature Matrix
...@@ -117,10 +115,11 @@ The `build.sh` and `run.sh` scripts are convenience wrappers that simplify commo ...@@ -117,10 +115,11 @@ The `build.sh` and `run.sh` scripts are convenience wrappers that simplify commo
### 1. runtime target (runs as non-root dynamo user): ### 1. runtime target (runs as non-root dynamo user):
```bash ```bash
# Build runtime image # Build runtime image
./build.sh --framework vllm --target runtime python container/render.py --framework vllm --target runtime --short-output
docker build -t dynamo:latest-vllm-runtime -f container/rendered.Dockerfile .
# Run runtime container # Run runtime container
./run.sh --image dynamo:latest-vllm-runtime -it container/run.sh --image dynamo:latest-vllm-runtime -it
``` ```
### 2. local-dev + `run.sh` (runs as dynamo user with matched host UID/GID): ### 2. local-dev + `run.sh` (runs as dynamo user with matched host UID/GID):
...@@ -133,20 +132,20 @@ Use VS Code/Cursor Dev Container Extension with devcontainer.json configuration. ...@@ -133,20 +132,20 @@ Use VS Code/Cursor Dev Container Extension with devcontainer.json configuration.
## Build and Run Scripts Overview ## Build and Run Scripts Overview
### build.sh - Docker Image Builder ### render.py - Dockerfile Generator
The `build.sh` script is responsible for building Docker images for different AI inference frameworks. It supports multiple frameworks and configurations: The `render.py` script is responsible for generating Dockerfiles for different AI inference frameworks. It supports multiple frameworks and configurations:
**Purpose:** **Purpose:**
- Builds Docker images for NVIDIA Dynamo with support for vLLM, TensorRT-LLM, SGLang, or standalone configurations - Generates Dockerfiles for NVIDIA Dynamo with support for vLLM, TensorRT-LLM, SGLang, or standalone configurations
- Handles framework-specific dependencies and optimizations - Handles framework-specific dependencies and optimizations
- Manages build contexts, caching, and multi-stage builds - Manages build contexts, caching, and multi-stage builds
- Configures development vs production targets - Configures development vs production targets
**Key Features:** **Key Features:**
- **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE - **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE (standalone Dynamo)
- **Multi-stage Builds**: Build process with base images - **Multi-stage Builds**: Build process with base images
- **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `build.sh`. - **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `render.py`.
- **Build Caching**: Docker layer caching and sccache support - **Build Caching**: Docker layer caching and sccache support
- **GPU Optimization**: CUDA, EFA, and NIXL support - **GPU Optimization**: CUDA, EFA, and NIXL support
...@@ -221,52 +220,49 @@ Current cache types (as mounted in various Dockerfiles): ...@@ -221,52 +220,49 @@ Current cache types (as mounted in various Dockerfiles):
Note: `uv` commands set `UV_CACHE_DIR` per `RUN` so `uv` always uses the same path as the cache mount (instead of relying on `$HOME`). Note: `uv` commands set `UV_CACHE_DIR` per `RUN` so `uv` always uses the same path as the cache mount (instead of relying on `$HOME`).
**How `dev` / `local-dev` builds work:**
- `dev` and `local-dev` targets are defined in `container/dev/Dockerfile.dev`.
- The framework Dockerfiles (`Dockerfile.vllm`, `Dockerfile.trtllm`, `Dockerfile.sglang`, `Dockerfile`) define shared stages used by `Dockerfile.dev` (e.g. `runtime`, `dynamo_base`, `wheel_builder`).
- To build a single coherent Dockerfile, `build.sh` generates a temporary Dockerfile that is a literal concatenation of:
- the selected framework Dockerfile, then
- `container/dev/Dockerfile.dev`
`build.sh` then continues building normally using the temp Dockerfile path.
**Requirements and debugging:**
- By default the temp Dockerfile is deleted at the end of `build.sh`. To keep it for inspection, set `KEEP_DEV_DOCKERFILE_TEMP=1`.
> **💡 Tip**: The `dev` and `local-dev` images have source code baked in, but **using `--mount-workspace` with `run.sh` is recommended for development** to bind mount your local workspace for live editing. > **💡 Tip**: The `dev` and `local-dev` images have source code baked in, but **using `--mount-workspace` with `run.sh` is recommended for development** to bind mount your local workspace for live editing.
**Common Usage Examples:** **Common Usage Examples:**
```bash ```bash
# Build vLLM dev image called dynamo:latest-vllm (default). This runs as root and is for development. # Build vLLM dev image called dynamo:latest-vllm-dev. This runs as root and is for development.
./build.sh python container/render.py --framework=vllm --target=dev --short-output
docker build -t dynamo:latest-vllm-dev -f container/rendered.Dockerfile .
# Build a local-dev image. The local-dev image will run as `dynamo` with UID/GID matched to your host user, # Build a local-dev image. The local-dev image will run as `dynamo` with UID/GID matched to your host user,
# which is useful when mounting partitions for development. # which is useful when mounting partitions for development.
./build.sh --framework vllm --target local-dev python container/render.py --framework=vllm --target=local-dev --short-output
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
# Build TensorRT-LLM development image called dynamo:latest-trtllm # Build TensorRT-LLM runtime image called dynamo:latest-trtllm-runtime
./build.sh --framework trtllm python container/render.py --framework=trtllm --target=runtime --short-output
docker build -t dynamo:latest-trtllm-runtime -f container/rendered.Dockerfile .
# Build with custom tag
./build.sh --framework sglang --tag my-custom-tag
# Dry run to see commands
./build.sh --dry-run
# Build with no cache
./build.sh --no-cache
# Build with build arguments
./build.sh --build-arg CUSTOM_ARG=value
``` ```
### Building the Frontend Image ### Building the Frontend Image
The frontend image is a specialized container that includes the Dynamo components (Dynamo, NIXL, etc) along with the Endpoint Picker (EPP) for Kubernetes Gateway API Inference Extension integration. This image is primarily used for inference gateway deployments. The frontend image is a specialized container that includes the Dynamo components (Dynamo, NIXL, etc) along with the Endpoint Picker (EPP) for Kubernetes Gateway API Inference Extension integration. This image is primarily used for inference gateway deployments.
**Build EPP Image**
```bash
sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
. "$HOME/.cargo/env"
cargo install cbindgen
pushd deploy/inference-gateway/epp
make all
popd
EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
```
**Build Frontend Image**
```bash ```bash
# Build the frontend image (automatically builds EPP image as a dependency) # Build the frontend image (uses the EPP image named by EPP_IMAGE from the previous step)
./build.sh --framework none --target frontend python container/render.py --framework=dynamo --target=frontend --short-output
docker build -t dynamo:frontend --build-arg EPP_IMAGE=${EPP_IMAGE} -f container/rendered.Dockerfile .
``` ```
The build process automatically: The build process automatically:
...@@ -313,34 +309,34 @@ The `run.sh` script launches Docker containers with the appropriate configuratio ...@@ -313,34 +309,34 @@ The `run.sh` script launches Docker containers with the appropriate configuratio
```bash ```bash
# Basic container launch with dev image (runs as root by default, non-interactive) # Basic container launch with dev image (runs as root by default, non-interactive)
./run.sh --image dynamo:latest-vllm -v $HOME/.cache:/root/.cache container/run.sh --image dynamo:latest-vllm -v $HOME/.cache:/root/.cache
# Interactive development with workspace mounted using dev image (runs as root) # Interactive development with workspace mounted using dev image (runs as root)
./run.sh --image dynamo:latest-vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Interactive development with local-dev image (runs as dynamo user with matched host UID/GID) # Interactive development with local-dev image (runs as dynamo user with matched host UID/GID)
./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Use specific image and framework for development # Use specific image and framework for development
./run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Interactive development shell with workspace mounted (local-dev) # Interactive development shell with workspace mounted (local-dev)
./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it -- bash container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it -- bash
# Development with custom environment variables # Development with custom environment variables
./run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Dry run to see docker command # Dry run to see docker command
./run.sh --dry-run container/run.sh --dry-run
# Development with custom volume mounts # Development with custom volume mounts
./run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Run runtime image as non-root dynamo user (for production) # Run runtime image as non-root dynamo user (for production)
./run.sh --image dynamo:latest-vllm-runtime -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-runtime -v $HOME/.cache:/home/dynamo/.cache
# Run dev image as specific user (override default root) # Run dev image as specific user (override default root)
./run.sh --image dynamo:latest-vllm --user dynamo -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm --user dynamo -v $HOME/.cache:/home/dynamo/.cache
``` ```
### Network Configuration Options ### Network Configuration Options
...@@ -350,8 +346,8 @@ The `run.sh` script supports different networking modes via the `--network` flag ...@@ -350,8 +346,8 @@ The `run.sh` script supports different networking modes via the `--network` flag
#### Host Networking (Default) #### Host Networking (Default)
```bash ```bash
# Examples with dynamo user # Examples with dynamo user
./run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/dynamo/.cache
./run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/dynamo/.cache
``` ```
**Use cases:** **Use cases:**
- High-performance ML inference (default for GPU workloads) - High-performance ML inference (default for GPU workloads)
...@@ -364,7 +360,7 @@ The `run.sh` script supports different networking modes via the `--network` flag ...@@ -364,7 +360,7 @@ The `run.sh` script supports different networking modes via the `--network` flag
#### Bridge Networking (Isolated) #### Bridge Networking (Isolated)
```bash ```bash
# CI/testing with isolated bridge networking and host cache sharing (no -it for automated CI) # CI/testing with isolated bridge networking and host cache sharing (no -it for automated CI)
./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache
``` ```
**Use cases:** **Use cases:**
- Secure isolation from host network - Secure isolation from host network
...@@ -377,10 +373,10 @@ The `run.sh` script supports different networking modes via the `--network` flag ...@@ -377,10 +373,10 @@ The `run.sh` script supports different networking modes via the `--network` flag
#### No Networking ⚠️ **LIMITED FUNCTIONALITY** #### No Networking ⚠️ **LIMITED FUNCTIONALITY**
```bash ```bash
# Complete network isolation - no external connectivity # Complete network isolation - no external connectivity
./run.sh --image dynamo:latest-vllm --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Same with local-dev image (dynamo user with matched host UID/GID) # Same with local-dev image (dynamo user with matched host UID/GID)
./run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
``` ```
**⚠️ WARNING: `--network none` severely limits Dynamo functionality:** **⚠️ WARNING: `--network none` severely limits Dynamo functionality:**
- **No model downloads** - HuggingFace models cannot be downloaded - **No model downloads** - HuggingFace models cannot be downloaded
...@@ -427,11 +423,12 @@ See Docker documentation for custom network creation and management. ...@@ -427,11 +423,12 @@ See Docker documentation for custom network creation and management.
### Development Workflow ### Development Workflow
```bash ```bash
# 1. Build local-dev image (builds runtime, then dev as intermediate, then local-dev as final image) # 1. Build local-dev image (builds runtime, then dev as intermediate, then local-dev as final image)
./build.sh --framework vllm --target local-dev python container/render.py --framework=vllm --target=local-dev --short-output
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
# 2. Run development container using the local-dev image # 2. Run development container using the local-dev image
# RECOMMENDED: --mount-workspace for live editing in dev and local-dev images # RECOMMENDED: --mount-workspace for live editing in dev and local-dev images
./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it
# 3. Inside container, run inference (requires both frontend and backend) # 3. Inside container, run inference (requires both frontend and backend)
# Start frontend # Start frontend
...@@ -444,19 +441,21 @@ python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 & ...@@ -444,19 +441,21 @@ python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 &
### Production Workflow ### Production Workflow
```bash ```bash
# 1. Build production runtime image (runs as non-root dynamo user) # 1. Build production runtime image (runs as non-root dynamo user)
./build.sh --framework vllm --target runtime python container/render.py --framework=vllm --target=runtime --short-output
docker build -t dynamo:latest-vllm-runtime -f container/rendered.Dockerfile .
# 2. Run production container as non-root dynamo user # 2. Run production container as non-root dynamo user
./run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/home/dynamo/.cache
``` ```
### Testing Workflow ### Testing Workflow
```bash ```bash
# 1. Build dev image # 1. Build dev image
./build.sh --framework vllm --no-cache python container/render.py --framework=vllm --target=dev --short-output
docker build -t dynamo:latest-vllm-dev -f container/rendered.Dockerfile .
# 2. Run tests with network isolation for reproducible results (no -it needed for CI) # 2. Run tests with network isolation for reproducible results (no -it needed for CI)
./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache -- python -m pytest tests/ container/run.sh --image dynamo:latest-vllm-dev --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache -- python -m pytest tests/
# 3. Inside the container with bridge networking, start services # 3. Inside the container with bridge networking, start services
# Note: Services are only accessible from the same container - no port conflicts with host # Note: Services are only accessible from the same container - no port conflicts with host
......
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Bash 4+ is required: the script relies on associative arrays (declare -A FRAMEWORKS)
# and on the ${var^^} upper-casing expansion.
if [ "${BASH_VERSINFO[0]}" -lt 4 ]; then
echo "Error: Bash version 4.0 or higher is required. Current version: ${BASH_VERSINFO[0]}.${BASH_VERSINFO[1]}"
exit 1
fi
# Abort on the first command that fails.
set -e
# Filled in by option parsing: TAG accumulates "--tag <name>" arguments, PRIMARY_TAG
# keeps the first tag given, RUN_PREFIX becomes "echo" for --dry-run, and PLATFORM
# can be replaced via --platform.
TAG=
PRIMARY_TAG=
RUN_PREFIX=
PLATFORM=linux/amd64
# Get short commit hash
# (a commit_id already set in the environment takes precedence)
commit_id=${commit_id:-$(git rev-parse --short HEAD)}
# if COMMIT_ID matches a TAG use that
# The leading "v" is stripped; the value stays empty when HEAD is not exactly on a tag.
current_tag=${current_tag:-$(git describe --tags --exact-match 2>/dev/null | sed 's/^v//' || true)}
# Get latest version from release branches or tags
# Strategy:
# 1. Check for release/X.Y.Z branches (most reliable for development)
# 2. Fall back to git tags, excluding test-rc tags
# 3. Default to 0.0.1 if nothing found
# Try to find the latest release branch first
# Only remote branches named exactly origin/release/X.Y.Z are considered; the highest
# version (sort -V) wins and only the X.Y.Z part is kept.
latest_release_branch=$(git branch -r 2>/dev/null | grep -E 'origin/release/[0-9]+\.[0-9]+\.[0-9]+$' | sed 's|.*/||' | sort -V | tail -1 || true)
if [[ -n ${latest_release_branch} ]]; then
# A latest_tag already set in the environment still takes precedence here.
latest_tag=${latest_tag:-$latest_release_branch}
echo "INFO: Using version from latest release branch: ${latest_tag}"
else
# Fall back to tags, excluding test-rc tags
latest_tag=${latest_tag:-$(git tag -l 'v*' --sort=-version:refname | grep -v 'test-rc' | head -1 | sed 's/^v//' || true)}
fi
if [[ -z ${latest_tag} ]]; then
latest_tag="0.0.1"
echo "No git release tag or branch found, setting to unknown version: ${latest_tag}"
fi
# Use tag if available, otherwise use latest_tag.dev.commit_id
# VERSION gets a "v" prefix and joins the commit with "."; PYTHON_PACKAGE_VERSION has
# no prefix and joins the commit with "+".
# NOTE(review): the "+" form is presumably meant as a PEP 440 local version - confirm.
VERSION=v${current_tag:-$latest_tag.dev.$commit_id}
PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id}
# Frameworks
#
# Each framework has a corresponding base image. Additional
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.
# Only key membership is checked when validating --framework (after upper-casing
# the user's value); the numeric values are not read in the visible part of the script.
declare -A FRAMEWORKS=(["VLLM"]=1 ["TRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
DEFAULT_FRAMEWORK=VLLM
# SOURCE_DIR is the directory holding this script (symlinks resolved); the build
# context is its parent directory.
SOURCE_DIR=$(dirname "$(readlink -f "$0")")
DOCKERFILE=${SOURCE_DIR}/Dockerfile
BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")
# Base Images
TRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
TRTLLM_BASE_IMAGE_TAG=25.12-py3
# Important Note: Because of ABI compatibility issues between TensorRT-LLM and NGC PyTorch,
# we need to build the TensorRT-LLM wheel from source.
#
# There are two ways to build the dynamo image with TensorRT-LLM.
# 1. Use the local TensorRT-LLM wheel directory.
# 2. Use the TensorRT-LLM wheel on artifactory.
#
# If using option 1, the TENSORRTLLM_PIP_WHEEL_DIR must be a path to a directory
# containing TensorRT-LLM wheel file along with commit.txt file with the
# <arch>_<commit ID> as contents. If no valid trtllm wheel is found, the script
# will attempt to build the wheel from source and store the built wheel in the
# specified directory. TRTLLM_COMMIT from the TensorRT-LLM main branch will be
# used to build the wheel.
#
# If using option 2, the TENSORRTLLM_PIP_WHEEL must be the TensorRT-LLM wheel
# package that will be installed from the specified TensorRT-LLM PyPI Index URL.
# This option will ignore the TRTLLM_COMMIT option. As the TensorRT-LLM wheel from PyPI
# is not ABI compatible with NGC PyTorch, you can use TENSORRTLLM_INDEX_URL to specify
# a private PyPI index URL which has your pre-built TensorRT-LLM wheel.
#
# By default, we will use option 1. If you want to use option 2, you can set
# TENSORRTLLM_PIP_WHEEL to the TensorRT-LLM wheel on artifactory.
#
DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
# TensorRT-LLM commit to use for building the trtllm wheel if not provided.
# Important Note: This commit is not used in our CI pipeline. See the CI
# variables to learn how to run a pipeline with a specific commit.
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="45d7022cc33903509fd8045bbc577d77dd1d3e2f" # 1.3.0rc1
# Empty until set by --tensorrtllm-commit / --tensorrtllm-git-url.
TRTLLM_COMMIT=""
TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
TRTLLM_GIT_URL=""
# TensorRT-LLM PyPI index URL
DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.nvidia.com/"
# TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
# Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.3.0rc1"
# TensorRT-LLM wheels on PyPI might not be compatible with the NGC PyTorch.
# For incompatible versions, we install the wheel from the NGC image during the Docker build.
# The following versions are not ABI compatible with the NGC PyTorch.
TRTLLM_ABI_INCOMPATIBLE_VERSIONS=("1.3.0rc1")
# Empty until set by --tensorrtllm-pip-wheel.
TENSORRTLLM_PIP_WHEEL=""
TRTLLM_WHEEL_IMAGE=""
# vLLM build (devel) and runtime images; the *_CU13 variables hold the CUDA 13 tags.
VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: OPS-612 NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
# NOTE(review): the tag below is 25.06, not 25.01, so the FIXME above looks stale - confirm
# whether OPS-612 is resolved and remove or update it.
VLLM_BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
VLLM_BASE_IMAGE_TAG_CU13="25.11-cuda13.0-devel-ubuntu24.04"
VLLM_RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
VLLM_RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"
VLLM_RUNTIME_IMAGE_TAG_CU13="13.0.2-runtime-ubuntu24.04"
# Base image for the framework-less (NONE) build.
NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
# SGLang base images and CUDA versions, again with CUDA 13 counterparts.
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
SGLANG_BASE_IMAGE_TAG_CU13="25.11-cuda13.0-devel-ubuntu24.04"
SGLANG_CUDA_VERSION="12.9.1"
SGLANG_CUDA_VERSION_CU13="13.0.1"
SGLANG_RUNTIME_IMAGE_TAG_CU13="v0.5.8-cu130-runtime"
PYTHON_VERSION="3.12"
# Pinned NIXL version and the refs of its dependencies; --nixl-ref overrides NIXL_REF.
NIXL_REF=0.9.0
NIXL_UCX_REF=v1.20.0
NIXL_GDRCOPY_REF=v2.5.1
NIXL_LIBFABRIC_REF=v2.3.0
# AWS EFA installer version
# (overridable with --efa-version)
EFA_VERSION=1.45.1
# Empty by default; set by the --no-cache, --no-load and --push options respectively.
NO_CACHE=""
NO_LOAD=""
PUSH=""
# KVBM (KV Cache Block Manager) - default disabled, enabled automatically for VLLM/TRTLLM
# or can be explicitly enabled via --enable-kvbm flag
ENABLE_KVBM=false
# GPU Memory Service - default disabled, enabled automatically for VLLM/SGLANG
# or can be explicitly enabled via --enable-gpu-memory-service flag
ENABLE_GPU_MEMORY_SERVICE=false
# sccache configuration for S3
# (set by --use-sccache, --sccache-bucket and --sccache-region)
USE_SCCACHE=""
SCCACHE_BUCKET=""
SCCACHE_REGION=""
# Exit through missing_requirement unless option "$1" was followed by a
# non-empty value "$2". Returns normally only when the value is present.
_require_option_value() {
    [ -n "${2-}" ] || missing_requirement "$1"
}

# Parse the build.sh command line into globals and derive dependent defaults.
#
# Arguments: the script's argument list ("$@").
# Sets (among others): PLATFORM, FRAMEWORK, TARGET, TARGET_STR, TAG, PRIMARY_TAG,
#   BASE_IMAGE, BASE_IMAGE_TAG, BASE_VERSION, BUILD_ARGS, CACHE_FROM, CACHE_TO,
#   BUILD_CONTEXT_ARG and the ENABLE_* / sccache toggles.
# Exits via error/missing_requirement on: an unknown option, an option missing
#   its value, an unknown framework, --uid/--gid without a local-dev target, or
#   --use-sccache without bucket and region.
get_options() {
    while :; do
        case $1 in
        -h | -\? | --help)
            show_help
            exit
            ;;
        --platform)
            _require_option_value "$1" "${2-}"
            PLATFORM=$2
            shift
            ;;
        --framework)
            _require_option_value "$1" "${2-}"
            FRAMEWORK=$2
            shift
            ;;
        --cuda-version)
            _require_option_value "$1" "${2-}"
            echo "INFO: Setting CUDA_VERSION to $2"
            CUDA_VERSION=$2
            BUILD_ARGS+=" --build-arg CUDA_VERSION=$2 "
            shift
            ;;
        --nixl-ref)
            _require_option_value "$1" "${2-}"
            NIXL_REF=$2
            shift
            ;;
        --tensorrtllm-pip-wheel-dir)
            _require_option_value "$1" "${2-}"
            TENSORRTLLM_PIP_WHEEL_DIR=$2
            shift
            ;;
        --tensorrtllm-commit)
            _require_option_value "$1" "${2-}"
            TRTLLM_COMMIT=$2
            shift
            ;;
        --tensorrtllm-pip-wheel)
            _require_option_value "$1" "${2-}"
            TENSORRTLLM_PIP_WHEEL=$2
            shift
            ;;
        --tensorrtllm-index-url)
            _require_option_value "$1" "${2-}"
            TENSORRTLLM_INDEX_URL=$2
            shift
            ;;
        --tensorrtllm-git-url)
            _require_option_value "$1" "${2-}"
            TRTLLM_GIT_URL=$2
            shift
            ;;
        --base-image)
            _require_option_value "$1" "${2-}"
            BASE_IMAGE=$2
            shift
            ;;
        --base-image-tag)
            _require_option_value "$1" "${2-}"
            BASE_IMAGE_TAG=$2
            shift
            ;;
        --target)
            _require_option_value "$1" "${2-}"
            TARGET=$2
            shift
            ;;
        --uid)
            _require_option_value "$1" "${2-}"
            CUSTOM_UID=$2
            shift
            ;;
        --gid)
            _require_option_value "$1" "${2-}"
            CUSTOM_GID=$2
            shift
            ;;
        --build-arg)
            _require_option_value "$1" "${2-}"
            BUILD_ARGS+="--build-arg $2 "
            shift
            ;;
        --tag)
            _require_option_value "$1" "${2-}"
            # The first --tag also becomes PRIMARY_TAG; later ones are only appended.
            if [ -z "$TAG" ]; then
                TAG="--tag $2"
                PRIMARY_TAG="$2"
            else
                TAG+=" --tag $2"
            fi
            shift
            ;;
        --dry-run)
            # Prefixing commands with "echo" prints them instead of running them.
            RUN_PREFIX="echo"
            DRY_RUN="true"
            echo ""
            echo "=============================="
            echo "DRY RUN: COMMANDS PRINTED ONLY"
            echo "=============================="
            echo ""
            ;;
        --no-cache)
            NO_CACHE=" --no-cache"
            ;;
        --no-load)
            NO_LOAD=true
            ;;
        --push)
            PUSH=" --push"
            ;;
        --cache-from)
            _require_option_value "$1" "${2-}"
            CACHE_FROM+="--cache-from $2 "
            shift
            ;;
        --cache-to)
            _require_option_value "$1" "${2-}"
            CACHE_TO+="--cache-to $2 "
            shift
            ;;
        --build-context)
            _require_option_value "$1" "${2-}"
            # Append rather than overwrite so a repeated --build-context is kept,
            # matching how --build-arg, --cache-from and --tag accumulate.
            BUILD_CONTEXT_ARG+=" --build-context $2"
            shift
            ;;
        --enable-kvbm)
            ENABLE_KVBM=true
            ;;
        --enable-gpu-memory-service)
            ENABLE_GPU_MEMORY_SERVICE=true
            ;;
        --enable-media-nixl)
            ENABLE_MEDIA_NIXL=true
            ;;
        --enable-media-ffmpeg)
            ENABLE_MEDIA_FFMPEG=true
            ;;
        --make-efa)
            MAKE_EFA=true
            ;;
        --use-sccache)
            USE_SCCACHE=true
            ;;
        --sccache-bucket)
            _require_option_value "$1" "${2-}"
            SCCACHE_BUCKET=$2
            shift
            ;;
        --sccache-region)
            _require_option_value "$1" "${2-}"
            SCCACHE_REGION=$2
            shift
            ;;
        --vllm-max-jobs)
            # Set MAX_JOBS for vLLM compilation (only used by Dockerfile.vllm)
            _require_option_value "$1" "${2-}"
            MAX_JOBS=$2
            shift
            ;;
        --efa-version)
            _require_option_value "$1" "${2-}"
            EFA_VERSION=$2
            shift
            ;;
        --no-tag-latest)
            NO_TAG_LATEST=true
            ;;
        ?*)
            # Any other non-empty word (dash-prefixed or not) is rejected.
            error 'ERROR: Unknown option: ' "$1"
            ;;
        *)
            # Empty "$1": the argument list is exhausted.
            break
            ;;
        esac
        shift
    done

    # Validate that --uid and --gid are only used with local-dev target
    if [[ -n "${CUSTOM_UID:-}" || -n "${CUSTOM_GID:-}" ]]; then
        if [[ "${TARGET:-}" != "local-dev" && "${TARGET:-}" != "local-dev-aws" ]]; then
            error "ERROR: --uid and --gid can only be used with --target local-dev or --target local-dev-aws"
        fi
    fi

    if [ -z "$FRAMEWORK" ]; then
        FRAMEWORK=$DEFAULT_FRAMEWORK
    fi

    if [ -n "$FRAMEWORK" ]; then
        # Framework names are matched upper-case against the FRAMEWORKS table.
        FRAMEWORK=${FRAMEWORK^^}
        if [[ -z "${FRAMEWORKS[$FRAMEWORK]}" ]]; then
            error 'ERROR: Unknown framework: ' "$FRAMEWORK"
        fi

        # Defaults come from <FRAMEWORK>_BASE_IMAGE_TAG / <FRAMEWORK>_BASE_IMAGE,
        # looked up by name through indirect expansion.
        if [ -z "$BASE_IMAGE_TAG" ]; then
            BASE_IMAGE_TAG=${FRAMEWORK}_BASE_IMAGE_TAG
            BASE_IMAGE_TAG=${!BASE_IMAGE_TAG}
            echo "INFO: Using default base image tag for $FRAMEWORK: $BASE_IMAGE_TAG"
        fi
        if [ -z "$BASE_IMAGE" ]; then
            BASE_IMAGE=${FRAMEWORK}_BASE_IMAGE
            BASE_IMAGE=${!BASE_IMAGE}
        fi

        # CUDA 13.x replaces the base/runtime image tags for vLLM and SGLang,
        # even when --base-image-tag was given explicitly.
        if [[ $FRAMEWORK == "VLLM" ]] && [[ $CUDA_VERSION == "13."* ]]; then
            BASE_IMAGE_TAG=$VLLM_BASE_IMAGE_TAG_CU13
            BUILD_ARGS+=" --build-arg BASE_IMAGE_TAG=${VLLM_BASE_IMAGE_TAG_CU13} "
            RUNTIME_IMAGE_TAG=$VLLM_RUNTIME_IMAGE_TAG_CU13
            BUILD_ARGS+=" --build-arg RUNTIME_IMAGE_TAG=${VLLM_RUNTIME_IMAGE_TAG_CU13} "
            echo "INFO: Overriding base image tag for vLLM with CUDA 13: $BASE_IMAGE_TAG AND RUNTIME_IMAGE_TAG: $RUNTIME_IMAGE_TAG"
        fi
        if [[ $FRAMEWORK == "SGLANG" ]] && [[ $CUDA_VERSION == "13."* ]]; then
            BASE_IMAGE_TAG=$SGLANG_BASE_IMAGE_TAG_CU13
            BUILD_ARGS+=" --build-arg BASE_IMAGE_TAG=${SGLANG_BASE_IMAGE_TAG_CU13} "
            SGLANG_CUDA_VERSION="${SGLANG_CUDA_VERSION_CU13}"
            RUNTIME_IMAGE_TAG="${SGLANG_RUNTIME_IMAGE_TAG_CU13}"
            BUILD_ARGS+=" --build-arg RUNTIME_IMAGE_TAG=${RUNTIME_IMAGE_TAG} "
            echo "INFO: Overriding base image tag for SGLang with CUDA 13: $BASE_IMAGE_TAG AND RUNTIME_IMAGE_TAG: $RUNTIME_IMAGE_TAG"
        fi

        if [ -z "$BASE_IMAGE" ]; then
            error "ERROR: Framework $FRAMEWORK without BASE_IMAGE"
        fi

        BASE_VERSION=${FRAMEWORK}_BASE_VERSION
        BASE_VERSION=${!BASE_VERSION}
    fi

    # Default tag: dynamo:<VERSION>-<framework>[-<target>]; local-dev gets no suffix here.
    if [ -z "$TAG" ]; then
        TAG="--tag dynamo:${VERSION}-${FRAMEWORK,,}"
        PRIMARY_TAG="dynamo:${VERSION}-${FRAMEWORK,,}"
        if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
            TAG="${TAG}-${TARGET}"
            PRIMARY_TAG="${PRIMARY_TAG}-${TARGET}"
        fi
    fi

    # From here on PLATFORM holds the full "--platform <value>" flag, not the bare value.
    if [ -n "$PLATFORM" ]; then
        PLATFORM="--platform ${PLATFORM}"
    fi

    # Without --target the "dev" stage is built.
    if [ -n "$TARGET" ]; then
        TARGET_STR="--target ${TARGET}"
    else
        TARGET_STR="--target dev"
    fi

    # Validate sccache configuration
    if [ "$USE_SCCACHE" = true ]; then
        if [ -z "$SCCACHE_BUCKET" ]; then
            error "ERROR: --sccache-bucket is required when --use-sccache is specified"
        fi
        if [ -z "$SCCACHE_REGION" ]; then
            error "ERROR: --sccache-region is required when --use-sccache is specified"
        fi
    fi
}
# Print a summary of the image that is about to be built.
# Reads only globals (TAG, BASE_IMAGE, BASE_IMAGE_TAG, FRAMEWORK, BUILD_CONTEXT,
# BUILD_ARGS and the sccache settings) and writes the report to stdout.
show_image_options() {
    echo ""
    echo "Building Dynamo Image: '${TAG}'"
    echo ""
    echo " Base: '${BASE_IMAGE}'"
    echo " Base_Image_Tag: '${BASE_IMAGE_TAG}'"
    # The wheel line is only meaningful for TensorRT-LLM builds.
    case "$FRAMEWORK" in
    TRTLLM)
        echo " Tensorrtllm_Pip_Wheel: '${PRINT_TRTLLM_WHEEL_FILE}'"
        ;;
    esac
    echo " Build Context: '${BUILD_CONTEXT}'"
    echo " Build Arguments: '${BUILD_ARGS}'"
    echo " Framework: '${FRAMEWORK}'"
    if [ "$USE_SCCACHE" = true ]; then
        echo " sccache: Enabled"
        echo " sccache Bucket: '${SCCACHE_BUCKET}'"
        echo " sccache Region: '${SCCACHE_REGION}'"
        # The key prefix is optional; skip the line when it is empty.
        [ -z "$SCCACHE_S3_KEY_PREFIX" ] || echo " sccache S3 Key Prefix: '${SCCACHE_S3_KEY_PREFIX}'"
    fi
    echo ""
}
# Print the usage summary to stdout and exit with status 0.
#
# The option list mirrors what get_options accepts: --target, --cuda-version and
# --nixl-ref are listed, and --release-build is not, because get_options rejects
# it as an unknown option.
# Reads the FRAMEWORKS associative array to list the valid framework names.
show_help() {
    echo "usage: build.sh"
    echo " [-h | --help show this help and exit]"
    echo " [--base-image base image]"
    echo " [--base-image-tag base image tag]"
    echo " [--platform platform for docker build]"
    echo " [--framework framework one of ${!FRAMEWORKS[*]}]"
    echo " [--target docker build target stage (default: dev)]"
    echo " [--cuda-version CUDA version, passed as the CUDA_VERSION build arg]"
    echo " [--nixl-ref NIXL ref, passed as the NIXL_REF build arg]"
    echo " [--tensorrtllm-pip-wheel-dir path to tensorrtllm pip wheel directory]"
    echo " [--tensorrtllm-commit tensorrtllm commit/tag/branch to use for building the trtllm wheel if the wheel is not provided]"
    echo " [--tensorrtllm-pip-wheel tensorrtllm pip wheel on artifactory]"
    echo " [--tensorrtllm-index-url tensorrtllm PyPI index URL if providing the wheel from artifactory]"
    echo " [--tensorrtllm-git-url tensorrtllm git repository URL for cloning]"
    echo " [--build-arg additional build args to pass to docker build]"
    echo " [--cache-from cache location to start from]"
    echo " [--cache-to location where to cache the build output]"
    echo " [--tag tag for image (can be specified multiple times)]"
    echo " [--uid user ID for local-dev images (only with --target local-dev or local-dev-aws)]"
    echo " [--gid group ID for local-dev images (only with --target local-dev or local-dev-aws)]"
    echo " [--no-cache disable docker build cache]"
    echo " [--no-load do not load the image into docker (disables default --load)]"
    echo " [--push push the image to the registry]"
    echo " [--dry-run print docker commands without running]"
    echo " [--build-context name=path to add build context]"
    echo " [--make-efa Adds AWS EFA layer on top of the built image (works with any target)]"
    echo " [--enable-kvbm Enables KVBM support in Python 3.12]"
    echo " [--enable-gpu-memory-service Enables GPU Memory Service support]"
    echo " [--enable-media-nixl Enable media processing with NIXL support (default: true for frameworks, false for none)]"
    echo " [--enable-media-ffmpeg Enable media processing with FFMPEG support (default: true for frameworks, false for none)]"
    echo " [--use-sccache enable sccache for Rust/C/C++ compilation caching]"
    echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
    echo " [--sccache-region S3 region for sccache (required with --use-sccache)]"
    echo " [--vllm-max-jobs number of parallel jobs for compilation (only used by vLLM framework)]"
    echo " [--efa-version AWS EFA installer version (default: 1.45.1)]"
    echo " [--no-tag-latest do not add latest-{framework} tag to built image]"
    echo ""
    echo " Note: When using --use-sccache, AWS credentials must be set:"
    echo " export AWS_ACCESS_KEY_ID=your_access_key"
    echo " export AWS_SECRET_ACCESS_KEY=your_secret_key"
    exit 0
}
# Report that option "$1" was given without its value; terminates through error().
missing_requirement() {
    local option_name="$1"
    error "ERROR: ${option_name} requires an argument."
}
# Write "<$1> <$2>" plus a newline to stderr, then exit the shell with status 1.
error() {
    local prefix="$1"
    local detail="$2"
    {
        printf '%s %s\n' "${prefix}" "${detail}"
    } 1>&2
    exit 1
}
# Parse the command line first; everything below reads the globals it sets.
get_options "$@"

# ARCH defaults to amd64. When PLATFORM mentions linux/arm64, switch to arm64 and
# forward the matching ARCH / ARCH_ALT build args.
ARCH="amd64"
case "$PLATFORM" in
*"linux/arm64"*)
    ARCH="arm64"
    BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
    ;;
esac

# Record the commit in the image so a build can be traced back to its source.
# An exported DYNAMO_COMMIT_SHA wins over the current git HEAD.
DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA:-$(git rev-parse HEAD)}
BUILD_ARGS+=" --build-arg DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA "

# Select the Dockerfile that belongs to the framework; any other value leaves
# DOCKERFILE untouched.
case "$FRAMEWORK" in
VLLM)
    DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm
    ;;
TRTLLM)
    DOCKERFILE=${SOURCE_DIR}/Dockerfile.trtllm
    ;;
NONE)
    DOCKERFILE=${SOURCE_DIR}/Dockerfile
    ;;
SGLANG)
    DOCKERFILE=${SOURCE_DIR}/Dockerfile.sglang
    ;;
esac

# Forward the NIXL, libfabric and EFA versions as build arguments.
BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "
BUILD_ARGS+=" --build-arg NIXL_LIBFABRIC_REF=${NIXL_LIBFABRIC_REF} "
BUILD_ARGS+=" --build-arg EFA_VERSION=${EFA_VERSION} "
# Function to build AWS EFA images from base runtime or dev images
#
# Arguments:
#   $1 base image to layer EFA on (passed as the BASE_IMAGE build arg)
#   $2 tag flags, already in "--tag <name> ..." form; expanded unquoted on purpose
#   $3 Dockerfile.aws stage to build (runtime-aws or dev-aws)
#   $4 message echoed after a successful build
# Reads globals SOURCE_DIR, EFA_VERSION, PLATFORM and RUN_PREFIX.
# Exits with status 1 when Dockerfile.aws is missing or the build fails.
build_aws_with_header() {
local base_image="$1"
local tags="$2"
local aws_target="$3" # runtime-aws or dev-aws
local success_msg="$4"
# Note: DOCKERFILE_AWS is not declared local, so it remains set after the call.
DOCKERFILE_AWS="${SOURCE_DIR}/Dockerfile.aws"
if [[ ! -f "$DOCKERFILE_AWS" ]]; then
echo "ERROR: Dockerfile.aws not found at: $DOCKERFILE_AWS"
exit 1
fi
echo ""
echo "Building AWS EFA image from base: $base_image"
echo "Target stage: $aws_target"
# Show the docker command being executed if not in dry-run mode
if [ -z "$RUN_PREFIX" ]; then
set -x
fi
# $PLATFORM and $tags are left unquoted so each flag becomes its own word.
$RUN_PREFIX docker build --progress=plain \
--build-arg BASE_IMAGE="$base_image" \
--build-arg EFA_VERSION="${EFA_VERSION}" \
--target "$aws_target" \
--file "$DOCKERFILE_AWS" \
$PLATFORM \
$tags \
"$SOURCE_DIR" || {
# Turn tracing off quietly before reporting the failure.
{ set +x; } 2>/dev/null
echo "ERROR: Failed to build AWS EFA image"
exit 1
}
{ set +x; } 2>/dev/null
echo "$success_msg"
}
# Forward the resolved base image and tag to the Dockerfile.
BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG"
# Forward access tokens from the environment, but only when they are set.
# NOTE(review): values passed with --build-arg may be recoverable from image
# metadata; the sccache credentials in this script use `--secret` instead —
# consider the same mechanism for these tokens.
if [ -n "${GITHUB_TOKEN}" ]; then
BUILD_ARGS+=" --build-arg GITHUB_TOKEN=${GITHUB_TOKEN} "
fi
if [ -n "${GITLAB_TOKEN}" ]; then
BUILD_ARGS+=" --build-arg GITLAB_TOKEN=${GITLAB_TOKEN} "
fi
# Check that a directory exists and contains at least one *.whl file.
#
# Arguments:
#   $1 directory to search (recursively, via find)
# Output (stdout): a diagnostic line; when several wheels are found, a warning
#   followed by the path of the first wheel that find reports.
# Returns: 0 when one or more wheels are present, 1 when the directory is
#   missing or holds no wheel.
check_wheel_file() {
    local wheel_dir="$1"
    # Kept local so the count does not leak into (or clobber) a global variable.
    local wheel_count

    # Check if directory exists
    if [ ! -d "$wheel_dir" ]; then
        echo "Error: Directory '$wheel_dir' does not exist"
        return 1
    fi

    # Look for .whl files
    wheel_count=$(find "$wheel_dir" -name "*.whl" | wc -l)

    if [ "$wheel_count" -eq 0 ]; then
        echo "WARN: No .whl files found in '$wheel_dir'"
        return 1
    elif [ "$wheel_count" -gt 1 ]; then
        echo "Warning: Multiple wheel files found in '$wheel_dir'. Will use first one found."
        find "$wheel_dir" -name "*.whl" | head -n 1
        return 0
    fi

    echo "Found $wheel_count wheel in $wheel_dir"
    return 0
}
# Extract the version from a pip requirement such as "tensorrt-llm==1.3.0rc1".
#
# Arguments:
#   $1 pip wheel spec
# Output (stdout): the version without a leading "v" when the text after "=="
#   passes _is_semver_ref; otherwise an empty line.
# Returns: always 0.
get_trtllm_version_from_pip_wheel() {
    local wheel_spec="$1"
    local parsed_version

    if [[ "$wheel_spec" =~ == ]]; then
        parsed_version=$(sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p' <<<"$wheel_spec")
        if _is_semver_ref "$parsed_version"; then
            echo "${parsed_version#v}"
            return 0
        fi
    fi

    # No "==" pin, or the pinned text is not semver-shaped.
    echo ""
    return 0
}
# Test whether a TensorRT-LLM version is listed in TRTLLM_ABI_INCOMPATIBLE_VERSIONS.
#
# Arguments:
#   $1 version string, compared for exact equality
# Returns: 0 when the version is in the list, 1 otherwise.
trtllm_version_incompatible() {
    local version="$1"
    # Declared local so the loop variable does not leak into the global scope.
    local incompatible_version

    for incompatible_version in "${TRTLLM_ABI_INCOMPATIBLE_VERSIONS[@]}"; do
        if [[ "$version" == "$incompatible_version" ]]; then
            return 0
        fi
    done
    return 1
}
# Succeed when "$1" is shaped like MAJOR.MINOR.PATCH, with an optional leading
# "v" and an optional suffix (either "-"/"+" followed by identifier characters,
# or a letter-led tail of at least two characters such as "rc6").
# Returns: 0 on match, 1 otherwise.
_is_semver_ref() {
    local candidate="$1"
    local pattern='^v?(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)([-+][0-9A-Za-z.-]+|[A-Za-z][0-9A-Za-z.-]+)?$'

    if [[ "$candidate" =~ $pattern ]]; then
        return 0
    fi
    return 1
}
# Turn a TensorRT-LLM commit/tag/branch into the ref name used on GitHub.
#
# Arguments:
#   $1 commit hash, branch, or version
# Output (stdout): semver-shaped input is printed with exactly one leading "v";
#   anything else is printed unchanged.
# Returns: always 0.
get_github_trtllm_ref() {
    local commit="$1"

    # Non-semver input (hash or branch name) passes straight through.
    if ! _is_semver_ref "$commit"; then
        echo "$commit"
        return 0
    fi

    case "$commit" in
    v*)
        echo "$commit"
        ;;
    *)
        echo "v${commit}"
        ;;
    esac
    return 0
}
# Decide how TensorRT-LLM should be obtained and store the decision in the
# global TRTLLM_INTENTION ("download", "install" or "build").
# Reads TENSORRTLLM_PIP_WHEEL_DIR, TRTLLM_GIT_URL, TENSORRTLLM_INDEX_URL and
# TENSORRTLLM_PIP_WHEEL; prints the per-method verdicts to stdout; exits with
# status 1 unless exactly one method was selected.
function determine_user_intention_trtllm() {
# The tensorrt llm installation flags are not quite mutually exclusive
# since the user should be able to point at a directory of their choosing
# for storing a trtllm wheel built from source.
#
# This function attempts to discern the intention of the user by
# applying checks, or rules, for each of the scenarios.
#
# /return: Calculated intention. One of "download", "install", "build".
#          (Delivered through the global TRTLLM_INTENTION, not the exit status.)
#
# The three different methods of installing TRTLLM with build.sh are:
# 1. Download
# required: --tensorrtllm-pip-wheel
# optional: --tensorrtllm-index-url
# optional: --tensorrtllm-commit
#
# 2. Install from pre-built
# required: --tensorrtllm-pip-wheel-dir
# optional: --tensorrtllm-commit
#
# 3. Build from source
# required: --tensorrtllm-git-url
# optional: --tensorrtllm-commit
# optional: --tensorrtllm-pip-wheel-dir
local intention_download="false"
local intention_install="false"
local intention_build="false"
# Number of methods whose rule matched; must end up as exactly 1.
local intention_count=0
# Self-assignment: keeps whatever value TRTLLM_INTENTION already had.
TRTLLM_INTENTION=${TRTLLM_INTENTION}
# Install from pre-built
# A wheel directory together with a git URL means "build", so it is excluded here.
if [[ -n "$TENSORRTLLM_PIP_WHEEL_DIR" && ! -n "$TRTLLM_GIT_URL" ]]; then
intention_install="true";
intention_count=$((intention_count+1))
TRTLLM_INTENTION="install"
fi
echo " Intent to Install TRTLLM: $intention_install"
# Build from source
if [[ -n "$TRTLLM_GIT_URL" ]]; then
intention_build="true";
intention_count=$((intention_count+1))
TRTLLM_INTENTION="build"
fi
echo " Intent to Build TRTLLM: $intention_build"
# Download from repository
# Both branches select "download"; they differ only in the INFO message.
if [[ -n "$TENSORRTLLM_INDEX_URL" ]] && [[ -n "$TENSORRTLLM_PIP_WHEEL" ]]; then
intention_download="true";
intention_count=$((intention_count+1));
TRTLLM_INTENTION="download"
echo "INFO: Installing $TENSORRTLLM_PIP_WHEEL trtllm version from index: $TENSORRTLLM_INDEX_URL"
elif [[ -n "$TENSORRTLLM_PIP_WHEEL" ]]; then
intention_download="true";
intention_count=$((intention_count+1));
TRTLLM_INTENTION="download"
echo "INFO: Installing $TENSORRTLLM_PIP_WHEEL trtllm version from default pip index."
fi
# If nothing is set then we default to downloading the wheel
# with the defaults specified at the top of this file.
if [[ -z "${TENSORRTLLM_INDEX_URL}" ]] && [[ -z "${TENSORRTLLM_PIP_WHEEL}" ]] && [[ "${intention_count}" -eq 0 ]]; then
intention_download="true";
intention_count=$((intention_count+1))
TRTLLM_INTENTION="download"
echo "INFO: Inferring download because both TENSORRTLLM_PIP_WHEEL and TENSORRTLLM_INDEX_URL are not set."
fi
echo " Intent to Download TRTLLM: $intention_download"
# Zero matches or more than one match are both rejected as ambiguous.
if [[ ! "$intention_count" -eq 1 ]]; then
echo -e "[ERROR] Could not figure out the trtllm installation intent from the current flags. Please check your build.sh command against the following"
echo -e " The grouped flags are mutually exclusive:"
echo -e " To download and install use both: --tensorrtllm-index-url, --tensorrtllm-pip-wheel"
echo -e " To install from a pre-built wheel use: --tensorrtllm-pip-wheel-dir"
echo -e " To build from source and install use both: --tensorrtllm-commit, --tensorrtllm-git-url"
exit 1
fi
}
# TensorRT-LLM only: resolve how the wheel is obtained (download / install /
# build), add the matching build args and the "trtllm_wheel" build context, and
# derive the GitHub ref passed as GITHUB_TRTLLM_COMMIT.
if [[ $FRAMEWORK == "TRTLLM" ]]; then
echo -e "Determining the user's TRTLLM installation intent..."
determine_user_intention_trtllm # From this point forward, can assume correct TRTLLM flags
if [[ "$TRTLLM_INTENTION" == "download" ]]; then
# Fall back to the script defaults for anything the user did not provide.
TENSORRTLLM_INDEX_URL=${TENSORRTLLM_INDEX_URL:-$DEFAULT_TENSORRTLLM_INDEX_URL}
TENSORRTLLM_PIP_WHEEL=${TENSORRTLLM_PIP_WHEEL:-$DEFAULT_TENSORRTLLM_PIP_WHEEL}
TRTLLM_WHEEL_VERSION=$(get_trtllm_version_from_pip_wheel "${TENSORRTLLM_PIP_WHEEL}")
# Versions on the ABI-incompatible list use the release image as wheel source
# instead of the pip wheel / index URL.
if trtllm_version_incompatible "${TRTLLM_WHEEL_VERSION}"; then
TRTLLM_WHEEL_IMAGE="nvcr.io/nvidia/tensorrt-llm/release:${TRTLLM_WHEEL_VERSION}"
BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=0"
BUILD_ARGS+=" --build-arg TRTLLM_WHEEL_IMAGE=${TRTLLM_WHEEL_IMAGE}"
PRINT_TRTLLM_WHEEL_FILE=${TRTLLM_WHEEL_IMAGE}
else
BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=0"
BUILD_ARGS+=" --build-arg TENSORRTLLM_PIP_WHEEL=${TENSORRTLLM_PIP_WHEEL}"
BUILD_ARGS+=" --build-arg TENSORRTLLM_INDEX_URL=${TENSORRTLLM_INDEX_URL}"
PRINT_TRTLLM_WHEEL_FILE=${TENSORRTLLM_PIP_WHEEL}
fi
# Create a dummy directory to satisfy the build context requirement
# There is no way to conditionally copy the build context in dockerfile.
mkdir -p /tmp/trtllm_wheel_context
BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=/tmp/trtllm_wheel_context"
elif [[ "$TRTLLM_INTENTION" == "install" ]]; then
echo "Checking for TensorRT-LLM wheel in ${TENSORRTLLM_PIP_WHEEL_DIR}"
if ! check_wheel_file "${TENSORRTLLM_PIP_WHEEL_DIR}"; then
echo "ERROR: Valid trtllm wheel file not found in ${TENSORRTLLM_PIP_WHEEL_DIR}"
echo " If this is not intended you can try building from source with the following variables set instead:"
echo ""
echo " --tensorrtllm-git-url https://github.com/NVIDIA/TensorRT-LLM --tensorrtllm-commit $TRTLLM_COMMIT"
exit 1
fi
echo "Installing TensorRT-LLM from local wheel directory"
BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=1"
BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=${TENSORRTLLM_PIP_WHEEL_DIR}"
# Only used for the summary printed by show_image_options.
PRINT_TRTLLM_WHEEL_FILE=$(find $TENSORRTLLM_PIP_WHEEL_DIR -name "*.whl" | head -n 1)
elif [[ "$TRTLLM_INTENTION" == "build" ]]; then
TENSORRTLLM_PIP_WHEEL_DIR=${TENSORRTLLM_PIP_WHEEL_DIR:=$DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR}
echo "TRTLLM pip wheel output directory is: ${TENSORRTLLM_PIP_WHEEL_DIR}"
# NOTE(review): in dry-run mode this whole branch is skipped, so the printed
# docker command carries neither HAS_TRTLLM_CONTEXT nor the trtllm_wheel build
# context — confirm whether that is intended.
if [ "$DRY_RUN" != "true" ]; then
GIT_URL_ARG=""
if [ -n "${TRTLLM_GIT_URL}" ]; then
GIT_URL_ARG="-u ${TRTLLM_GIT_URL}"
fi
# `env -i` runs the wheel build with an empty environment.
# NOTE(review): TRTLLM_COMMIT is used here before the emptiness check further
# down; an empty value would leave `-c` without an argument — confirm a default
# is set earlier in the script.
if ! env -i ${SOURCE_DIR}/build_trtllm_wheel.sh -o ${TENSORRTLLM_PIP_WHEEL_DIR} -c ${TRTLLM_COMMIT} -a ${ARCH} -n ${NIXL_REF} ${GIT_URL_ARG}; then
error "ERROR: Failed to build TensorRT-LLM wheel"
fi
BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=1"
BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=${TENSORRTLLM_PIP_WHEEL_DIR}"
PRINT_TRTLLM_WHEEL_FILE=$(find $TENSORRTLLM_PIP_WHEEL_DIR -name "*.whl" | head -n 1)
fi
else
echo 'No intention was set. This error should have been detected in "determine_user_intention_trtllm()". Exiting...'
exit 1
fi
# Need to know the commit of TRTLLM so we can determine the
# TensorRT installation associated with TRTLLM.
if [[ -z "$TRTLLM_COMMIT" ]]; then
# Attempt to default since the commit will work with a hash or a tag/branch
if [[ ! -z "$TENSORRTLLM_PIP_WHEEL" ]]; then
TRTLLM_COMMIT=$(get_trtllm_version_from_pip_wheel "${TENSORRTLLM_PIP_WHEEL}")
if [[ -z "$TRTLLM_COMMIT" ]]; then
echo -e "[ERROR] Could not parse a semver version from TENSORRTLLM_PIP_WHEEL: ${TENSORRTLLM_PIP_WHEEL}"
exit 1
fi
echo "Attempting to default TRTLLM_COMMIT to \"$TRTLLM_COMMIT\" for installation of TensorRT."
else
echo -e "[ERROR] TRTLLM framework was set as a target but the TRTLLM_COMMIT variable was not set."
echo -e " Could not find a suitible default by infering from TENSORRTLLM_PIP_WHEEL."
echo -e " TRTLLM_COMMIT is needed to install the correct version of TensorRT associated with TensorRT-LLM."
exit 1
fi
fi
# Semver-shaped commits gain a leading "v"; hashes and branch names pass through.
GITHUB_TRTLLM_REF=$(get_github_trtllm_ref "${TRTLLM_COMMIT}")
BUILD_ARGS+=" --build-arg GITHUB_TRTLLM_COMMIT=${GITHUB_TRTLLM_REF}"
fi
# Assemble the feature-related build arguments. Statements only ever append to
# BUILD_ARGS, so their order determines the order of flags on the docker
# command line.
# ENABLE_KVBM: Used in base Dockerfile for block-manager feature.
# Declared but not currently used in Dockerfile.{vllm,trtllm}.
# Force KVBM to be enabled for VLLM and TRTLLM frameworks
if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]]; then
echo "Forcing enable_kvbm to true in ${FRAMEWORK} image build"
ENABLE_KVBM=true
fi
# For other frameworks, ENABLE_KVBM defaults to false unless --enable-kvbm flag was provided
# The build arg is only emitted when the feature is on.
if [[ ${ENABLE_KVBM} == "true" ]]; then
echo "Enabling KVBM in the dynamo image"
BUILD_ARGS+=" --build-arg ENABLE_KVBM=${ENABLE_KVBM} "
fi
# ENABLE_GPU_MEMORY_SERVICE: Used in Dockerfiles for gpu_memory_service wheel.
# Declared but not currently used in Dockerfile.trtllm.
# Force GPU Memory Service to be enabled for VLLM and SGLANG frameworks
if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "SGLANG" ]]; then
echo "Forcing enable_gpu_memory_service to true in ${FRAMEWORK} image build"
ENABLE_GPU_MEMORY_SERVICE=true
fi
# For other frameworks, ENABLE_GPU_MEMORY_SERVICE defaults to false unless --enable-gpu-memory-service flag was provided
if [[ ${ENABLE_GPU_MEMORY_SERVICE} == "true" ]]; then
echo "Enabling GPU Memory Service in the dynamo image"
BUILD_ARGS+=" --build-arg ENABLE_GPU_MEMORY_SERVICE=${ENABLE_GPU_MEMORY_SERVICE} "
fi
# ENABLE_MEDIA_NIXL: Enable media processing with NIXL support
# Used in base Dockerfile for maturin build feature flag.
# Can be explicitly overridden with --enable-media-nixl flag
# Unlike the two toggles above, this arg is always passed (as true or false).
if [ -z "${ENABLE_MEDIA_NIXL}" ]; then
if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]] || [[ $FRAMEWORK == "SGLANG" ]]; then
ENABLE_MEDIA_NIXL=true
else
ENABLE_MEDIA_NIXL=false
fi
fi
BUILD_ARGS+=" --build-arg ENABLE_MEDIA_NIXL=${ENABLE_MEDIA_NIXL} "
# ENABLE_MEDIA_FFMPEG: Enable media processing with FFMPEG support
# Used in base Dockerfile for maturin build feature flag.
# Can be explicitly overridden with --enable-media-ffmpeg flag
if [ -z "${ENABLE_MEDIA_FFMPEG}" ]; then
if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]] || [[ $FRAMEWORK == "SGLANG" ]]; then
ENABLE_MEDIA_FFMPEG=true
else
ENABLE_MEDIA_FFMPEG=false
fi
fi
BUILD_ARGS+=" --build-arg ENABLE_MEDIA_FFMPEG=${ENABLE_MEDIA_FFMPEG} "
# NIXL_UCX_REF: Used in base Dockerfile only.
if [ -n "${NIXL_UCX_REF}" ]; then
BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} "
fi
# NIXL_GDRCOPY_REF: Used in dynamo base stages.
if [ -n "${NIXL_GDRCOPY_REF}" ]; then
BUILD_ARGS+=" --build-arg NIXL_GDRCOPY_REF=${NIXL_GDRCOPY_REF} "
fi
# MAX_JOBS is only used by Dockerfile.vllm
if [ -n "${MAX_JOBS}" ]; then
BUILD_ARGS+=" --build-arg MAX_JOBS=${MAX_JOBS} "
fi
# NOTE(review): for SGLang this appends a second CUDA_VERSION build arg after any
# value given through --cuda-version — confirm which one docker is meant to use.
if [[ $FRAMEWORK == "SGLANG" ]]; then
echo "Customizing Python, CUDA, and framework images for sglang images"
BUILD_ARGS+=" --build-arg CUDA_VERSION=${SGLANG_CUDA_VERSION}"
fi
BUILD_ARGS+=" --build-arg PYTHON_VERSION=${PYTHON_VERSION}"
# Add sccache build arguments
# AWS credentials are handed over as BuildKit secrets sourced from the
# environment, not as build args.
if [ "$USE_SCCACHE" = true ]; then
BUILD_ARGS+=" --build-arg USE_SCCACHE=true"
BUILD_ARGS+=" --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET}"
BUILD_ARGS+=" --build-arg SCCACHE_REGION=${SCCACHE_REGION}"
BUILD_ARGS+=" --secret id=aws-key-id,env=AWS_ACCESS_KEY_ID"
BUILD_ARGS+=" --secret id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY"
fi
if [[ "$PLATFORM" == *"linux/arm64"* && "${FRAMEWORK}" == "SGLANG" ]]; then
# Add arguments required for sglang blackwell build
BUILD_ARGS+=" --build-arg GRACE_BLACKWELL=true --build-arg BUILD_TYPE=blackwell_aarch64"
fi
# Dev/local-dev targets: build from a concatenated Dockerfile:
# <framework Dockerfile> + container/dev/Dockerfile.dev
if [[ -z "${TARGET:-}" || "${TARGET:-}" == "dev" || "${TARGET:-}" == "local-dev" ]]; then
    # Concatenate the framework Dockerfile ($1) with dev/Dockerfile.dev into a
    # temporary file and print that file's path on stdout.
    # Runs inside a command substitution, so `error` only terminates the
    # subshell; the caller has to check the exit status.
    _gen_dev_dockerfile_temp() {
        local fw_df dev_df out
        fw_df="$1"
        dev_df="${SOURCE_DIR}/dev/Dockerfile.dev"

        if [[ ! -f "${fw_df}" ]]; then
            error "ERROR:" "Framework Dockerfile not found: ${fw_df}"
        fi
        if [[ ! -f "${dev_df}" ]]; then
            error "ERROR:" "Dev Dockerfile not found: ${dev_df}"
        fi

        out="$(mktemp -t dynamo-dev-combined.XXXXXX.Dockerfile)" ||
            error "ERROR:" "Failed to create a temp Dockerfile"
        cat "${fw_df}" "${dev_df}" > "${out}"
        printf '\n' >> "${out}"

        if [[ ! -s "${out}" ]]; then
            rm -f "${out}"
            error "ERROR:" "Temp Dockerfile was generated but is empty"
        fi

        printf '%s\n' "${out}"
    }

    # Propagate a failure from the subshell: without the explicit exit the script
    # would carry on with an empty DOCKERFILE after the error message.
    DOCKERFILE="$(_gen_dev_dockerfile_temp "${DOCKERFILE}")" || exit 1

    # Ensure we clean up the temp Dockerfile (opt-out with KEEP_DEV_DOCKERFILE_TEMP=1 for debugging).
    if [[ "${KEEP_DEV_DOCKERFILE_TEMP:-}" != "1" ]]; then
        trap 'rm -f "${DOCKERFILE}" 2>/dev/null || true' EXIT
    fi

    # Dockerfile.dev expects a lowercase framework string.
    BUILD_ARGS+=" --build-arg FRAMEWORK=${FRAMEWORK,,} "

    # Preserve historical tagging behavior for dev/local-dev (build.sh used to delegate out).
    # Strip any existing stage suffix from the primary tag before adding the new one.
    base="${PRIMARY_TAG}"
    base="${base%-runtime}"
    base="${base%-local-dev}"
    base="${base%-dev}"

    if [[ -z "${TARGET:-}" || "${TARGET}" == "dev" ]]; then
        TAG="--tag ${base}-dev"
    else
        TAG="--tag ${base}-local-dev"
        # Default UID/GID behavior: current user if not specified.
        if [[ -z "${CUSTOM_UID:-}" ]]; then
            CUSTOM_UID="$(id -u)"
        fi
        if [[ -z "${CUSTOM_GID:-}" ]]; then
            CUSTOM_GID="$(id -g)"
        fi
        BUILD_ARGS+=" --build-arg USER_UID=${CUSTOM_UID} --build-arg USER_GID=${CUSTOM_GID} "
    fi
fi
# Compute the extra "latest" tag unless --no-tag-latest was given:
#   no target or dev -> dynamo:latest-<framework>
#   local-dev        -> dynamo:latest-<framework>-local-dev
#   any other target -> dynamo:latest-<framework>-<target>
LATEST_TAG=""
if [ -z "${NO_TAG_LATEST}" ]; then
    case "${TARGET:-}" in
    "" | dev)
        LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
        ;;
    local-dev)
        LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}-local-dev"
        ;;
    *)
        LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}-${TARGET}"
        ;;
    esac
fi
# Print the build summary, then handle the FRONTEND target's prerequisite.
show_image_options
# Handle FRONTEND target: build EPP image first
# The comparison upper-cases TARGET, so "frontend" matches in any letter case.
if [[ ${TARGET^^} == "FRONTEND" ]]; then
echo "Building FRONTEND image - requires EPP image"
echo ""
echo "Building EPP image for Frontend using Makefile..."
# EPP directory with the new self-contained build
EPP_DIR="${BUILD_CONTEXT}/deploy/inference-gateway/epp"
# Set DOCKER_PROXY from ECR_HOSTNAME if available (for pulling base images through proxy)
# This prevents rate-limiting when building in CI across multiple PRs
DOCKER_PROXY_ARG=""
if [[ -n "${ECR_HOSTNAME}" ]]; then
DOCKER_PROXY="${ECR_HOSTNAME}/dockerhub/"
DOCKER_PROXY_ARG="DOCKER_PROXY=${DOCKER_PROXY}"
echo "Using DOCKER_PROXY: ${DOCKER_PROXY}"
fi
# Build EPP image using the Makefile
# The Makefile handles: building Dynamo library, building Docker image, loading it locally
# DOCKER_PROXY_ARG is left unquoted so an empty value adds no argument to make.
$RUN_PREFIX make -C "${EPP_DIR}" all DYNAMO_DIR="${BUILD_CONTEXT}" ${DOCKER_PROXY_ARG}
# Compute EPP image tag (must match Makefile's IMAGE_TAG)
# IMAGE_TAG = $(IMAGE_REPO):$(GIT_TAG)
# IMAGE_REPO = $(DOCKER_SERVER)/$(IMAGE_NAME)
# Image lives in local cache only, not pushed to any registry
EPP_DOCKER_SERVER="dynamo"
EPP_IMAGE_NAME="dynamo-epp"
# Falls back to the literal "dev" when git describe fails.
EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
EPP_IMAGE_TAG="${EPP_DOCKER_SERVER}/${EPP_IMAGE_NAME}:${EPP_GIT_TAG}"
echo "Successfully built EPP image: ${EPP_IMAGE_TAG}"
# Add build args for frontend image
BUILD_ARGS+=" --build-arg EPP_IMAGE=${EPP_IMAGE_TAG}"
fi
# Always build the main image first
# Create build log directory for BuildKit reports
BUILD_LOG_DIR="${BUILD_CONTEXT}/build-logs"
mkdir -p "${BUILD_LOG_DIR}"
SINGLE_BUILD_LOG="${BUILD_LOG_DIR}/single-stage-build.log"

# Determine --load flag (default on unless --no-load or --push specified)
LOAD_FLAG=""
if [ "$NO_LOAD" != "true" ] && [ -z "$PUSH" ]; then
    LOAD_FLAG=" --load"
fi

# Use BuildKit for enhanced metadata
# The flag variables are expanded unquoted on purpose so that each one splits
# into separate words. Output is mirrored into the build log via tee, and
# PIPESTATUS[0] keeps the exit code of the build rather than of tee.
if docker buildx version &>/dev/null; then
    $RUN_PREFIX docker buildx build --progress=plain ${LOAD_FLAG} ${PUSH} -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
    BUILD_EXIT_CODE=${PIPESTATUS[0]}
else
    # `env` sets DOCKER_BUILDKIT for the build. A bare `VAR=value` placed after
    # `$RUN_PREFIX` is not parsed as an assignment, so with an empty RUN_PREFIX
    # the shell would try to execute a command literally named "DOCKER_BUILDKIT=1".
    $RUN_PREFIX env DOCKER_BUILDKIT=1 docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
    BUILD_EXIT_CODE=${PIPESTATUS[0]}
fi

# Stop here with the build's own exit code if it failed.
if [ ${BUILD_EXIT_CODE} -ne 0 ]; then
    exit ${BUILD_EXIT_CODE}
fi
# Handle --make-efa flag: add AWS EFA layer on top of the built image
# This runs BEFORE local-dev so the flow is: dev -> dev-aws -> local-dev-aws
if [[ "${MAKE_EFA:-}" == "true" ]]; then
    # Get the base image that was just built (use PRIMARY_TAG to avoid parsing issues)
    # NOTE(review): for dev/local-dev targets TAG is rewritten to "<base>-dev" or
    # "<base>-local-dev" earlier in the script while PRIMARY_TAG is not — confirm
    # PRIMARY_TAG names an image that exists in that case.
    BASE_IMAGE_FOR_EFA="${PRIMARY_TAG}"

    # Determine the EFA stage based on the target
    # runtime target -> runtime-aws stage
    # dev/local-dev target -> dev-aws stage
    if [[ "${TARGET:-dev}" == "runtime" ]]; then
        EFA_STAGE="runtime-aws"
    else
        EFA_STAGE="dev-aws"
    fi

    # Build AWS tags by appending -aws to existing tags
    # TAG and LATEST_TAG are "--tag <name>" word lists and TAG may hold several
    # pairs. Walk every word and suffix each image name, so that multiple --tag
    # values all become "<name>-aws" instead of only the last one.
    AWS_TAGS=""
    for _tag_word in $TAG $LATEST_TAG; do
        if [[ "$_tag_word" != "--tag" ]]; then
            AWS_TAGS+=" --tag ${_tag_word}-aws"
        fi
    done
    unset _tag_word

    build_aws_with_header "$BASE_IMAGE_FOR_EFA" "$AWS_TAGS" "$EFA_STAGE" "Successfully built ${EFA_STAGE} image"
fi

# Make sure command tracing is off, without echoing the `set +x` itself.
{ set +x; } 2>/dev/null
\ No newline at end of file
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# This file defines the default ARG values for the Dockerfiles generated
# by render.py. These are the recommended default values for users and
# are the source of truth for the values used in our delivered images.
#
# Some ARGs have multiple valid values and can be changed for local testing.
# To do so, either edit this file locally or pass the corresponding
# --build-arg to docker build.
# Shared defaults. The template reads e.g. context.dynamo.python_version from
# this section regardless of the selected framework.
dynamo:
base_image: nvcr.io/nvidia/cuda-dl-base
base_image_tag: 25.01-cuda12.8-devel-ubuntu24.04
epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1
frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619
# Quoted so the value loads as the string "3.12" rather than a float.
python_version: "3.12"
nats_version: v2.10.28
etcd_version: v3.5.21
nixl_ref: 0.9.0
nixl_ucx_ref: v1.20.0
nixl_gdrcopy_ref: v2.5.1
nixl_ucx_efa_ref: 9d2b88a1f67faf9876f267658bd077b379b8bb76
nixl_libfabric_ref: v2.3.0
# Feature flags are quoted so they load as strings, not YAML booleans.
enable_kvbm: "false"
enable_media_nixl: "false"
enable_media_ffmpeg: "false"
enable_gpu_memory_service: "false"
ffmpeg_version: "7.1"
efa_version: 1.45.1
# vLLM defaults.
vllm:
base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: nvcr.io/nvidia/cuda
# Contains a ${CUDA_VERSION} reference that is kept verbatim by YAML;
# presumably expanded by Docker at build time — verify against the template.
runtime_image_tag: ${CUDA_VERSION}.0-runtime-ubuntu24.04
vllm_ref: v0.14.1
flashinf_ref: v0.5.3
lmcache_ref: 0.3.12
max_jobs: "10"
enable_media_nixl: "true"
enable_media_ffmpeg: "true"
enable_gpu_memory_service: "true"
enable_kvbm: "true"
# Per-CUDA-version values: for vllm and sglang the template looks up
# base_image_tag under the key "cuda" + <--cuda-version>, e.g. "cuda12.9".
cuda12.9:
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
cuda13.0:
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
# SGLang defaults.
sglang:
base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: lmsysorg/sglang
# Per-CUDA-version values (same "cuda" + <--cuda-version> lookup as vllm).
cuda12.9:
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
runtime_image_tag: v0.5.7-runtime
cuda13.0:
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag: v0.5.8-cu130-runtime
enable_media_nixl: "true"
enable_media_ffmpeg: "true"
enable_gpu_memory_service: "true"
enable_kvbm: "false"
# TensorRT-LLM defaults. The template takes base_image_tag directly from this
# section (no per-CUDA-version keys).
trtllm:
base_image: nvcr.io/nvidia/pytorch
base_image_tag: 25.12-py3
runtime_image: nvcr.io/nvidia/cuda-dl-base
runtime_image_tag: 25.10-cuda13.0-runtime-ubuntu24.04
enable_media_nixl: "true"
enable_media_ffmpeg: "true"
enable_gpu_memory_service: "false"
enable_kvbm: "true"
python_version: "3.12"
index_url: https://pypi.nvidia.com/
pip_wheel_dir: /tmp/trtllm_wheel/
pip_wheel: tensorrt-llm==1.3.0rc1
# The "#" below is not preceded by a space, so it is part of the value and
# does not start a YAML comment.
trtllm_wheel_image: nvcr.io/nvidia/tensorrt-llm/release:${TENSORRTLLM_PIP_WHEEL#*==}
# NOTE(review): differs from the version pinned in pip_wheel (1.3.0rc1) —
# confirm this is intentional.
github_trtllm_commit: 1.2.0rc6
torch_version: 2.10.0a0+b4e4ee81d3.nv25.12
torch_tensorrt_version: 2.10.0a0
torchvision_version: 0.25.0a0+ca221243
torchao_ver: 0.15.0+git01374eb5
torchdata_ver: 0.11.0
torchtitan_ver: 0.2.0
jinja2_version: 3.1.6
sympy_version: 1.14.0
pytorch_triton_ver: 3.5.1+gitbfeb0668.nv25.12
flash_attn_version: 2.7.4.post1+25.12
flashinfer_python_ver: 0.6.1
has_trtllm_context: "0"
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import argparse
import re
import sys
from pathlib import Path
import yaml
from jinja2 import Environment, FileSystemLoader
def parse_args():
    """Parse the command-line options of the Dockerfile renderer.

    Returns:
        argparse.Namespace with ``framework``, ``target``, ``platform``,
        ``cuda_version`` (all strings) and the booleans ``make_efa``,
        ``short_output`` and ``show_result``.

    ``--framework`` is lower-cased while parsing, so ``--framework=VLLM`` and
    ``--framework=vllm`` are equivalent. The template and context.yaml use the
    lowercase names, so an upper-case value would otherwise match nothing.
    """
    parser = argparse.ArgumentParser(
        description="Renders dynamo Dockerfiles from templates"
    )
    parser.add_argument(
        "--framework",
        # str.lower normalizes the value so it matches the lowercase keys.
        type=str.lower,
        default="vllm",
        help="Dockerfile framework to use [dynamo, vllm, sglang, trtllm]",
    )
    parser.add_argument(
        "--target",
        type=str,
        default="runtime",
        help="Dockerfile target to use. Non-exhaustive examples: [runtime, dev, local-dev]",
    )
    parser.add_argument(
        "--platform",
        type=str,
        default="amd64",
        help="Dockerfile platform to use. [amd64, arm64]",
    )
    parser.add_argument(
        "--cuda-version",
        type=str,
        default="12.9",
        help="CUDA version to use. [12.9, 13.0]",
    )
    parser.add_argument("--make-efa", action="store_true", help="Enable AWS EFA")
    parser.add_argument(
        "--short-output",
        action="store_true",
        help="Output filename is just rendered.Dockerfile",
    )
    parser.add_argument(
        "--show-result",
        action="store_true",
        help="Prints the rendered Dockerfile to stdout.",
    )
    args = parser.parse_args()
    return args
def validate_args(args):
    """Validation hook for the parsed arguments; currently accepts everything.

    Args:
        args: the namespace returned by ``parse_args`` (not inspected yet).

    Returns:
        None.
    """
    # TODO: Add validation logic
    return None
def render(args, context, script_dir):
    """Render ``Dockerfile.template`` and write the result next to the script.

    Args:
        args: parsed options; reads ``framework``, ``target``, ``platform``,
            ``cuda_version``, ``make_efa``, ``short_output``, ``show_result``.
        context: mapping loaded from context.yaml, exposed to the template as
            ``context``.
        script_dir: directory holding the template; also the output directory.

    The output file is ``rendered.Dockerfile`` with ``--short-output``;
    otherwise its name encodes framework, target, CUDA version and platform.
    """
    jinja_env = Environment(
        loader=FileSystemLoader(script_dir), trim_blocks=False, lstrip_blocks=True
    )
    template_vars = {
        "context": context,
        "framework": args.framework,
        "target": args.target,
        "platform": args.platform,
        "cuda_version": args.cuda_version,
        "make_efa": args.make_efa,
    }
    rendered = jinja_env.get_template("Dockerfile.template").render(**template_vars)

    # Collapse every run of three or more newlines down to exactly two.
    cleaned = re.sub(r"\n{3,}", "\n\n", rendered)

    filename = (
        "rendered.Dockerfile"
        if args.short_output
        else f"{args.framework}-{args.target}-cuda{args.cuda_version}-{args.platform}-rendered.Dockerfile"
    )
    output_path = f"{script_dir}/{filename}"

    with open(output_path, "w") as out_file:
        out_file.write(cleaned)

    if args.show_result:
        banner = "##############"
        for line in (banner, "# Dockerfile #", banner, cleaned, banner):
            print(line)

    print(f"INFO: Generated Dockerfile written to {output_path}")
    return
def main():
    """Entry point: parse options, load ``context.yaml`` and render the Dockerfile.

    The context file and the templates are looked up in the directory that
    contains this script. For the ``local-dev`` target a reminder about the
    required ``USER_UID``/``USER_GID`` build arguments is printed afterwards.
    """
    args = parse_args()
    validate_args(args)

    # Resolve the directory from this module's own location rather than
    # sys.argv[0], which points elsewhere when main() is imported or the
    # script is launched through a wrapper or symlink.
    script_dir = Path(__file__).resolve().parent

    with open(f"{script_dir}/context.yaml", "r", encoding="utf-8") as f:
        context = yaml.safe_load(f)

    render(args, context, script_dir)

    if args.target == "local-dev":
        print(
            "INFO: Remember to add --build-arg values for USER_UID and USER_GID when building a local-dev image!"
        )
        print(
            "      Recommendation: --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g)"
        )


if __name__ == "__main__":
    main()
{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
##########################
#### Build Arguments #####
##########################
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# Default values are for x86/amd64:
# --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# For arm64/aarch64, build with:
# --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
#TODO OPS-592: Leverage uname -m to determine ARCH instead of passing it as an arg
ARG ARCH={{ platform }}
ARG ARCH_ALT={{ "x86_64" if platform == "amd64" else "aarch64" }}
# Python/CUDA configuration
ARG PYTHON_VERSION={{ context.dynamo.python_version }}
ARG CUDA_VERSION={{ cuda_version }}
ARG CUDA_MAJOR=${CUDA_VERSION%%.*}
{#- vllm and sglang read the base image tag from a per-CUDA-version key ("cuda" + cuda_version); every other framework reads it from the framework entry itself. #}
{% if framework == "vllm" or framework == "sglang" -%}
{% set cuda_context_key = "cuda" + cuda_version %}
# Base image configuration
ARG BASE_IMAGE={{ context[framework].base_image }}
ARG BASE_IMAGE_TAG={{ context[framework][cuda_context_key].base_image_tag }}
{% else -%}
ARG BASE_IMAGE={{ context[framework].base_image }}
ARG BASE_IMAGE_TAG={{ context[framework].base_image_tag }}
{%- endif %}
{#- Only sglang reads the runtime image tag per CUDA version; the dynamo framework gets no RUNTIME_IMAGE args at all. #}
{% if framework == "sglang" -%}
{% set cuda_context_key = "cuda" + cuda_version %}
# Runtime image configuration
ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
ARG RUNTIME_IMAGE_TAG={{ context[framework][cuda_context_key].runtime_image_tag }}
{% elif framework != "dynamo" -%}
ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
ARG RUNTIME_IMAGE_TAG={{ context[framework].runtime_image_tag }}
{%- endif %}
# Build configuration
ARG ENABLE_KVBM={{ context[framework].enable_kvbm }}
ARG CARGO_BUILD_JOBS
ARG NATS_VERSION={{ context.dynamo.nats_version }}
ARG ETCD_VERSION={{ context.dynamo.etcd_version }}
ARG ENABLE_MEDIA_NIXL={{ context[framework].enable_media_nixl }}
ARG ENABLE_MEDIA_FFMPEG={{ context[framework].enable_media_ffmpeg }}
ARG FFMPEG_VERSION={{ context.dynamo.ffmpeg_version }}
ARG ENABLE_GPU_MEMORY_SERVICE={{ context[framework].enable_gpu_memory_service }}
# SCCACHE configuration
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""
# NIXL configuration
ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
ARG NIXL_REF={{ context.dynamo.nixl_ref }}
ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
{% if target == "dev" or target == "local-dev" %}
ARG FRAMEWORK={{ framework }}
{% endif %}
{% if target == "frontend" %}
ARG EPP_IMAGE={{ context.dynamo.epp_image }}
ARG FRONTEND_IMAGE={{ context.dynamo.frontend_image }}
{% endif %}
{% if framework == "vllm" -%}
# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF={{ context.vllm.vllm_ref }}
ARG MAX_JOBS={{ context.vllm.max_jobs }}
# FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds
ARG FLASHINF_REF={{ context.vllm.flashinf_ref }}
ARG LMCACHE_REF={{ context.vllm.lmcache_ref }}
# If left blank, then we will fallback to vLLM defaults
ARG DEEPGEMM_REF=""
{%- endif -%}
{% if framework == "trtllm" %}
# TensorRT-LLM specific configuration
ARG HAS_TRTLLM_CONTEXT={{ context.trtllm.has_trtllm_context }}
ARG TENSORRTLLM_PIP_WHEEL={{ context.trtllm.pip_wheel }}
ARG TENSORRTLLM_INDEX_URL={{ context.trtllm.index_url }}
ARG GITHUB_TRTLLM_COMMIT={{ context.trtllm.github_trtllm_commit }}
ARG TRTLLM_WHEEL_IMAGE={{ context.trtllm.trtllm_wheel_image }}
# Copy pytorch installation from NGC PyTorch
ARG FLASHINFER_PYTHON_VER={{ context.trtllm.flashinfer_python_ver }}
ARG PYTORCH_TRITON_VER={{ context.trtllm.pytorch_triton_ver }}
ARG TORCHAO_VER={{ context.trtllm.torchao_ver }}
ARG TORCHDATA_VER={{ context.trtllm.torchdata_ver }}
ARG TORCHTITAN_VER={{ context.trtllm.torchtitan_ver }}
ARG TORCH_VER={{ context.trtllm.torch_version }}
ARG TORCH_TENSORRT_VER={{ context.trtllm.torch_tensorrt_version }}
ARG TORCHVISION_VER={{ context.trtllm.torchvision_version }}
ARG JINJA2_VER={{ context.trtllm.jinja2_version }}
ARG SYMPY_VER={{ context.trtllm.sympy_version }}
ARG FLASH_ATTN_VER={{ context.trtllm.flash_attn_version }}
# Python configuration
ARG TRTLLM_PYTHON_VERSION={{ context[framework].python_version }}
{%- endif -%}
{% if make_efa %}
ARG EFA_VERSION={{ context.dynamo.efa_version }}
ARG EFA_BASE_IMAGE={{ "runtime" if target=="runtime" else "dev" }}
{%- endif -%}
\ No newline at end of file
# syntax=docker/dockerfile:1.10.0 {#
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
#}
#############################
########## AWS EFA ##########
#############################
# #
# PURPOSE: AWS EFA support layer # This stage extends the runtime/dev stage with AWS EFA installer
#
# This Dockerfile adds AWS EFA (Elastic Fabric Adapter) support on top of
# the runtime or dev stages from framework Dockerfiles (vllm, sglang, trtllm).
#
# Usage (via build.sh with --make-efa flag):
# ./build.sh --framework vllm --target runtime --make-efa
# ./build.sh --framework vllm --target local-dev --make-efa
ARG BASE_IMAGE
ARG EFA_VERSION
###########################################################
########## Runtime with AWS EFA ##########################
###########################################################
#
# This stage extends the runtime stage with AWS EFA installer
# which includes: libfabric and aws-ofi-nccl plugin # which includes: libfabric and aws-ofi-nccl plugin
# #
# Use this stage when deploying on AWS infrastructure with EFA support # Use this stage when deploying on AWS infrastructure with EFA support
FROM ${BASE_IMAGE} AS runtime-aws FROM ${EFA_BASE_IMAGE} AS aws
ARG EFA_VERSION ARG EFA_VERSION
{% if target == "runtime" %}
USER root USER root
{% endif %}
# Install AWS EFA installer with bundled libfabric and aws-ofi-nccl # Install AWS EFA installer with bundled libfabric and aws-ofi-nccl
# Flags explanation: # Flags explanation:
...@@ -48,43 +38,9 @@ RUN mkdir -p /tmp/efa && \ ...@@ -48,43 +38,9 @@ RUN mkdir -p /tmp/efa && \
ENV EFA_VERSION="${EFA_VERSION}" ENV EFA_VERSION="${EFA_VERSION}"
{% if target == "runtime" %}
USER dynamo USER dynamo
{% endif %}
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
########################################################################
########## Development with AWS EFA (run.sh, runs as root user) ########
########################################################################
#
# PURPOSE: Development environment with AWS EFA support
#
# This stage extends dev stages with development tools for building and
# debugging on EFA-enabled AWS instances.
FROM ${BASE_IMAGE} AS dev-aws
ARG EFA_VERSION
# Dev stage runs as root, no USER switch needed
# Install AWS EFA installer with bundled libfabric and aws-ofi-nccl
# Flags explanation:
# --skip-kmod: Skip kernel module installation (handled by host)
# --skip-limit-conf: Skip ulimit configuration (handled by container runtime)
# --no-verify: Skip GPG verification (optional, can be removed if verification is needed)
RUN mkdir -p /tmp/efa && \
cd /tmp/efa && \
curl --retry 3 --retry-delay 2 -fsSL -o aws-efa-installer-${EFA_VERSION}.tar.gz \
https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_VERSION}.tar.gz && \
tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz && \
cd aws-efa-installer && \
apt-get update && \
./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify && \
rm -rf /tmp/efa && \
rm -rf /opt/amazon/aws-ofi-nccl && \
ldconfig
ENV EFA_VERSION="${EFA_VERSION}"
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment