Unverified commit ac020629, authored by Dillon Cullinan, committed by GitHub
Browse files

feat: Dockerfile templating (#5633)


Signed-off-by: Dillon Cullinan <dcullinan@nvidia.com>
parent 5755a8de
...@@ -143,23 +143,12 @@ Build the appropriate framework image (e.g., `dynamo:latest-vllm-local-dev`) fro ...@@ -143,23 +143,12 @@ Build the appropriate framework image (e.g., `dynamo:latest-vllm-local-dev`) fro
```bash ```bash
# Single command approach (recommended) # Single command approach (recommended)
export FRAMEWORK=VLLM # Note: any of VLLM, SGLANG, TRTLLM can be used export FRAMEWORK=VLLM # Note: any of VLLM, SGLANG, TRTLLM can be used
./container/build.sh --framework $FRAMEWORK --target local-dev python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile .
# Now you've created both dynamo:latest-vllm and dynamo:latest-vllm-local-dev # Now you've created both dynamo:latest-vllm and dynamo:latest-vllm-local-dev
``` ```
Alternatively, you can build a development container, then build local-dev:
```bash
export FRAMEWORK=VLLM
./container/build.sh --framework $FRAMEWORK
# Now you have a development image dynamo:latest-vllm
./container/build.sh --dev-image dynamo:latest-${FRAMEWORK,,}
# Now you have a local-dev image dynamo:latest-vllm-local-dev
```
The local-dev image will give you local user permissions matching your host user and includes extra developer utilities (debugging tools, text editors, system monitors, etc.). The local-dev image will give you local user permissions matching your host user and includes extra developer utilities (debugging tools, text editors, system monitors, etc.).
### Step 1: Choose Your Framework ### Step 1: Choose Your Framework
...@@ -427,10 +416,8 @@ If you see errors like "container is not running" or "An error occurred setting ...@@ -427,10 +416,8 @@ If you see errors like "container is not running" or "An error occurred setting
# If missing, build the dev image first, then build local-dev # If missing, build the dev image first, then build local-dev
export FRAMEWORK=VLLM # Replace with VLLM, SGLANG, or TRTLLM export FRAMEWORK=VLLM # Replace with VLLM, SGLANG, or TRTLLM
./container/build.sh --framework $FRAMEWORK python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
# change to lower case portable way across shells docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile .
./container/build.sh --dev-image dynamo:latest-$(echo "$FRAMEWORK" | tr '[:upper:]' '[:lower:]') --framework "$FRAMEWORK"
# Now you have dynamo:latest-vllm-local-dev
``` ```
2. **Container startup failure:** 2. **Container startup failure:**
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
**/target/* **/target/*
**/*safetensors **/*safetensors
container/Dockerfile* container/Dockerfile*
container/*.Dockerfile
.venv .venv
.venv-docs .venv-docs
......
name: 'Docker Build' name: 'Docker Build'
description: 'Build Dynamo container images' description: 'Build Dynamo container images'
inputs: inputs:
# --- Common Docker Inputs
framework: framework:
description: 'Framework to build' description: 'Framework to build'
required: true required: true
...@@ -13,9 +14,14 @@ inputs: ...@@ -13,9 +14,14 @@ inputs:
description: 'Docker platform to build on, ie. linux/amd64' description: 'Docker platform to build on, ie. linux/amd64'
required: false required: false
default: 'linux/amd64' default: 'linux/amd64'
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
image_tag: image_tag:
description: 'Custom image tag (optional, defaults to framework:latest)' description: 'Custom image tag (optional, defaults to framework:latest)'
required: false required: false
# --- Secret Inputs
ci_token: ci_token:
description: 'CI Token' description: 'CI Token'
required: false required: false
...@@ -34,21 +40,6 @@ inputs: ...@@ -34,21 +40,6 @@ inputs:
aws_secret_access_key: aws_secret_access_key:
description: 'AWS Secret Access Key' description: 'AWS Secret Access Key'
required: false required: false
base_image_tag:
description: 'Optional override for base image tag passed to build.sh'
required: false
runtime_image_tag:
description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
required: false
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
enable_kvbm:
description: 'Enable KVBM support (optional)'
required: false
dynamo_base_image:
description: 'Pre-built Dynamo base image to use instead of building from scratch'
required: false
outputs: outputs:
image_tag: image_tag:
...@@ -70,6 +61,44 @@ runs: ...@@ -70,6 +61,44 @@ runs:
shell: bash shell: bash
run: | run: |
docker system prune -af docker system prune -af
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0
with:
python-version: '3.12'
pip-install: jinja2 pyyaml
- name: Generate Dockerfile
shell: bash
run: |
echo "::group::Generating Dockerfile"
echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
python ./container/render.py \
--target=${{ inputs.target }} \
--framework=${{ inputs.framework }} \
--platform=${{ inputs.platform }} \
--cuda-version=${{ inputs.cuda_version }} \
--show-result \
--short-output
echo "::endgroup::"
- name: Build EPP image
if: ${{ inputs.target == 'frontend' }}
shell: bash
env:
ECR_HOSTNAME: ${{ inputs.aws_account_id }}.dkr.ecr.${{ inputs.aws_default_region }}.amazonaws.com
run: |
sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
. "$HOME/.cargo/env"
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
cargo install cbindgen
DOCKER_PROXY="${ECR_HOSTNAME}/dockerhub/"
pushd deploy/inference-gateway/epp
make all DOCKER_PROXY=${DOCKER_PROXY}
popd
EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
echo "EPP_IMAGE=${EPP_IMAGE}" >> $GITHUB_ENV
- name: Build image - name: Build image
id: build id: build
shell: bash shell: bash
...@@ -107,50 +136,34 @@ runs: ...@@ -107,50 +136,34 @@ runs:
echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> $GITHUB_ENV echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> $GITHUB_ENV
echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}" echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}"
# Collect optional overrides provided by the workflow
# Set base cache args and set --cache-to if this is a main commit # Set base cache args and set --cache-to if this is a main commit
# TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export # TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
EXTRA_ARGS="" CACHE_ARGS=""
if [[ "${{ inputs.target }}" != "frontend" ]]; then if [[ "${{ inputs.target }}" != "frontend" ]]; then
EXTRA_ARGS="--cache-to type=inline " CACHE_ARGS="--cache-to type=inline "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
# Release branches also use release cache # Release branches also use release cache
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max " CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
elif [[ "$GITHUB_REF_NAME" == "main" ]]; then elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max " CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
fi fi
fi fi
echo "$EXTRA_ARGS" EPP_IMAGE_ARG=""
# Collect optional overrides provided by the workflow if [[ ${{ inputs.target }} == "frontend" ]]; then
if [ -n "${{ inputs.base_image_tag }}" ]; then EPP_IMAGE_ARG="--build-arg EPP_IMAGE=${EPP_IMAGE}"
EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
fi
if [ -n "${{ inputs.runtime_image_tag }}" ]; then
EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
fi
if [ -n "${{ inputs.cuda_version }}" ]; then
EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
fi
if [ -n "${{ inputs.dynamo_base_image }}" ]; then
EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
fi
if [ -n "${{ inputs.enable_kvbm }}" ]; then
EXTRA_ARGS+=" --build-arg ENABLE_KVBM=${{ inputs.enable_kvbm }}"
fi fi
# Execute build and capture output (show on console AND save to file) docker buildx build \
./container/build.sh --tag "$IMAGE_TAG" \ --progress=plain \
--target ${{ inputs.target }} \ --tag "$IMAGE_TAG" \
--vllm-max-jobs 10 \ --load \
--framework ${{ inputs.framework }} \ -f ./container/rendered.Dockerfile \
--platform ${{ inputs.platform }} \ $CACHE_ARGS \
--use-sccache \ $EPP_IMAGE_ARG . 2>&1 | tee "${BUILD_LOG_FILE}"
--sccache-bucket "$SCCACHE_S3_BUCKET" \
--sccache-region "$AWS_DEFAULT_REGION" $EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
BUILD_EXIT_CODE=${PIPESTATUS[0]} BUILD_EXIT_CODE=${PIPESTATUS[0]}
......
...@@ -13,6 +13,9 @@ inputs: ...@@ -13,6 +13,9 @@ inputs:
description: 'Docker platform to build on, ie. linux/amd64' description: 'Docker platform to build on, ie. linux/amd64'
required: false required: false
default: 'linux/amd64' default: 'linux/amd64'
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
image_tag: image_tag:
description: 'Custom image tag' description: 'Custom image tag'
required: true required: true
...@@ -34,21 +37,6 @@ inputs: ...@@ -34,21 +37,6 @@ inputs:
aws_secret_access_key: aws_secret_access_key:
description: 'AWS Secret Access Key' description: 'AWS Secret Access Key'
required: false required: false
base_image_tag:
description: 'Optional override for base image tag passed to build.sh'
required: false
runtime_image_tag:
description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
required: false
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: true
enable_kvbm:
description: 'Enable KVBM support (optional)'
required: false
dynamo_base_image:
description: 'Pre-built Dynamo base image to use instead of building from scratch'
required: false
no_cache: no_cache:
description: 'Disable Docker build cache' description: 'Disable Docker build cache'
required: false required: false
...@@ -117,21 +105,20 @@ runs: ...@@ -117,21 +105,20 @@ runs:
# Collect optional overrides provided by the workflow # Collect optional overrides provided by the workflow
# Set base cache args and set --cache-to if this is a main commit # Set base cache args and set --cache-to if this is a main commit
# TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export # TODO: Fix this - Skip cache for frontend target - a different docker driver is used for the EPP build, which causes issues with cache export
EXTRA_ARGS="" CACHE_ARGS=""
if [[ "${{ inputs.target }}" != "frontend" ]]; then if [[ "${{ inputs.target }}" != "frontend" ]]; then
EXTRA_ARGS="--cache-to type=inline " CACHE_ARGS="--cache-to type=inline "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
EXTRA_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache " CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
# Release branches also use release cache # Release branches also use release cache
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max " CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
elif [[ "$GITHUB_REF_NAME" == "main" ]]; then elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
EXTRA_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max " CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
fi fi
fi fi
echo "$CACHE_ARGS"
echo "$EXTRA_ARGS"
# Collect optional overrides provided by the workflow # Collect optional overrides provided by the workflow
if [[ "${{ inputs.ci }}" == "true" ]]; then if [[ "${{ inputs.ci }}" == "true" ]]; then
...@@ -139,21 +126,6 @@ runs: ...@@ -139,21 +126,6 @@ runs:
EXTRA_ARGS+=" --ci" EXTRA_ARGS+=" --ci"
fi fi
if [ -n "${{ inputs.base_image_tag }}" ]; then
EXTRA_ARGS+="--base-image-tag ${{ inputs.base_image_tag }} "
fi
if [ -n "${{ inputs.runtime_image_tag }}" ]; then
EXTRA_ARGS+="--build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }} "
fi
if [ -n "${{ inputs.cuda_version }}" ]; then
EXTRA_ARGS+="--build-arg CUDA_VERSION=${{ inputs.cuda_version }} "
fi
if [ -n "${{ inputs.dynamo_base_image }}" ]; then
EXTRA_ARGS+=" --dynamo-base-image ${{ inputs.dynamo_base_image }}"
fi
if [ -n "${{ inputs.enable_kvbm }}" ]; then
EXTRA_ARGS+=" --build-arg ENABLE_KVBM=${{ inputs.enable_kvbm }}"
fi
if [ "${{ inputs.no_cache }}" == "true" ]; then if [ "${{ inputs.no_cache }}" == "true" ]; then
EXTRA_ARGS+=" --no-cache" EXTRA_ARGS+=" --no-cache"
fi fi
...@@ -161,9 +133,9 @@ runs: ...@@ -161,9 +133,9 @@ runs:
EXTRA_ARGS+=" --build-arg CARGO_BUILD_JOBS=4 --use-sccache" EXTRA_ARGS+=" --build-arg CARGO_BUILD_JOBS=4 --use-sccache"
fi fi
if [ "${{ inputs.push_image }}" == "true" ]; then if [ "${{ inputs.push_image }}" == "true" ]; then
EXTRA_ARGS+=" --push --no-load" EXTRA_ARGS+=" --push"
elif [ "${{ inputs.no_load }}" == "true" ]; then elif [ "${{ inputs.no_load }}" == "false" ]; then
EXTRA_ARGS+=" --no-load" EXTRA_ARGS+=" --load"
fi fi
# Add extra tags (each as a separate --tag argument) # Add extra tags (each as a separate --tag argument)
...@@ -176,16 +148,14 @@ runs: ...@@ -176,16 +148,14 @@ runs:
done <<< "$EXTRA_TAGS" done <<< "$EXTRA_TAGS"
fi fi
# Execute build and capture output (show on console AND save to file) docker buildx build \
./container/build.sh --tag "$IMAGE_TAG" \ --progress=plain \
--target ${{ inputs.target }} \ --tag "$IMAGE_TAG" \
--vllm-max-jobs 10 \ --platform linux/${{ inputs.platform }} \
--no-tag-latest \ -f ./container/rendered.Dockerfile \
--framework ${{ inputs.framework }} \ $CACHE_ARGS \
--platform ${{ inputs.platform }} \ $EXTRA_ARGS \
--sccache-bucket "$SCCACHE_S3_BUCKET" \ $EPP_IMAGE_ARG . 2>&1 | tee "${BUILD_LOG_FILE}"
--sccache-region "$AWS_DEFAULT_REGION" \
$EXTRA_ARGS 2>&1 | tee "${BUILD_LOG_FILE}"
BUILD_EXIT_CODE=${PIPESTATUS[0]} BUILD_EXIT_CODE=${PIPESTATUS[0]}
......
...@@ -63,6 +63,9 @@ ignore: ...@@ -63,6 +63,9 @@ ignore:
- 'container/run.sh' - 'container/run.sh'
- 'container/use-sccache.sh' - 'container/use-sccache.sh'
- 'container/dev/**' - 'container/dev/**'
- 'container/templates/aws.Dockerfile'
- 'container/templates/local_dev.Dockerfile'
- 'container/templates/dev.Dockerfile'
ci: &ci ci: &ci
- '.github/workflows/**' - '.github/workflows/**'
...@@ -78,6 +81,12 @@ core: ...@@ -78,6 +81,12 @@ core:
- *ci - *ci
- 'container/build.sh' - 'container/build.sh'
- 'container/Dockerfile' - 'container/Dockerfile'
- 'container/render.py'
- 'container/Dockerfile.template'
- 'container/context.yaml'
- 'container/templates/args.Dockerfile'
- 'container/templates/dynamo_*'
- 'container/templates/wheel_builder.Dockerfile'
- '.dockerignore' - '.dockerignore'
- 'container/deps/*' - 'container/deps/*'
- '.cargo/config.toml' - '.cargo/config.toml'
...@@ -120,6 +129,7 @@ vllm: ...@@ -120,6 +129,7 @@ vllm:
- 'container/deps/vllm/**' - 'container/deps/vllm/**'
- 'examples/backends/vllm/**' - 'examples/backends/vllm/**'
- 'components/src/dynamo/vllm/**' - 'components/src/dynamo/vllm/**'
- 'container/templates/vllm_*'
sglang: sglang:
- '!**/*.md' - '!**/*.md'
...@@ -127,6 +137,7 @@ sglang: ...@@ -127,6 +137,7 @@ sglang:
- 'container/Dockerfile.sglang' - 'container/Dockerfile.sglang'
- 'examples/backends/sglang/**' - 'examples/backends/sglang/**'
- 'components/src/dynamo/sglang/**' - 'components/src/dynamo/sglang/**'
- 'container/templates/sglang_*'
trtllm: trtllm:
- '!**/*.md' - '!**/*.md'
...@@ -136,6 +147,7 @@ trtllm: ...@@ -136,6 +147,7 @@ trtllm:
- 'examples/backends/trtllm/**' - 'examples/backends/trtllm/**'
- 'components/src/dynamo/trtllm/**' - 'components/src/dynamo/trtllm/**'
- 'container/build_trtllm_wheel.sh' - 'container/build_trtllm_wheel.sh'
- 'container/templates/trtllm_*'
frontend: frontend:
- '!**/*.md' - '!**/*.md'
...@@ -153,6 +165,7 @@ frontend: ...@@ -153,6 +165,7 @@ frontend:
- 'components/src/dynamo/frontend/**' - 'components/src/dynamo/frontend/**'
- 'components/src/dynamo/common/**' - 'components/src/dynamo/common/**'
- 'deploy/inference-gateway/**' - 'deploy/inference-gateway/**'
- 'container/templates/frontend.Dockerfile'
rust: rust:
- '.github/workflows/pre-merge.yml' - '.github/workflows/pre-merge.yml'
......
...@@ -68,21 +68,6 @@ jobs: ...@@ -68,21 +68,6 @@ jobs:
uses: actions/setup-go@v5 uses: actions/setup-go@v5
with: with:
go-version: '1.24' go-version: '1.24'
- name: Install dependencies
shell: bash
run: |
set -euo pipefail
# Install system dependencies from apt
sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
# Install Rust (cargo + rustc)
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
# Make cargo available to later steps
echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
- name: Install cbindgen
shell: bash
run: |
set -euo pipefail
cargo install cbindgen
- name: Docker Login - name: Docker Login
uses: ./.github/actions/docker-login uses: ./.github/actions/docker-login
with: with:
...@@ -95,10 +80,10 @@ jobs: ...@@ -95,10 +80,10 @@ jobs:
id: build-image id: build-image
uses: ./.github/actions/docker-build uses: ./.github/actions/docker-build
env: env:
PLATFORMS: linux/${{ matrix.platform.arch }} PLATFORMS: ${{ matrix.platform.arch }}
TARGETARCH: ${{ matrix.platform.arch }} TARGETARCH: ${{ matrix.platform.arch }}
with: with:
framework: none framework: dynamo
target: frontend target: frontend
platform: ${{ env.PLATFORMS }} platform: ${{ env.PLATFORMS }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
......
...@@ -10,6 +10,10 @@ on: ...@@ -10,6 +10,10 @@ on:
description: 'Framework name (vllm, sglang, trtllm)' description: 'Framework name (vllm, sglang, trtllm)'
required: true required: true
type: string type: string
target:
description: 'Target stage for Docker rendering'
required: true
type: string
platforms: platforms:
description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])' description: 'Platforms to build (JSON array, e.g., ["amd64", "arm64"])'
required: true required: true
...@@ -86,6 +90,7 @@ jobs: ...@@ -86,6 +90,7 @@ jobs:
with: with:
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
platform: ${{ matrix.platform }} platform: ${{ matrix.platform }}
target: ${{ inputs.target }}
cuda_version: ${{ matrix.cuda_version }} cuda_version: ${{ matrix.cuda_version }}
extra_tags: ${{ inputs.extra_tags }} extra_tags: ${{ inputs.extra_tags }}
no_cache: ${{ inputs.no_cache }} no_cache: ${{ inputs.no_cache }}
......
...@@ -10,6 +10,10 @@ on: ...@@ -10,6 +10,10 @@ on:
description: 'Framework name (vllm, sglang, trtllm)' description: 'Framework name (vllm, sglang, trtllm)'
required: true required: true
type: string type: string
target:
description: 'Target stage for Docker rendering'
required: true
type: string
platform: platform:
description: 'Platform to build (amd64 or arm64)' description: 'Platform to build (amd64 or arm64)'
required: true required: true
...@@ -105,13 +109,17 @@ jobs: ...@@ -105,13 +109,17 @@ jobs:
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0 uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with: with:
lfs: true lfs: true
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 #v6.2.0
with:
python-version: '3.12'
pip-install: jinja2 pyyaml
- name: Calculate extra tags with platform suffix # will get redundant upon multi arch builds support - name: Calculate extra tags with platform suffix # will get redundant upon multi arch builds support
id: extra-tags id: extra-tags
shell: bash shell: bash
env: env:
EXTRA_TAGS: ${{ inputs.extra_tags }} EXTRA_TAGS: ${{ inputs.extra_tags }}
PLATFORM: ${{ inputs.platform }} PLATFORM: linux/${{ inputs.platform }}
run: | run: |
if [ -n "$EXTRA_TAGS" ]; then if [ -n "$EXTRA_TAGS" ]; then
RESULT="" RESULT=""
...@@ -134,7 +142,6 @@ jobs: ...@@ -134,7 +142,6 @@ jobs:
azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }} azure_acr_hostname: ${{ secrets.AZURE_ACR_HOSTNAME }}
azure_acr_user: ${{ secrets.AZURE_ACR_USER }} azure_acr_user: ${{ secrets.AZURE_ACR_USER }}
azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }} azure_acr_password: ${{ secrets.AZURE_ACR_PASSWORD }}
- name: Calculate target tag - name: Calculate target tag
id: calculate-target-tag id: calculate-target-tag
shell: bash shell: bash
...@@ -146,7 +153,6 @@ jobs: ...@@ -146,7 +153,6 @@ jobs:
echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT echo "default_target_image_uri=${DEFAULT_TARGET_IMAGE_URI}" >> $GITHUB_OUTPUT
echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT echo "target_tag_plain=${TARGET_TAG_PLAIN}" >> $GITHUB_OUTPUT
echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT echo "cuda_version_plain=${CUDA_VERSION}" >> $GITHUB_OUTPUT
- name: Initialize Dynamo Builder - name: Initialize Dynamo Builder
uses: ./.github/actions/init-dynamo-builder uses: ./.github/actions/init-dynamo-builder
with: with:
...@@ -154,28 +160,37 @@ jobs: ...@@ -154,28 +160,37 @@ jobs:
flavor: ${{ inputs.framework }} flavor: ${{ inputs.framework }}
arch: ${{ inputs.platform }} arch: ${{ inputs.platform }}
cuda_version: ${{ inputs.cuda_version }} cuda_version: ${{ inputs.cuda_version }}
- name: Print Build Container inputs - name: Print Build Container inputs
run: | run: |
echo "=== Build Container Inputs ===" echo "=== Build Container Inputs ==="
echo "image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}" echo "image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}"
echo "framework: ${{ inputs.framework }}" echo "framework: ${{ inputs.framework }}"
echo "target: runtime" echo "target: runtime"
echo "platform: linux/${{ inputs.platform }}" echo "platform: ${{ inputs.platform }}"
echo "cuda_version: ${{ inputs.cuda_version }}" echo "cuda_version: ${{ inputs.cuda_version }}"
echo "no_cache: ${{ inputs.no_cache }}" echo "no_cache: ${{ inputs.no_cache }}"
echo "extra_tags: ${{ steps.extra-tags.outputs.tags }}" echo "extra_tags: ${{ steps.extra-tags.outputs.tags }}"
echo "push_image: ${{ inputs.push_image }}" echo "push_image: ${{ inputs.push_image }}"
echo "no_load: ${{ inputs.no_load }}" echo "no_load: ${{ inputs.no_load }}"
- name: Generate Dockerfile
shell: bash
run: |
echo "Generating Dockerfile for target: ${{ inputs.target }} and framework: ${{ inputs.framework }}"
python ./container/render.py \
--target=${{ inputs.target }} \
--framework=${{ inputs.framework }} \
--platform=${{ inputs.platform }} \
--cuda-version=${{ inputs.cuda_version }} \
--show-result \
--short-output
- name: Build Container - name: Build Container
id: build-image id: build-image
uses: ./.github/actions/docker-remote-build uses: ./.github/actions/docker-remote-build
with: with:
image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }} image_tag: ${{ steps.calculate-target-tag.outputs.default_target_image_uri }}
framework: ${{ inputs.framework }} framework: ${{ inputs.framework }}
target: runtime target: ${{ inputs.target }}
platform: linux/${{ inputs.platform }} platform: ${{ inputs.platform }}
cuda_version: ${{ inputs.cuda_version }} cuda_version: ${{ inputs.cuda_version }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
......
...@@ -99,17 +99,15 @@ jobs: ...@@ -99,17 +99,15 @@ jobs:
with: with:
framework: ${{ matrix.framework }} framework: ${{ matrix.framework }}
target: runtime target: runtime
platform: linux/amd64 platform: amd64
base_image_tag: '' cuda_version: '12.9'
runtime_image_tag: '' image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
cuda_version: ''
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }} sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-amd64:${{ github.run_id }}
- name: Tag and Push Runtime Images - name: Tag and Push Runtime Images
uses: ./.github/actions/docker-tag-push uses: ./.github/actions/docker-tag-push
with: with:
...@@ -132,13 +130,9 @@ jobs: ...@@ -132,13 +130,9 @@ jobs:
matrix: matrix:
include: include:
- framework: vllm - framework: vllm
base_image_tag: '25.06-cuda12.9-devel-ubuntu24.04'
runtime_image_tag: '12.9.0-runtime-ubuntu24.04'
cuda_version: '12.9' cuda_version: '12.9'
- framework: sglang - framework: sglang
base_image_tag: '' cuda_version: '12.9'
runtime_image_tag: ''
cuda_version: ''
env: env:
ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com ECR_HOSTNAME: ${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com
steps: steps:
...@@ -158,17 +152,15 @@ jobs: ...@@ -158,17 +152,15 @@ jobs:
with: with:
framework: ${{ matrix.framework }} framework: ${{ matrix.framework }}
target: runtime target: runtime
platform: linux/arm64 platform: arm64
base_image_tag: ${{ matrix.base_image_tag }}
runtime_image_tag: ${{ matrix.runtime_image_tag }}
cuda_version: ${{ matrix.cuda_version }} cuda_version: ${{ matrix.cuda_version }}
image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }} sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-arm64:${{ github.run_id }}
- name: Tag and Push Runtime Images - name: Tag and Push Runtime Images
uses: ./.github/actions/docker-tag-push uses: ./.github/actions/docker-tag-push
with: with:
...@@ -210,17 +202,15 @@ jobs: ...@@ -210,17 +202,15 @@ jobs:
with: with:
framework: ${{ matrix.framework }} framework: ${{ matrix.framework }}
target: runtime target: runtime
platform: linux/amd64 platform: amd64
base_image_tag: ''
runtime_image_tag: ''
cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }} cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }} sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-cuda13-amd64:${{ github.run_id }}
- name: Tag and Push CUDA 13 Runtime Images - name: Tag and Push CUDA 13 Runtime Images
uses: ./.github/actions/docker-tag-push uses: ./.github/actions/docker-tag-push
with: with:
...@@ -262,17 +252,15 @@ jobs: ...@@ -262,17 +252,15 @@ jobs:
with: with:
framework: ${{ matrix.framework }} framework: ${{ matrix.framework }}
target: runtime target: runtime
platform: linux/arm64 platform: arm64
base_image_tag: ''
runtime_image_tag: ''
cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }} cuda_version: ${{ matrix.framework == 'trtllm' && '13.1' || '13.0' }}
image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }} sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }} aws_account_id: ${{ secrets.AWS_ACCOUNT_ID }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
image_tag: runtime-${{ matrix.framework }}-cuda13-arm64:${{ github.run_id }}
- name: Tag and Push CUDA 13 Runtime Images - name: Tag and Push CUDA 13 Runtime Images
uses: ./.github/actions/docker-tag-push uses: ./.github/actions/docker-tag-push
with: with:
......
...@@ -66,11 +66,27 @@ jobs: ...@@ -66,11 +66,27 @@ jobs:
id: define_image_tag id: define_image_tag
run: | run: |
echo "image_tag=dynamo:latest-dev" >> $GITHUB_OUTPUT echo "image_tag=dynamo:latest-dev" >> $GITHUB_OUTPUT
# Render container/rendered.Dockerfile for the standalone dynamo dev image.
# The log line reports the values actually passed to render.py below instead
# of workflow inputs, which this step never forwards to the command.
- name: Generate Dockerfile
  shell: bash
  run: |
    echo "Generating Dockerfile for target: dev and framework: dynamo"
    python ./container/render.py \
      --target=dev \
      --framework=dynamo \
      --platform=amd64 \
      --show-result \
      --short-output
- name: Build image - name: Build image
env: env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }} GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
run: | run: |
./container/build.sh --tag ${{ steps.define_image_tag.outputs.image_tag }} --target dev --framework none --enable-kvbm --enable-media-ffmpeg docker buildx build \
--progress=plain \
--tag ${{ steps.define_image_tag.outputs.image_tag }} \
-f ./container/rendered.Dockerfile \
--build-arg ENABLE_MEDIA_FFMPEG=true \
--build-arg ENABLE_KVBM=true \
--load .
- name: Start services with docker-compose - name: Start services with docker-compose
working-directory: ./deploy working-directory: ./deploy
run: | run: |
......
...@@ -177,6 +177,7 @@ jobs: ...@@ -177,6 +177,7 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with: with:
framework: vllm framework: vllm
target: runtime
platforms: '["amd64", "arm64"]' platforms: '["amd64", "arm64"]'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
extra_tags: | extra_tags: |
...@@ -194,6 +195,7 @@ jobs: ...@@ -194,6 +195,7 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with: with:
framework: sglang framework: sglang
target: runtime
platforms: '["amd64", "arm64"]' platforms: '["amd64", "arm64"]'
cuda_versions: '["12.9", "13.0"]' cuda_versions: '["12.9", "13.0"]'
extra_tags: | extra_tags: |
...@@ -211,11 +213,12 @@ jobs: ...@@ -211,11 +213,12 @@ jobs:
uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml uses: ./.github/workflows/build-test-distribute-flavor-matrix.yml
with: with:
framework: trtllm framework: trtllm
target: runtime
platforms: '["amd64", "arm64"]' platforms: '["amd64", "arm64"]'
cuda_versions: '["13.1"]' cuda_versions: '["13.1"]'
extra_tags: | extra_tags: |
${{ github.ref_name == 'main' && 'main-trtllm' || '' }} ${{ github.ref_name == 'main' && 'main-trtllm' || '' }}
${{ github.ref_name == 'main' && format('main-trt-llm-{0}', github.sha) || '' }} ${{ github.ref_name == 'main' && format('main-trtllm-{0}', github.sha) || '' }}
builder_name: ${{ needs.changed-files.outputs.builder_name }} builder_name: ${{ needs.changed-files.outputs.builder_name }}
secrets: inherit secrets: inherit
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
.vs/ .vs/
.vscode/ .vscode/
.helix .helix
*rendered.Dockerfile
[Bb]inlog/ [Bb]inlog/
[Bb][Uu][Ii][Ll][Dd]/ [Bb][Uu][Ii][Ll][Dd]/
[Oo][Bb][Jj]/ [Oo][Bb][Jj]/
......
This diff is collapsed.
# syntax=docker/dockerfile:1.10.0-labs
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
{# The build-argument template is included first, before any stage template. #}
{% include "templates/args.Dockerfile" %}
# --- Base Image Stages
{# Any framework other than "dynamo" always gets the base and wheel-builder stages. #}
{% if framework != "dynamo" %}
{% include "templates/dynamo_base.Dockerfile" %}
{% include "templates/wheel_builder.Dockerfile" %}
{# The "dynamo" framework includes only the stage templates listed for the requested target. #}
{% elif framework == "dynamo" %}
{% if target == "frontend" %}
{% include "templates/dynamo_base.Dockerfile" %}
{% include "templates/wheel_builder.Dockerfile" %}
{% include "templates/dynamo_runtime.Dockerfile" %}
{% include "templates/frontend.Dockerfile" %}
{% elif target == "runtime" or target == "dev" or target == "local-dev" %}
{% include "templates/dynamo_base.Dockerfile" %}
{% include "templates/wheel_builder.Dockerfile" %}
{% include "templates/dynamo_runtime.Dockerfile" %}
{% elif target == "wheel_builder" %}
{% include "templates/dynamo_base.Dockerfile" %}
{% include "templates/wheel_builder.Dockerfile" %}
{% elif target == "base" %}
{% include "templates/dynamo_base.Dockerfile" %}
{# Any other target adds no base stages for the "dynamo" framework. #}
{% endif %}
{% endif %}
# --- Framework Stages
{# sglang is checked first, so it receives its runtime template regardless of the requested target. #}
{% if framework == "sglang" %}
# SGLang is the only framework without a `framework` target currently, needs special treatment
{% include "templates/sglang_runtime.Dockerfile" %}
{# target "framework": only the <framework>_framework template is included. #}
{% elif target == "framework" and framework != "dynamo" %}
{% include "templates/" ~ framework ~ "_framework.Dockerfile" %}
{# runtime, dev and local-dev: the framework template followed by the runtime template. #}
{% elif ( target == "runtime" or target == "dev" or target == "local-dev" ) and framework != "dynamo" %}
{% include "templates/" ~ framework ~ "_framework.Dockerfile" %}
{% include "templates/" ~ framework ~ "_runtime.Dockerfile" %}
{% else %}
# --- No framework stages included
{% endif %}
{# For the runtime target the AWS template is appended here; dev targets append it after the dev template instead. #}
{% if make_efa == true and target == "runtime" %}
{% include "templates/aws.Dockerfile" %}
{% endif %}
# --- Development Stages
{# dev and local-dev both include the dev template; local-dev additionally includes local_dev last. #}
{% if target == "dev" or target == "local-dev" %}
{% include "templates/dev.Dockerfile" %}
{# With make_efa set, the AWS template is placed after dev and before local_dev. #}
{% if make_efa == true %}
{% include "templates/aws.Dockerfile" %}
{% endif %}
{% if target == "local-dev" %}
{% include "templates/local_dev.Dockerfile" %}
{% endif %}
{% else %}
# --- No development stages included
{% endif %}
...@@ -6,22 +6,21 @@ The NVIDIA Dynamo project uses containerized development and deployment to maint ...@@ -6,22 +6,21 @@ The NVIDIA Dynamo project uses containerized development and deployment to maint
### Core Components ### Core Components
- **`build.sh`** - A Docker image builder that creates containers for different AI inference frameworks (vLLM, TensorRT-LLM, SGLang). It handles framework-specific dependencies, multi-stage builds, and development vs production configurations. - **`render.py`** - A render script used to generate Dockerfiles for AI inference frameworks (vLLM, TensorRT-LLM, SGLang). The generated Dockerfile includes the needed multi-stage steps for development vs production configurations
- **`run.sh`** - A container runtime manager that launches Docker containers with proper GPU access, volume mounts, and environment configurations. It supports different development workflows from root-based legacy setups to user-based development environments. - **`run.sh`** - A container runtime manager that launches Docker containers with proper GPU access, volume mounts, and environment configurations. It supports different development workflows from root-based legacy setups to user-based development environments.
- **Multiple Dockerfiles** - Framework-specific Dockerfiles that define the container images: ## Rendering Requirements:
- `Dockerfile.vllm` - For vLLM inference backend - Python
- `Dockerfile.trtllm` - For TensorRT-LLM inference backend - Python Packages:
- `Dockerfile.sglang` - For SGLang inference backend - pyyaml
- `Dockerfile` - Base/standalone configuration - jinja2
- `Dockerfile.epp` - For building the Endpoint Picker (EPP) image
### Stage Summary for Frameworks ### Stage Summary for Frameworks
<details> <details>
<summary>Show Stage Summary Table</summary> <summary>Show Stage Summary Table</summary>
Dockerfile.${FRAMEWORK} General Structure Dockerfile General Structure
Below is a summary of the general file structure for the framework Dockerfile stages. Some exceptions exist. Below is a summary of the general file structure for the framework Dockerfile stages. Some exceptions exist.
...@@ -80,14 +79,13 @@ The scripts in this directory abstract away the complexity of Docker commands wh ...@@ -80,14 +79,13 @@ The scripts in this directory abstract away the complexity of Docker commands wh
### Convenience Scripts vs Direct Docker Commands ### Convenience Scripts vs Direct Docker Commands
The `build.sh` and `run.sh` scripts are convenience wrappers that simplify common Docker operations. They automatically handle: The `run.sh` script and rendering scripts are convenience wrappers that simplify common Docker operations. They automatically handle:
- Framework-specific image selection and tagging
- GPU access configuration and runtime selection - GPU access configuration and runtime selection
- Volume mount setup for development workflows - Volume mount setup for development workflows
- Environment variable management - Environment variable management
- Build argument construction for multi-stage builds - Build argument construction for multi-stage builds
**You can always use Docker commands directly** if you prefer more control or want to customize beyond what the scripts provide. The scripts use `--dry-run` flags to show you the exact Docker commands they would execute, making it easy to understand and modify the underlying operations. **You can always use Docker commands directly** if you prefer more control or want to customize beyond what the scripts provide. The `run.sh` script uses a `--dry-run` flag to show you the exact commands it would execute, making it easy to understand and modify the underlying operations.
## Development Targets Feature Matrix ## Development Targets Feature Matrix
...@@ -117,10 +115,11 @@ The `build.sh` and `run.sh` scripts are convenience wrappers that simplify commo ...@@ -117,10 +115,11 @@ The `build.sh` and `run.sh` scripts are convenience wrappers that simplify commo
### 1. runtime target (runs as non-root dynamo user): ### 1. runtime target (runs as non-root dynamo user):
```bash ```bash
# Build runtime image # Build runtime image
./build.sh --framework vllm --target runtime python container/render.py --framework vllm --target runtime --short-output
docker build -t dynamo:latest-vllm-runtime -f container/rendered.Dockerfile .
# Run runtime container # Run runtime container
./run.sh --image dynamo:latest-vllm-runtime -it container/run.sh --image dynamo:latest-vllm-runtime -it
``` ```
### 2. local-dev + `run.sh` (runs as dynamo user with matched host UID/GID): ### 2. local-dev + `run.sh` (runs as dynamo user with matched host UID/GID):
...@@ -133,20 +132,20 @@ Use VS Code/Cursor Dev Container Extension with devcontainer.json configuration. ...@@ -133,20 +132,20 @@ Use VS Code/Cursor Dev Container Extension with devcontainer.json configuration.
## Build and Run Scripts Overview ## Build and Run Scripts Overview
### build.sh - Docker Image Builder ### render.py - Docker Image Generator
The `build.sh` script is responsible for building Docker images for different AI inference frameworks. It supports multiple frameworks and configurations: The `render.py` script is responsible for generating Dockerfiles for different AI inference frameworks. It supports multiple frameworks and configurations:
**Purpose:** **Purpose:**
- Builds Docker images for NVIDIA Dynamo with support for vLLM, TensorRT-LLM, SGLang, or standalone configurations - Generates Dockerfiles for NVIDIA Dynamo with support for vLLM, TensorRT-LLM, SGLang, or standalone configurations
- Handles framework-specific dependencies and optimizations - Handles framework-specific dependencies and optimizations
- Manages build contexts, caching, and multi-stage builds - Manages build contexts, caching, and multi-stage builds
- Configures development vs production targets - Configures development vs production targets
**Key Features:** **Key Features:**
- **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE - **Framework Support**: vLLM (default when --framework not specified), TensorRT-LLM, SGLang, or NONE (standalone Dynamo)
- **Multi-stage Builds**: Build process with base images - **Multi-stage Builds**: Build process with base images
- **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `build.sh`. - **Development Targets**: Supports `dev`, `runtime`, and `local-dev` targets via `render.py`.
- **Build Caching**: Docker layer caching and sccache support - **Build Caching**: Docker layer caching and sccache support
- **GPU Optimization**: CUDA, EFA, and NIXL support - **GPU Optimization**: CUDA, EFA, and NIXL support
...@@ -221,52 +220,49 @@ Current cache types (as mounted in various Dockerfiles): ...@@ -221,52 +220,49 @@ Current cache types (as mounted in various Dockerfiles):
Note: `uv` commands set `UV_CACHE_DIR` per `RUN` so `uv` always uses the same path as the cache mount (instead of relying on `$HOME`). Note: `uv` commands set `UV_CACHE_DIR` per `RUN` so `uv` always uses the same path as the cache mount (instead of relying on `$HOME`).
**How `dev` / `local-dev` builds work:**
- `dev` and `local-dev` targets are defined in `container/dev/Dockerfile.dev`.
- The framework Dockerfiles (`Dockerfile.vllm`, `Dockerfile.trtllm`, `Dockerfile.sglang`, `Dockerfile`) define shared stages used by `Dockerfile.dev` (e.g. `runtime`, `dynamo_base`, `wheel_builder`).
- To build a single coherent Dockerfile, `build.sh` generates a temporary Dockerfile that is a literal concatenation of:
- the selected framework Dockerfile, then
- `container/dev/Dockerfile.dev`
`build.sh` then continues building normally using the temp Dockerfile path.
**Requirements and debugging:**
- By default the temp Dockerfile is deleted at the end of `build.sh`. To keep it for inspection, set `KEEP_DEV_DOCKERFILE_TEMP=1`.
> **💡 Tip**: The `dev` and `local-dev` images have source code baked in, but **using `--mount-workspace` with `run.sh` is recommended for development** to bind mount your local workspace for live editing. > **💡 Tip**: The `dev` and `local-dev` images have source code baked in, but **using `--mount-workspace` with `run.sh` is recommended for development** to bind mount your local workspace for live editing.
**Common Usage Examples:** **Common Usage Examples:**
```bash ```bash
# Build vLLM dev image called dynamo:latest-vllm (default). This runs as root and is for development. # Build vLLM dev image called dynamo:latest-vllm (default). This runs as root and is for development.
./build.sh python container/render.py --framework=vllm --target=dev --short-output
docker build -t dynamo:latest-vllm-dev -f container/rendered.Dockerfile .
# Build a local-dev image. The local-dev image will run as `dynamo` with UID/GID matched to your host user, # Build a local-dev image. The local-dev image will run as `dynamo` with UID/GID matched to your host user,
# which is useful when mounting partitions for development. # which is useful when mounting partitions for development.
./build.sh --framework vllm --target local-dev python container/render.py --framework=vllm --target=local-dev --short-output
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
# Build TensorRT-LLM development image called dynamo:latest-trtllm # Build TensorRT-LLM development image called dynamo:latest-trtllm
./build.sh --framework trtllm python container/render.py --framework=trtllm --target=runtime --short-output
docker build -t dynamo:latest-trtllm-runtime -f container/rendered.Dockerfile .
# Build with custom tag
./build.sh --framework sglang --tag my-custom-tag
# Dry run to see commands
./build.sh --dry-run
# Build with no cache
./build.sh --no-cache
# Build with build arguments
./build.sh --build-arg CUSTOM_ARG=value
``` ```
### Building the Frontend Image ### Building the Frontend Image
The frontend image is a specialized container that includes the Dynamo components (Dynamo, NIXL, etc) along with the Endpoint Picker (EPP) for Kubernetes Gateway API Inference Extension integration. This image is primarily used for inference gateway deployments. The frontend image is a specialized container that includes the Dynamo components (Dynamo, NIXL, etc) along with the Endpoint Picker (EPP) for Kubernetes Gateway API Inference Extension integration. This image is primarily used for inference gateway deployments.
**Build EPP Image**
```bash
sudo apt-get update && sudo apt-get install -y git build-essential protobuf-compiler libclang-dev
curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain stable
. "$HOME/.cargo/env"
cargo install cbindgen
pushd deploy/inference-gateway/epp
make all
popd
EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
```
**Build Frontend Image**
```bash ```bash
# Build the frontend image (automatically builds EPP image as a dependency) # Build the frontend image (automatically builds EPP image as a dependency)
./build.sh --framework none --target frontend python container/render.py --framework=dynamo --target=frontend --short-output
docker build -t dynamo:frontend --build-arg EPP_IMAGE=${EPP_IMAGE} -f container/rendered.Dockerfile .
``` ```
The build process automatically: The build process automatically:
...@@ -313,34 +309,34 @@ The `run.sh` script launches Docker containers with the appropriate configuratio ...@@ -313,34 +309,34 @@ The `run.sh` script launches Docker containers with the appropriate configuratio
```bash ```bash
# Basic container launch with dev image (runs as root by default, non-interactive) # Basic container launch with dev image (runs as root by default, non-interactive)
./run.sh --image dynamo:latest-vllm -v $HOME/.cache:/root/.cache container/run.sh --image dynamo:latest-vllm -v $HOME/.cache:/root/.cache
# Interactive development with workspace mounted using dev image (runs as root) # Interactive development with workspace mounted using dev image (runs as root)
./run.sh --image dynamo:latest-vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Interactive development with local-dev image (runs as dynamo user with matched host UID/GID) # Interactive development with local-dev image (runs as dynamo user with matched host UID/GID)
./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Use specific image and framework for development # Use specific image and framework for development
./run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image v0.1.0.dev.08cc44965-vllm-local-dev --framework vllm --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Interactive development shell with workspace mounted (local-dev) # Interactive development shell with workspace mounted (local-dev)
./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it -- bash container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it -- bash
# Development with custom environment variables # Development with custom environment variables
./run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev -e CUDA_VISIBLE_DEVICES=0,1 --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Dry run to see docker command # Dry run to see docker command
./run.sh --dry-run container/run.sh --dry-run
# Development with custom volume mounts # Development with custom volume mounts
./run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev -v /host/path:/container/path --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Run runtime image as non-root dynamo user (for production) # Run runtime image as non-root dynamo user (for production)
./run.sh --image dynamo:latest-vllm-runtime -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-runtime -v $HOME/.cache:/home/dynamo/.cache
# Run dev image as specific user (override default root) # Run dev image as specific user (override default root)
./run.sh --image dynamo:latest-vllm --user dynamo -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm --user dynamo -v $HOME/.cache:/home/dynamo/.cache
``` ```
### Network Configuration Options ### Network Configuration Options
...@@ -350,8 +346,8 @@ The `run.sh` script supports different networking modes via the `--network` flag ...@@ -350,8 +346,8 @@ The `run.sh` script supports different networking modes via the `--network` flag
#### Host Networking (Default) #### Host Networking (Default)
```bash ```bash
# Examples with dynamo user # Examples with dynamo user
./run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev --network host -v $HOME/.cache:/home/dynamo/.cache
./run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev -v $HOME/.cache:/home/dynamo/.cache
``` ```
**Use cases:** **Use cases:**
- High-performance ML inference (default for GPU workloads) - High-performance ML inference (default for GPU workloads)
...@@ -364,7 +360,7 @@ The `run.sh` script supports different networking modes via the `--network` flag ...@@ -364,7 +360,7 @@ The `run.sh` script supports different networking modes via the `--network` flag
#### Bridge Networking (Isolated) #### Bridge Networking (Isolated)
```bash ```bash
# CI/testing with isolated bridge networking and host cache sharing (no -it for automated CI) # CI/testing with isolated bridge networking and host cache sharing (no -it for automated CI)
./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache
``` ```
**Use cases:** **Use cases:**
- Secure isolation from host network - Secure isolation from host network
...@@ -377,10 +373,10 @@ The `run.sh` script supports different networking modes via the `--network` flag ...@@ -377,10 +373,10 @@ The `run.sh` script supports different networking modes via the `--network` flag
#### No Networking ⚠️ **LIMITED FUNCTIONALITY** #### No Networking ⚠️ **LIMITED FUNCTIONALITY**
```bash ```bash
# Complete network isolation - no external connectivity # Complete network isolation - no external connectivity
./run.sh --image dynamo:latest-vllm --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
# Same with local-dev image (dynamo user with matched host UID/GID) # Same with local-dev image (dynamo user with matched host UID/GID)
./run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-local-dev --network none --mount-workspace -it -v $HOME/.cache:/home/dynamo/.cache
``` ```
**⚠️ WARNING: `--network none` severely limits Dynamo functionality:** **⚠️ WARNING: `--network none` severely limits Dynamo functionality:**
- **No model downloads** - HuggingFace models cannot be downloaded - **No model downloads** - HuggingFace models cannot be downloaded
...@@ -427,11 +423,12 @@ See Docker documentation for custom network creation and management. ...@@ -427,11 +423,12 @@ See Docker documentation for custom network creation and management.
### Development Workflow ### Development Workflow
```bash ```bash
# 1. Build local-dev image (builds runtime, then dev as intermediate, then local-dev as final image) # 1. Build local-dev image (builds runtime, then dev as intermediate, then local-dev as final image)
./build.sh --framework vllm --target local-dev python container/render.py --framework=vllm --target=local-dev --short-output
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
# 2. Run development container using the local-dev image # 2. Run development container using the local-dev image
# RECOMMENDED: --mount-workspace for live editing in dev and local-dev images # RECOMMENDED: --mount-workspace for live editing in dev and local-dev images
./run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it container/run.sh --image dynamo:latest-vllm-local-dev --mount-workspace -v $HOME/.cache:/home/dynamo/.cache -it
# 3. Inside container, run inference (requires both frontend and backend) # 3. Inside container, run inference (requires both frontend and backend)
# Start frontend # Start frontend
...@@ -444,19 +441,21 @@ python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 & ...@@ -444,19 +441,21 @@ python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 &
### Production Workflow ### Production Workflow
```bash ```bash
# 1. Build production runtime image (runs as non-root dynamo user) # 1. Build production runtime image (runs as non-root dynamo user)
./build.sh --framework vllm --target runtime python container/render.py --framework=vllm --target=runtime --short-output
docker build -t dynamo:latest-vllm-runtime -f container/rendered.Dockerfile .
# 2. Run production container as non-root dynamo user # 2. Run production container as non-root dynamo user
./run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/home/dynamo/.cache container/run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/home/dynamo/.cache
``` ```
### Testing Workflow ### Testing Workflow
```bash ```bash
# 1. Build dev image # 1. Build dev image
./build.sh --framework vllm --no-cache python container/render.py --framework=vllm --target=dev --short-output
docker build -t dynamo:latest-vllm-dev -f container/rendered.Dockerfile .
# 2. Run tests with network isolation for reproducible results (no -it needed for CI) # 2. Run tests with network isolation for reproducible results (no -it needed for CI)
./run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache -- python -m pytest tests/ container/run.sh --image dynamo:latest-vllm --mount-workspace --network bridge -v $HOME/.cache:/home/dynamo/.cache -- python -m pytest tests/
# 3. Inside the container with bridge networking, start services # 3. Inside the container with bridge networking, start services
# Note: Services are only accessible from the same container - no port conflicts with host # Note: Services are only accessible from the same container - no port conflicts with host
......
This diff is collapsed.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# This file represents the default ARG values of Dockerfiles generated
# by render.py. These are the recommended default values for users and
# are the source of truth for the values used in our delivered images.
#
# Some ARGs have multiple valid values and can be changed for local testing.
# You can do so locally in this file, or pass --build-arg to docker build
# when building.
# Defaults used when rendering with --framework=dynamo.
# python_version is also what the args template emits as ARG PYTHON_VERSION,
# whichever framework is selected.
dynamo:
base_image: nvcr.io/nvidia/cuda-dl-base
base_image_tag: 25.01-cuda12.8-devel-ubuntu24.04
epp_image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:v0.5.1
frontend_image: nvcr.io/nvidia/base/ubuntu:noble-20250619
# Quoted so the value stays the string "3.12" instead of being read as a float.
python_version: "3.12"
nats_version: v2.10.28
etcd_version: v3.5.21
nixl_ref: 0.9.0
nixl_ucx_ref: v1.20.0
nixl_gdrcopy_ref: v2.5.1
nixl_ucx_efa_ref: 9d2b88a1f67faf9876f267658bd077b379b8bb76
nixl_libfabric_ref: v2.3.0
# Feature toggles are quoted so they load as the strings "true"/"false"
# rather than YAML booleans.
enable_kvbm: "false"
enable_media_nixl: "false"
enable_media_ffmpeg: "false"
enable_gpu_memory_service: "false"
ffmpeg_version: "7.1"
efa_version: 1.45.1
# Defaults used when rendering with --framework=vllm.
vllm:
base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: nvcr.io/nvidia/cuda
# NOTE(review): ${CUDA_VERSION} is left unexpanded here; presumably it is
# resolved from the CUDA_VERSION ARG at docker build time - confirm.
runtime_image_tag: ${CUDA_VERSION}.0-runtime-ubuntu24.04
vllm_ref: v0.14.1
flashinf_ref: v0.5.3
lmcache_ref: 0.3.12
max_jobs: "10"
# Feature toggles are quoted so they load as strings rather than YAML booleans.
enable_media_nixl: "true"
enable_media_ffmpeg: "true"
enable_gpu_memory_service: "true"
enable_kvbm: "true"
# Per-CUDA entries: the args template builds the key as "cuda" + cuda_version
# and reads base_image_tag from the matching entry.
cuda12.9:
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
cuda13.0:
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
# Defaults used when rendering with --framework=sglang.
# runtime_image is emitted by the args template as ARG RUNTIME_IMAGE.
sglang:
base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: lmsysorg/sglang
# Per-CUDA entries: the args template builds the key as "cuda" + cuda_version
# and reads base_image_tag from the matching entry.
cuda12.9:
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
runtime_image_tag: v0.5.7-runtime
cuda13.0:
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
runtime_image_tag: v0.5.8-cu130-runtime
# Feature toggles are quoted so they load as strings rather than YAML booleans.
enable_media_nixl: "true"
enable_media_ffmpeg: "true"
enable_gpu_memory_service: "true"
enable_kvbm: "false"
# Defaults used when rendering with --framework=trtllm.
# Unlike vllm/sglang, base_image_tag is a single value here: the args template
# reads it directly instead of looking it up per CUDA version.
trtllm:
base_image: nvcr.io/nvidia/pytorch
base_image_tag: 25.12-py3
runtime_image: nvcr.io/nvidia/cuda-dl-base
runtime_image_tag: 25.10-cuda13.0-runtime-ubuntu24.04
# Feature toggles are quoted so they load as strings rather than YAML booleans.
enable_media_nixl: "true"
enable_media_ffmpeg: "true"
enable_gpu_memory_service: "false"
enable_kvbm: "true"
python_version: "3.12"
index_url: https://pypi.nvidia.com/
pip_wheel_dir: /tmp/trtllm_wheel/
pip_wheel: tensorrt-llm==1.3.0rc1
# NOTE(review): ${TENSORRTLLM_PIP_WHEEL#*==} looks like shell-style prefix
# stripping (keeps the text after "=="); presumably expanded at docker build
# time rather than by render.py - confirm.
trtllm_wheel_image: nvcr.io/nvidia/tensorrt-llm/release:${TENSORRTLLM_PIP_WHEEL#*==}
github_trtllm_commit: 1.2.0rc6
torch_version: 2.10.0a0+b4e4ee81d3.nv25.12
torch_tensorrt_version: 2.10.0a0
torchvision_version: 0.25.0a0+ca221243
torchao_ver: 0.15.0+git01374eb5
torchdata_ver: 0.11.0
torchtitan_ver: 0.2.0
jinja2_version: 3.1.6
sympy_version: 1.14.0
pytorch_triton_ver: 3.5.1+gitbfeb0668.nv25.12
flash_attn_version: 2.7.4.post1+25.12
flashinfer_python_ver: 0.6.1
has_trtllm_context: "0"
#!/usr/bin/env python3
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import argparse
import re
import sys
from pathlib import Path
import yaml
from jinja2 import Environment, FileSystemLoader
def parse_args():
    """Parse the command-line options that select which Dockerfile to render.

    Reads ``sys.argv`` through argparse.

    Returns:
        argparse.Namespace: ``framework``, ``target``, ``platform`` and
        ``cuda_version`` as strings, plus the boolean flags ``make_efa``,
        ``short_output`` and ``show_result``.
    """
    parser = argparse.ArgumentParser(
        description="Renders dynamo Dockerfiles from templates"
    )

    parser.add_argument(
        "--framework",
        # Template names and context.yaml keys use the lower-case framework
        # name, so normalise here and accept e.g. "VLLM" as well as "vllm".
        type=str.lower,
        default="vllm",
        help="Dockerfile framework to use [dynamo, vllm, sglang, trtllm]",
    )
    parser.add_argument(
        "--target",
        type=str,
        default="runtime",
        help="Dockerfile target to use. Non-exhaustive examples: [runtime, dev, local-dev]",
    )
    parser.add_argument(
        "--platform",
        type=str,
        default="amd64",
        help="Dockerfile platform to use. [amd64, arm64]",
    )
    parser.add_argument(
        "--cuda-version",
        type=str,
        default="12.9",
        help="CUDA version to use. [12.9, 13.0]",
    )
    parser.add_argument("--make-efa", action="store_true", help="Enable AWS EFA")
    parser.add_argument(
        "--short-output",
        action="store_true",
        help="Output filename is just rendered.Dockerfile",
    )
    parser.add_argument(
        "--show-result",
        action="store_true",
        help="Prints the rendered Dockerfile to stdout.",
    )

    args = parser.parse_args()
    return args
def validate_args(args):
    """Reject argument values the templates cannot render correctly.

    Only ``framework`` and ``platform`` are checked. ``target`` is left open
    because the CLI documents its list of values as non-exhaustive.

    Args:
        args: Parsed CLI namespace; ``args.framework`` and ``args.platform``
            are read.

    Raises:
        ValueError: If the framework or platform is not a supported value.
    """
    supported_frameworks = ("dynamo", "vllm", "sglang", "trtllm")
    supported_platforms = ("amd64", "arm64")

    if args.framework not in supported_frameworks:
        raise ValueError(
            f"Unsupported framework '{args.framework}'; "
            f"expected one of: {', '.join(supported_frameworks)}"
        )

    # Without this check, any platform other than "amd64" (for example
    # "linux/arm64") would be rendered with the aarch64 ARCH_ALT value.
    if args.platform not in supported_platforms:
        raise ValueError(
            f"Unsupported platform '{args.platform}'; "
            f"expected one of: {', '.join(supported_platforms)}"
        )

    return
def render(args, context, script_dir):
    """Render ``Dockerfile.template`` and write the result next to the script.

    Args:
        args: Parsed CLI namespace; supplies the template variables and the
            ``short_output`` / ``show_result`` flags.
        context: Mapping loaded from context.yaml, passed to the template as
            ``context``.
        script_dir: Directory used both as the Jinja template search path and
            as the output directory.
    """
    jinja_env = Environment(
        loader=FileSystemLoader(script_dir),
        trim_blocks=False,
        lstrip_blocks=True,
    )
    raw_output = jinja_env.get_template("Dockerfile.template").render(
        context=context,
        framework=args.framework,
        target=args.target,
        platform=args.platform,
        cuda_version=args.cuda_version,
        make_efa=args.make_efa,
    )

    # Collapse every run of three or more newlines into a single blank line.
    cleaned = re.sub(r"\n{3,}", "\n\n", raw_output)

    # Short output uses a fixed name; otherwise the name encodes the selection.
    filename = (
        "rendered.Dockerfile"
        if args.short_output
        else f"{args.framework}-{args.target}-cuda{args.cuda_version}-{args.platform}-rendered.Dockerfile"
    )

    with open(f"{script_dir}/{filename}", "w") as out_file:
        out_file.write(cleaned)

    if args.show_result:
        print("##############")
        print("# Dockerfile #")
        print("##############")
        print(cleaned)
        print("##############")

    print(f"INFO: Generated Dockerfile written to {script_dir}/{filename}")
    return
def main():
    """Entry point: parse arguments, load ``context.yaml`` and render.

    The context file and the templates are looked up in the directory that
    contains this script, and the rendered Dockerfile is written there too.
    """
    args = parse_args()
    validate_args(args)

    # Locate the templates relative to this file rather than sys.argv[0], so
    # the lookup still works when main() is called from another script or the
    # script is started through a wrapper or symlink.
    script_dir = Path(__file__).resolve().parent

    with open(f"{script_dir}/context.yaml", "r", encoding="utf-8") as f:
        context = yaml.safe_load(f)

    render(args, context, script_dir)

    if args.target == "local-dev":
        # local-dev images need the host user's IDs passed in at build time.
        print(
            "INFO: Remember to add --build-arg values for USER_UID and USER_GID when building a local-dev image!"
        )
        print(
            " Recommendation: --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g)"
        )


if __name__ == "__main__":
    main()
{#
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
##########################
#### Build Arguments #####
##########################
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH_ALT: Used for Rust targets, manylinux suffix (e.g., x86_64, aarch64)
#
# The defaults below are filled in at render time from the "platform" template
# variable: ARCH is the platform value itself, and ARCH_ALT is x86_64 for amd64
# and aarch64 for any other platform value.
#
# They can still be overridden at build time, for example for x86/amd64:
# --build-arg ARCH=amd64 --build-arg ARCH_ALT=x86_64
#
# and for arm64/aarch64:
# --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64
# TODO OPS-592: Leverage uname -m to determine ARCH instead of passing it as an arg
ARG ARCH={{ platform }}
ARG ARCH_ALT={{ "x86_64" if platform == "amd64" else "aarch64" }}
# Python/CUDA configuration
ARG PYTHON_VERSION={{ context.dynamo.python_version }}
ARG CUDA_VERSION={{ cuda_version }}
# CUDA_MAJOR keeps only the part of CUDA_VERSION before the first "." (e.g. 12.9 -> 12)
ARG CUDA_MAJOR=${CUDA_VERSION%%.*}
{% if framework == "vllm" or framework == "sglang" -%}
{% set cuda_context_key = "cuda" + cuda_version %}
# Base image configuration (tag selected per CUDA version)
ARG BASE_IMAGE={{ context[framework].base_image }}
ARG BASE_IMAGE_TAG={{ context[framework][cuda_context_key].base_image_tag }}
{% else -%}
# Base image configuration
ARG BASE_IMAGE={{ context[framework].base_image }}
ARG BASE_IMAGE_TAG={{ context[framework].base_image_tag }}
{%- endif %}
{% if framework == "sglang" -%}
{% set cuda_context_key = "cuda" + cuda_version %}
# Runtime image configuration (tag selected per CUDA version)
ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
ARG RUNTIME_IMAGE_TAG={{ context[framework][cuda_context_key].runtime_image_tag }}
{% elif framework != "dynamo" -%}
# Runtime image configuration
ARG RUNTIME_IMAGE={{ context[framework].runtime_image }}
ARG RUNTIME_IMAGE_TAG={{ context[framework].runtime_image_tag }}
{%- endif %}
# Build configuration
ARG ENABLE_KVBM={{ context[framework].enable_kvbm }}
# No default is rendered for CARGO_BUILD_JOBS; supply it with --build-arg if needed
ARG CARGO_BUILD_JOBS
ARG NATS_VERSION={{ context.dynamo.nats_version }}
ARG ETCD_VERSION={{ context.dynamo.etcd_version }}
ARG ENABLE_MEDIA_NIXL={{ context[framework].enable_media_nixl }}
ARG ENABLE_MEDIA_FFMPEG={{ context[framework].enable_media_ffmpeg }}
ARG FFMPEG_VERSION={{ context.dynamo.ffmpeg_version }}
ARG ENABLE_GPU_MEMORY_SERVICE={{ context[framework].enable_gpu_memory_service }}
# SCCACHE configuration
# USE_SCCACHE has no default; bucket and region default to empty strings
ARG USE_SCCACHE
ARG SCCACHE_BUCKET=""
ARG SCCACHE_REGION=""
# NIXL configuration
ARG NIXL_UCX_REF={{ context.dynamo.nixl_ucx_ref }}
ARG NIXL_REF={{ context.dynamo.nixl_ref }}
ARG NIXL_GDRCOPY_REF={{ context.dynamo.nixl_gdrcopy_ref }}
ARG NIXL_LIBFABRIC_REF={{ context.dynamo.nixl_libfabric_ref }}
{% if target == "dev" or target == "local-dev" %}
# Framework name, only declared for the dev and local-dev targets
ARG FRAMEWORK={{ framework }}
{% endif %}
{% if target == "frontend" %}
# Images only declared for the frontend target
ARG EPP_IMAGE={{ context.dynamo.epp_image }}
ARG FRONTEND_IMAGE={{ context.dynamo.frontend_image }}
{% endif %}
{% if framework == "vllm" -%}
# vLLM specific configuration
# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF={{ context.vllm.vllm_ref }}
ARG MAX_JOBS={{ context.vllm.max_jobs }}
# FlashInfer is only respected when building vLLM from source, i.e. when VLLM_REF does not start with 'v', or for arm64 builds
ARG FLASHINF_REF={{ context.vllm.flashinf_ref }}
ARG LMCACHE_REF={{ context.vllm.lmcache_ref }}
# If left blank, we fall back to the vLLM defaults
ARG DEEPGEMM_REF=""
{%- endif -%}
{% if framework == "trtllm" %}
# TensorRT-LLM specific configuration
ARG HAS_TRTLLM_CONTEXT={{ context.trtllm.has_trtllm_context }}
ARG TENSORRTLLM_PIP_WHEEL={{ context.trtllm.pip_wheel }}
ARG TENSORRTLLM_INDEX_URL={{ context.trtllm.index_url }}
ARG GITHUB_TRTLLM_COMMIT={{ context.trtllm.github_trtllm_commit }}
ARG TRTLLM_WHEEL_IMAGE={{ context.trtllm.trtllm_wheel_image }}
# Copy pytorch installation from NGC PyTorch
# NOTE(review): the ARGs below only declare package versions; presumably they
# identify what is copied from the NGC PyTorch image - confirm against the
# stages that consume them.
ARG FLASHINFER_PYTHON_VER={{ context.trtllm.flashinfer_python_ver }}
ARG PYTORCH_TRITON_VER={{ context.trtllm.pytorch_triton_ver }}
ARG TORCHAO_VER={{ context.trtllm.torchao_ver }}
ARG TORCHDATA_VER={{ context.trtllm.torchdata_ver }}
ARG TORCHTITAN_VER={{ context.trtllm.torchtitan_ver }}
ARG TORCH_VER={{ context.trtllm.torch_version }}
ARG TORCH_TENSORRT_VER={{ context.trtllm.torch_tensorrt_version }}
ARG TORCHVISION_VER={{ context.trtllm.torchvision_version }}
ARG JINJA2_VER={{ context.trtllm.jinja2_version }}
ARG SYMPY_VER={{ context.trtllm.sympy_version }}
ARG FLASH_ATTN_VER={{ context.trtllm.flash_attn_version }}
# Python configuration
# Rendered from the trtllm section of the context, separately from PYTHON_VERSION
ARG TRTLLM_PYTHON_VERSION={{ context[framework].python_version }}
{%- endif -%}
{% if make_efa %}
{#- AWS EFA build arguments; only rendered when make_efa is set (--make-efa). #}
ARG EFA_VERSION={{ context.dynamo.efa_version }}
ARG EFA_BASE_IMAGE={{ "runtime" if target == "runtime" else "dev" }}
{%- endif -%}
\ No newline at end of file
# syntax=docker/dockerfile:1.10.0 {#
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
#}
#############################
########## AWS EFA ##########
#############################
# #
# PURPOSE: AWS EFA support layer # This stage extends the runtime/dev stage with AWS EFA installer
#
# This Dockerfile adds AWS EFA (Elastic Fabric Adapter) support on top of
# the runtime or dev stages from framework Dockerfiles (vllm, sglang, trtllm).
#
# Usage (via build.sh with --make-efa flag):
# ./build.sh --framework vllm --target runtime --make-efa
# ./build.sh --framework vllm --target local-dev --make-efa
ARG BASE_IMAGE
ARG EFA_VERSION
###########################################################
########## Runtime with AWS EFA ##########################
###########################################################
#
# This stage extends the runtime stage with AWS EFA installer
# which includes: libfabric and aws-ofi-nccl plugin # which includes: libfabric and aws-ofi-nccl plugin
# #
# Use this stage when deploying on AWS infrastructure with EFA support # Use this stage when deploying on AWS infrastructure with EFA support
FROM ${BASE_IMAGE} AS runtime-aws FROM ${EFA_BASE_IMAGE} AS aws
ARG EFA_VERSION ARG EFA_VERSION
{% if target == "runtime" %}
USER root USER root
{% endif %}
# Install AWS EFA installer with bundled libfabric and aws-ofi-nccl # Install AWS EFA installer with bundled libfabric and aws-ofi-nccl
# Flags explanation: # Flags explanation:
...@@ -48,43 +38,9 @@ RUN mkdir -p /tmp/efa && \ ...@@ -48,43 +38,9 @@ RUN mkdir -p /tmp/efa && \
ENV EFA_VERSION="${EFA_VERSION}" ENV EFA_VERSION="${EFA_VERSION}"
{% if target == "runtime" %}
USER dynamo USER dynamo
{% endif %}
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD []
########################################################################
########## Development with AWS EFA (run.sh, runs as root user) ########
########################################################################
#
# PURPOSE: Development environment with AWS EFA support
#
# This stage extends dev stages with development tools for building and
# debugging on EFA-enabled AWS instances.
FROM ${BASE_IMAGE} AS dev-aws
ARG EFA_VERSION
# Dev stage runs as root, no USER switch needed
# Install AWS EFA installer with bundled libfabric and aws-ofi-nccl
# Flags explanation:
# --skip-kmod: Skip kernel module installation (handled by host)
# --skip-limit-conf: Skip ulimit configuration (handled by container runtime)
# --no-verify: Skip GPG verification (optional, can be removed if verification is needed)
RUN mkdir -p /tmp/efa && \
cd /tmp/efa && \
curl --retry 3 --retry-delay 2 -fsSL -o aws-efa-installer-${EFA_VERSION}.tar.gz \
https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_VERSION}.tar.gz && \
tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz && \
cd aws-efa-installer && \
apt-get update && \
./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify && \
rm -rf /tmp/efa && \
rm -rf /opt/amazon/aws-ofi-nccl && \
ldconfig
ENV EFA_VERSION="${EFA_VERSION}"
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment