Unverified Commit 3188c70a authored by Dillon Cullinan's avatar Dillon Cullinan Committed by GitHub
Browse files

chore: Templating Feedback Followup (#6125)


Signed-off-by: default avatarDillon Cullinan <dcullinan@nvidia.com>
parent 33ddd8fb
......@@ -143,7 +143,7 @@ Build the appropriate framework image (e.g., `dynamo:latest-vllm-local-dev`) fro
```bash
# Single command approach (recommended)
export FRAMEWORK=VLLM # Note: any of VLLM, SGLANG, TRTLLM can be used
python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
python container/render.py --framework=${FRAMEWORK} --target=local-dev --output-short-filename
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile .
# Now you've created both dynamo:latest-vllm and dynamo:latest-vllm-local-dev
......@@ -416,7 +416,7 @@ If you see errors like "container is not running" or "An error occurred setting
# If missing, build the dev image first, then build local-dev
export FRAMEWORK=VLLM # Replace with VLLM, SGLANG, or TRTLLM
python container/render.py --framework=${FRAMEWORK} --target=local-dev --short-output
python container/render.py --framework=${FRAMEWORK} --target=local-dev --output-short-filename
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile .
```
......
......@@ -42,7 +42,9 @@
**/target/*
**/*safetensors
container/Dockerfile*
container/*.Dockerfile
container/**/*.Dockerfile
container/render.py
container/context.yaml
.venv
.venv-docs
......
......@@ -15,7 +15,7 @@ inputs:
required: false
default: 'linux/amd64'
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
description: 'CUDA version to use'
required: true
image_tag:
description: 'Custom image tag (optional, defaults to framework:latest)'
......@@ -77,7 +77,7 @@ runs:
--platform=${{ inputs.platform }} \
--cuda-version=${{ inputs.cuda_version }} \
--show-result \
--short-output
--output-short-filename
echo "::endgroup::"
- name: Build EPP image
if: ${{ inputs.target == 'frontend' }}
......
......@@ -14,7 +14,7 @@ inputs:
required: false
default: 'linux/amd64'
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
description: 'CUDA version to use'
required: true
image_tag:
description: 'Custom image tag'
......@@ -98,7 +98,7 @@ runs:
# Create build logs directory
mkdir -p build-logs
BUILD_LOG_FILE="build-logs/build-${{ inputs.framework }}-$(echo '${{ inputs.platform }}' | sed 's/linux\///').log"
BUILD_LOG_FILE="build-logs/build-${{ inputs.framework }}-${{ inputs.platform }}.log"
echo "BUILD_LOG_FILE=${BUILD_LOG_FILE}" >> $GITHUB_ENV
echo "📝 Build log will be saved to: ${BUILD_LOG_FILE}"
......@@ -108,14 +108,14 @@ runs:
CACHE_ARGS=""
if [[ "${{ inputs.target }}" != "frontend" ]]; then
CACHE_ARGS="--cache-to type=inline "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/} "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM}-cache "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:main-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM} "
CACHE_ARGS+="--cache-from type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM}-cache "
if [[ "$GITHUB_REF_NAME" =~ ^release ]]; then
# Release branches also use release cache
CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:release-${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM}-cache,mode=max "
elif [[ "$GITHUB_REF_NAME" == "main" ]]; then
CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM##*/}-cache,mode=max "
CACHE_ARGS+="--cache-to type=registry,ref=${ECR_HOSTNAME}/ai-dynamo/dynamo:${{ inputs.framework }}-cuda${CUDA_VERSION_MAJOR}-${PLATFORM}-cache,mode=max "
fi
fi
echo "$CACHE_ARGS"
......
......@@ -55,11 +55,7 @@ ignore:
- '.github/release.yml'
- '.github/copy-pr-bot.yaml'
- '.github/dco.yml'
- 'container/Dockerfile.aws'
- 'container/Dockerfile.docs'
- 'container/Dockerfile.epp'
- 'container/Dockerfile.frontend'
- 'container/Dockerfile.local_dev'
- 'container/run.sh'
- 'container/use-sccache.sh'
- 'container/dev/**'
......@@ -79,8 +75,6 @@ core:
- '!**/*.rst'
- '!**/*.txt'
- *ci
- 'container/build.sh'
- 'container/Dockerfile'
- 'container/render.py'
- 'container/Dockerfile.template'
- 'container/context.yaml'
......@@ -124,7 +118,6 @@ planner:
vllm:
- '!**/*.md'
- '!**/*.rst'
- 'container/Dockerfile.vllm'
- 'container/deps/requirements.vllm.txt'
- 'container/deps/vllm/**'
- 'examples/backends/vllm/**'
......@@ -134,7 +127,6 @@ vllm:
sglang:
- '!**/*.md'
- '!**/*.rst'
- 'container/Dockerfile.sglang'
- 'examples/backends/sglang/**'
- 'components/src/dynamo/sglang/**'
- 'container/templates/sglang_*'
......@@ -142,7 +134,6 @@ sglang:
trtllm:
- '!**/*.md'
- '!**/*.rst'
- 'container/Dockerfile.trtllm'
- 'container/deps/trtllm/**'
- 'examples/backends/trtllm/**'
- 'components/src/dynamo/trtllm/**'
......@@ -157,8 +148,6 @@ frontend:
- 'lib/**'
- '*.toml'
- '*.lock'
- 'container/build.sh'
- 'container/Dockerfile'
- 'container/deps/*'
- 'components/src/dynamo/router/**'
- 'components/src/dynamo/mocker/**'
......
......@@ -139,13 +139,14 @@ jobs:
shell: bash
env:
EXTRA_TAGS: ${{ inputs.extra_tags }}
PLATFORM: linux/${{ inputs.platform }}
CUDA_VERSION: ${{ inputs.cuda_version }}
run: |
CUDA_VERSION_MAJOR=${CUDA_VERSION%%.*}
if [ -n "$EXTRA_TAGS" ]; then
RESULT=""
while IFS= read -r tag; do
if [ -n "$tag" ]; then
RESULT+="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${tag}-${{ inputs.platform }}"$'\n'
RESULT+="${{ secrets.AWS_ACCOUNT_ID }}.dkr.ecr.${{ secrets.AWS_DEFAULT_REGION }}.amazonaws.com/ai-dynamo/dynamo:${tag}-cuda${CUDA_VERSION_MAJOR}-${{ inputs.platform }}"$'\n'
fi
done <<< "$EXTRA_TAGS"
echo "tags<<EOF" >> $GITHUB_OUTPUT
......@@ -202,7 +203,7 @@ jobs:
--platform=${{ inputs.platform }} \
--cuda-version=${{ inputs.cuda_version }} \
--show-result \
--short-output
--output-short-filename
- name: Build Container
id: build-image
timeout-minutes: ${{ inputs.build_timeout_minutes }}
......
......@@ -75,7 +75,7 @@ jobs:
--framework=dynamo \
--platform=amd64 \
--show-result \
--short-output
--output-short-filename
- name: Build image
env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
......
......@@ -26,17 +26,32 @@
{% endif %}
{% endif %}
# --- Framework Stages
# --- SGLang Stages
{% if framework == "sglang" %}
# SGLang is the only framework without a `framework` target currently, needs special treatment
{% include "templates/sglang_runtime.Dockerfile" %}
{% elif target == "framework" and framework != "dynamo" %}
{% include "templates/" ~ framework ~ "_framework.Dockerfile" %}
{% elif ( target == "runtime" or target == "dev" or target == "local-dev" ) and framework != "dynamo" %}
{% include "templates/" ~ framework ~ "_framework.Dockerfile" %}
{% include "templates/" ~ framework ~ "_runtime.Dockerfile" %}
{% else %}
# --- No framework stages included
# --- No SGLANG stages included
{% endif %}
# --- VLLM Stages
{% if framework == "vllm" %}
{% include "templates/vllm_framework.Dockerfile" %}
{% if target != "framework" %}
{% include "templates/vllm_runtime.Dockerfile" %}
{% endif %}
{% else %}
# --- No VLLM stages included
{% endif %}
# --- TRTLLM Stages
{% if framework == "trtllm" %}
{% include "templates/trtllm_framework.Dockerfile" %}
{% if target != "framework" %}
{% include "templates/trtllm_runtime.Dockerfile" %}
{% endif %}
{% else %}
# --- No TRTLLM stages included
{% endif %}
{% if make_efa == true and target == "runtime" %}
......
......@@ -4,18 +4,18 @@
The NVIDIA Dynamo project uses containerized development and deployment to maintain consistent environments across different AI inference frameworks and deployment scenarios. This directory contains the tools for building and running Dynamo containers:
### Core Components
- **`render.py`** - A render script used to generate Dockerfiles for AI inference frameworks (vLLM, TensorRT-LLM, SGLang). The generated Dockerfile includes the needed multi-stage steps for development vs production configurations
- **`run.sh`** - A container runtime manager that launches Docker containers with proper GPU access, volume mounts, and environment configurations. It supports different development workflows from root-based legacy setups to user-based development environments.
## Rendering Requirements:
### Rendering Requirements:
- Python
- Python Packages:
- pyyaml
- jinja2
### Core Components
- **`render.py`** - A render script used to generate Dockerfiles for AI inference frameworks (vLLM, TensorRT-LLM, SGLang) and the frontend image. The generated Dockerfile includes the needed multi-stage steps for development vs production configurations.
- **`run.sh`** - A container runtime manager that launches Docker containers with proper GPU access, volume mounts, and environment configurations. It supports different development workflows from root-based legacy setups to user-based development environments.
### Stage Summary for Frameworks
<details>
......@@ -115,7 +115,7 @@ The `run.sh` script and rendering scripts are convenience that simplify common D
### 1. runtime target (runs as non-root dynamo user):
```bash
# Build runtime image
python container/render.py --framework vllm --target runtime --short-output
python container/render.py --framework vllm --target runtime --output-short-filename
docker build -t dynamo:latest-vllm-runtime -f rendered.Dockerfile .
# Run runtime container
......@@ -226,16 +226,16 @@ Note: `uv` commands set `UV_CACHE_DIR` per `RUN` so `uv` always uses the same pa
```bash
# Build the vLLM dev image, tagged dynamo:latest-vllm-dev. This runs as root and is for development.
python container/render.py --framework=vllm --target=dev --short-output
python container/render.py --framework=vllm --target=dev --output-short-filename
docker build -t dynamo:latest-vllm-dev -f rendered.Dockerfile .
# Build a local-dev image. The local-dev image will run as `dynamo` with UID/GID matched to your host user,
# which is useful when mounting partitions for development.
python container/render.py --framework=vllm --target=local-dev --short-output
python container/render.py --framework=vllm --target=local-dev --output-short-filename
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
# Build the TensorRT-LLM runtime image, tagged dynamo:latest-trtllm-runtime
python container/render.py --framework=trtllm --target=runtime --short-output
python container/render.py --framework=trtllm --target=runtime --output-short-filename
docker build -t dynamo:latest-trtllm-runtime -f rendered.Dockerfile .
```
......@@ -261,7 +261,7 @@ EPP_IMAGE="dynamo/dynamo-epp:${EPP_GIT_TAG}"
**Build Frontend Image**
```bash
# Build the frontend image (automatically builds EPP image as a dependency)
python container/render.py --framework=dynamo --target=frontend --short-output
python container/render.py --framework=dynamo --target=frontend --output-short-filename
docker build -t dynamo:frontend --build-arg EPP_IMAGE=${EPP_IMAGE} -f rendered.Dockerfile .
```
......@@ -421,7 +421,7 @@ See Docker documentation for custom network creation and management.
### Development Workflow
```bash
# 1. Build local-dev image (builds runtime, then dev as intermediate, then local-dev as final image)
python container/render.py --framework=vllm --target=local-dev --short-output
python container/render.py --framework=vllm --target=local-dev --output-short-filename
docker build --build-arg USER_UID=$(id -u) --build-arg USER_GID=$(id -g) -f container/rendered.Dockerfile -t dynamo:latest-vllm-local-dev .
# 2. Run development container using the local-dev image
......@@ -439,7 +439,7 @@ python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 &
### Production Workflow
```bash
# 1. Build production runtime image (runs as non-root dynamo user)
python container/render.py --framework=vllm --target=runtime --short-output
python container/render.py --framework=vllm --target=runtime --output-short-filename
docker build -t dynamo:latest-vllm-runtime -f rendered.Dockerfile .
# 2. Run production container as non-root dynamo user
......@@ -449,7 +449,7 @@ container/run.sh --image dynamo:latest-vllm-runtime --gpus all -v $HOME/.cache:/
### Testing Workflow
```bash
# 1. Build dev image
python container/render.py --framework=vllm --target=dev --short-output
python container/render.py --framework=vllm --target=dev --output-short-filename
docker build -t dynamo:latest-vllm-dev -f rendered.Dockerfile .
# 2. Run tests with network isolation for reproducible results (no -it needed for CI)
......
......@@ -34,6 +34,10 @@ vllm:
base_image: nvcr.io/nvidia/cuda-dl-base
runtime_image: nvcr.io/nvidia/cuda
runtime_image_tag: ${CUDA_VERSION}.0-runtime-ubuntu24.04
cuda12.9:
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
cuda13.0:
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
vllm_ref: v0.14.1
flashinf_ref: v0.5.3
lmcache_ref: 0.3.12
......@@ -41,10 +45,6 @@ vllm:
enable_media_ffmpeg: "true"
enable_gpu_memory_service: "true"
enable_kvbm: "true"
cuda12.9:
base_image_tag: 25.06-cuda12.9-devel-ubuntu24.04
cuda13.0:
base_image_tag: 25.11-cuda13.0-devel-ubuntu24.04
sglang:
base_image: nvcr.io/nvidia/cuda-dl-base
......
......@@ -4,7 +4,6 @@
import argparse
import re
import sys
from pathlib import Path
import yaml
......@@ -41,9 +40,9 @@ def parse_args():
)
parser.add_argument("--make-efa", action="store_true", help="Enable AWS EFA")
parser.add_argument(
"--short-output",
"--output-short-filename",
action="store_true",
help="Output filename is just rendered.Dockerfile",
help="Output filename is rendered.Dockerfile instead of <framework>-<target>-cuda<cuda_version>-<arch>-rendered.Dockerfile",
)
parser.add_argument(
"--show-result",
......@@ -55,7 +54,23 @@ def parse_args():
def validate_args(args):
    """Reject framework/target combinations that are not supported.

    Args:
        args: Parsed command-line namespace. Only ``args.framework`` and
            ``args.target`` are read.

    Returns:
        None when the combination is supported.

    Raises:
        ValueError: If ``args.framework`` is not a known framework, or if
            ``args.target`` is not listed for that framework.
    """
    # Targets accepted for each framework. Note that "sglang" has no
    # "framework" target and "frontend" is only listed for "dynamo".
    valid_inputs = {
        "vllm": {"runtime", "dev", "local-dev", "framework", "wheel_builder", "base"},
        "trtllm": {"runtime", "dev", "local-dev", "framework", "wheel_builder", "base"},
        "sglang": {"runtime", "dev", "local-dev", "wheel_builder", "base"},
        "dynamo": {"runtime", "dev", "local-dev", "frontend", "wheel_builder", "base"},
    }

    # An unknown framework maps to an empty collection, so the "unknown
    # framework" and "unsupported target" cases share a single raise with
    # the same message instead of two duplicated raise statements.
    if args.target not in valid_inputs.get(args.framework, ()):
        raise ValueError(
            f"Invalid input combination: [framework={args.framework},target={args.target}]"
        )
......@@ -75,7 +90,7 @@ def render(args, context, script_dir):
# Replace all instances of 3+ newlines with 2 newlines
cleaned = re.sub(r"\n{3,}", "\n\n", rendered)
if args.short_output:
if args.output_short_filename:
filename = "rendered.Dockerfile"
else:
filename = f"{args.framework}-{args.target}-cuda{args.cuda_version}-{args.platform}-rendered.Dockerfile"
......@@ -98,7 +113,7 @@ def render(args, context, script_dir):
def main():
args = parse_args()
validate_args(args)
script_dir = Path(sys.argv[0]).parent
script_dir = Path(__file__).parent
with open(f"{script_dir}/context.yaml", "r") as f:
context = yaml.safe_load(f)
......
......@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/args.Dockerfile ===
##########################
#### Build Arguments #####
##########################
......
......@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/aws.Dockerfile ===
#############################
########## AWS EFA ##########
#############################
......
......@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/dev.Dockerfile ===
# ======================================================================
# STAGE: dynamo_tools for developers
# ======================================================================
......@@ -126,9 +127,9 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
# Add NVIDIA devtools repository and install development tools (nsight-systems).
# Cache apt downloads; sharing=locked avoids apt/dpkg races with concurrent builds.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
wget -qO - "https://developer.download.nvidia.com/devtools/repos/ubuntu2404/amd64/nvidia.pub" \
wget -qO - "https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH}/nvidia.pub" \
| gpg --dearmor -o /etc/apt/keyrings/nvidia-devtools.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/nvidia-devtools.gpg] https://developer.download.nvidia.com/devtools/repos/ubuntu2404/amd64 /" \
echo "deb [signed-by=/etc/apt/keyrings/nvidia-devtools.gpg] https://developer.download.nvidia.com/devtools/repos/ubuntu2404/${ARCH} /" \
| tee /etc/apt/sources.list.d/nvidia-devtools.list && \
apt-get update && \
apt-get install -y --no-install-recommends nsight-systems-2025.5.1 && \
......
......@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/dynamo_base.Dockerfile ===
##################################
########## Base Image ############
##################################
......
......@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/dynamo_runtime.Dockerfile ===
#######################################
########## Runtime image ##############
#######################################
......
......@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/frontend.Dockerfile ===
##############################################
########## Frontend entrypoint image #########
##############################################
......
......@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/local_dev.Dockerfile ===
# ======================================================================
# TARGET: local-dev (non-root development with UID/GID remapping)
# ======================================================================
......
......@@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/sglang_runtime.Dockerfile ===
##################################
########## Runtime Image #########
##################################
......
......@@ -2,6 +2,8 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#}
# === BEGIN templates/trtllm_framework.Dockerfile ===
# Copy artifacts from NGC PyTorch image
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS pytorch_base
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment