Unverified commit f33fdd52, authored by Keiven C and committed by GitHub
Browse files

chore(container): clean up Dockerfile.{sglang,trtllm,vllm} and build.sh (#4569)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent dfbcdbe0
...@@ -6,16 +6,14 @@ ...@@ -6,16 +6,14 @@
########## Build Arguments ######## ########## Build Arguments ########
################################## ##################################
# Base image configuration # This section contains build arguments that are common and shared across various
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" # Dockerfile.<frameworks>, so they should NOT have a default. The source of truth is from build.sh.
# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065 ARG BASE_IMAGE
# for details and reproducer to manually test if the image ARG BASE_IMAGE_TAG
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" ARG PYTHON_VERSION
ARG ENABLE_KVBM
# Build configuration
ARG ENABLE_KVBM=false
ARG CARGO_BUILD_JOBS ARG CARGO_BUILD_JOBS
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
...@@ -41,9 +39,6 @@ ARG NIXL_UCX_REF=v1.19.0 ...@@ -41,9 +39,6 @@ ARG NIXL_UCX_REF=v1.19.0
ARG NIXL_REF=0.7.1 ARG NIXL_REF=0.7.1
ARG NIXL_GDRCOPY_REF=v2.5.1 ARG NIXL_GDRCOPY_REF=v2.5.1
# Python configuration
ARG PYTHON_VERSION=3.12
################################## ##################################
########## Base Image ############ ########## Base Image ############
################################## ##################################
......
...@@ -2,14 +2,17 @@ ...@@ -2,14 +2,17 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
ARG CUDA_VERSION=12.9.1 # This section contains build arguments that are common and shared with
# the plain Dockerfile, so they should NOT have a default. The source of truth is from build.sh.
# Runtime image and build-time configuration (aligned with other backends) ARG BASE_IMAGE
# TODO: OPS-<number>: Use the same runtime image as the other backends ARG BASE_IMAGE_TAG
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.9.1-cudnn-runtime-ubuntu24.04" ARG FRAMEWORK_IMAGE
ARG FRAMEWORK_IMAGE_TAG
ARG PYTHON_VERSION
ARG CUDA_VERSION
ARG PYTHON_VERSION=3.10
ARG ARCH=amd64 ARG ARCH=amd64
ARG ARCH_ALT=x86_64 ARG ARCH_ALT=x86_64
ARG CARGO_BUILD_JOBS ARG CARGO_BUILD_JOBS
...@@ -39,7 +42,8 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base ...@@ -39,7 +42,8 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
# - Develop or debug framework-level components # - Develop or debug framework-level components
# - Create custom builds with specific optimization flags # - Create custom builds with specific optimization flags
# #
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS framework #FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS framework
FROM ${FRAMEWORK_IMAGE}:${FRAMEWORK_IMAGE_TAG} AS framework
# Declare all ARGs # Declare all ARGs
ARG BUILD_TYPE=all ARG BUILD_TYPE=all
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" # This section contains build arguments that are common and shared with
ARG BASE_IMAGE_TAG="25.10-cuda13.0-devel-ubuntu24.04" # the plain Dockerfile, so they should NOT have a default. The source of truth is from build.sh.
ARG BASE_IMAGE
ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG PYTORCH_BASE_IMAGE="nvcr.io/nvidia/pytorch" ARG PYTORCH_BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG PYTORCH_BASE_IMAGE_TAG="25.10-py3" ARG PYTORCH_BASE_IMAGE_TAG="25.10-py3"
ARG ENABLE_KVBM=false
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda-dl-base" ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG RUNTIME_IMAGE_TAG="25.10-cuda13.0-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="25.10-cuda13.0-runtime-ubuntu24.04"
...@@ -30,8 +34,6 @@ ARG GITHUB_TRTLLM_COMMIT ...@@ -30,8 +34,6 @@ ARG GITHUB_TRTLLM_COMMIT
# without adding if statements everywhere, so just define both as ARGs for now. # without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64 ARG ARCH=amd64
ARG ARCH_ALT=x86_64 ARG ARCH_ALT=x86_64
# Python configuration
ARG PYTHON_VERSION=3.12
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none" ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
......
...@@ -2,13 +2,14 @@ ...@@ -2,13 +2,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" # This section contains build arguments that are common and shared with
# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now # the plain Dockerfile, so they should NOT have a default. The source of truth is from build.sh.
# Please check https://github.com/ai-dynamo/dynamo/pull/1065 ARG BASE_IMAGE
# for details and reproducer to manually test if the image ARG BASE_IMAGE_TAG
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" ARG PYTHON_VERSION
ARG ENABLE_KVBM=false ARG ENABLE_KVBM
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG CUDA_VERSION="12.8" ARG CUDA_VERSION="12.8"
...@@ -41,8 +42,6 @@ ARG SCCACHE_REGION="" ...@@ -41,8 +42,6 @@ ARG SCCACHE_REGION=""
# without adding if statements everywhere, so just define both as ARGs for now. # without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64 ARG ARCH=amd64
ARG ARCH_ALT=x86_64 ARG ARCH_ALT=x86_64
# Python configuration
ARG PYTHON_VERSION=3.12
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none" ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
......
...@@ -102,7 +102,7 @@ DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc2" ...@@ -102,7 +102,7 @@ DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc2"
TENSORRTLLM_PIP_WHEEL="" TENSORRTLLM_PIP_WHEEL=""
VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: NCCL will hang with 25.03, so use 25.01 for now # FIXME: OPS-612 NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065 # Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image # for details and reproducer to manually test if the image
# can be updated to later versions. # can be updated to later versions.
...@@ -111,8 +111,13 @@ VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" ...@@ -111,8 +111,13 @@ VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
SGLANG_CUDA_VERSION="12.9.1"
# This is for Dockerfile
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
# This is for Dockerfile.sglang. Unlike the other frameworks, it is using a different base image
SGLANG_FRAMEWORK_IMAGE="nvcr.io/nvidia/cuda"
SGLANG_FRAMEWORK_IMAGE_TAG="${SGLANG_CUDA_VERSION}-cudnn-devel-ubuntu24.04"
NIXL_REF=0.7.1 NIXL_REF=0.7.1
NIXL_UCX_REF=v1.19.0 NIXL_UCX_REF=v1.19.0
...@@ -324,6 +329,9 @@ get_options() { ...@@ -324,6 +329,9 @@ get_options() {
missing_requirement "$1" missing_requirement "$1"
fi fi
;; ;;
--no-tag-latest)
NO_TAG_LATEST=true
;;
-?*) -?*)
error 'ERROR: Unknown option: ' "$1" error 'ERROR: Unknown option: ' "$1"
;; ;;
...@@ -465,6 +473,7 @@ show_help() { ...@@ -465,6 +473,7 @@ show_help() {
echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]" echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
echo " [--sccache-region S3 region for sccache (required with --use-sccache)]" echo " [--sccache-region S3 region for sccache (required with --use-sccache)]"
echo " [--vllm-max-jobs number of parallel jobs for compilation (only used by vLLM framework)]" echo " [--vllm-max-jobs number of parallel jobs for compilation (only used by vLLM framework)]"
echo " [--no-tag-latest do not add latest-{framework} tag to built image]"
echo "" echo ""
echo " Note: When using --use-sccache, AWS credentials must be set:" echo " Note: When using --use-sccache, AWS credentials must be set:"
echo " export AWS_ACCESS_KEY_ID=your_access_key" echo " export AWS_ACCESS_KEY_ID=your_access_key"
...@@ -792,6 +801,8 @@ fi ...@@ -792,6 +801,8 @@ fi
if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]]; then if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]]; then
echo "Forcing enable_kvbm to true in ${FRAMEWORK} image build" echo "Forcing enable_kvbm to true in ${FRAMEWORK} image build"
ENABLE_KVBM=true ENABLE_KVBM=true
else
ENABLE_KVBM=false
fi fi
if [ ! -z ${ENABLE_KVBM} ]; then if [ ! -z ${ENABLE_KVBM} ]; then
...@@ -809,9 +820,16 @@ fi ...@@ -809,9 +820,16 @@ fi
if [ -n "${MAX_JOBS}" ]; then if [ -n "${MAX_JOBS}" ]; then
BUILD_ARGS+=" --build-arg MAX_JOBS=${MAX_JOBS} " BUILD_ARGS+=" --build-arg MAX_JOBS=${MAX_JOBS} "
fi fi
if [[ $FRAMEWORK == "SGLANG" ]]; then if [[ $FRAMEWORK == "SGLANG" ]]; then
echo "Forcing Python version to 3.10 for sglang image build" echo "Customizing Python, CUDA, and framework images for sglang images"
BUILD_ARGS+=" --build-arg PYTHON_VERSION=3.10" BUILD_ARGS+=" --build-arg PYTHON_VERSION=3.10"
BUILD_ARGS+=" --build-arg CUDA_VERSION=${SGLANG_CUDA_VERSION}"
# Unlike the other two frameworks, SGLang's framework image is different from the base image, so we need to set it explicitly.
BUILD_ARGS+=" --build-arg FRAMEWORK_IMAGE=${SGLANG_FRAMEWORK_IMAGE}"
BUILD_ARGS+=" --build-arg FRAMEWORK_IMAGE_TAG=${SGLANG_FRAMEWORK_IMAGE_TAG}"
else
BUILD_ARGS+=" --build-arg PYTHON_VERSION=3.12"
fi fi
# Add sccache build arguments # Add sccache build arguments
if [ "$USE_SCCACHE" = true ]; then if [ "$USE_SCCACHE" = true ]; then
...@@ -825,9 +843,12 @@ if [[ "$PLATFORM" == *"linux/arm64"* && "${FRAMEWORK}" == "SGLANG" ]]; then ...@@ -825,9 +843,12 @@ if [[ "$PLATFORM" == *"linux/arm64"* && "${FRAMEWORK}" == "SGLANG" ]]; then
# Add arguments required for sglang blackwell build # Add arguments required for sglang blackwell build
BUILD_ARGS+=" --build-arg GRACE_BLACKWELL=true --build-arg BUILD_TYPE=blackwell_aarch64" BUILD_ARGS+=" --build-arg GRACE_BLACKWELL=true --build-arg BUILD_TYPE=blackwell_aarch64"
fi fi
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}" LATEST_TAG=""
if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then if [ -z "${NO_TAG_LATEST}" ]; then
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
LATEST_TAG="${LATEST_TAG}-${TARGET}" LATEST_TAG="${LATEST_TAG}-${TARGET}"
fi
fi fi
show_image_options show_image_options
...@@ -841,8 +862,13 @@ fi ...@@ -841,8 +862,13 @@ fi
if [[ -z "${DEV_IMAGE_INPUT:-}" ]]; then if [[ -z "${DEV_IMAGE_INPUT:-}" ]]; then
# Follow 2-step build process for all frameworks # Follow 2-step build process for all frameworks
if [[ $FRAMEWORK != "NONE" ]]; then if [[ $FRAMEWORK != "NONE" ]]; then
# Define base image tag before using it # Define base image tag with framework suffix to prevent clobbering
DYNAMO_BASE_IMAGE="dynamo-base:${VERSION}" # Different frameworks require different base configurations:
# - VLLM: Python 3.12, ENABLE_KVBM=true, BASE_IMAGE=cuda-dl-base
# - SGLANG: Python 3.10, BASE_IMAGE=cuda-dl-base
# - TRTLLM: Python 3.12, ENABLE_KVBM=true, BASE_IMAGE=pytorch
# Without unique tags, building different frameworks would overwrite each other's names
DYNAMO_BASE_IMAGE="dynamo-base:${VERSION}-${FRAMEWORK,,}"
# Start base image build # Start base image build
echo "======================================" echo "======================================"
echo "Starting Build 1: Base Image" echo "Starting Build 1: Base Image"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment