Unverified commit f33fdd52, authored by Keiven C, committed by GitHub
Browse files

chore(container): clean up Dockerfile.{sglang,trtllm,vllm} and build.sh (#4569)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent dfbcdbe0
......@@ -6,16 +6,14 @@
########## Build Arguments ########
##################################
# Base image configuration
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
# Build configuration
ARG ENABLE_KVBM=false
# This section contains build arguments that are common and shared across various
# Dockerfile.<frameworks>, so they should NOT have a default. The source of truth is build.sh.
ARG BASE_IMAGE
ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG CARGO_BUILD_JOBS
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
......@@ -41,9 +39,6 @@ ARG NIXL_UCX_REF=v1.19.0
ARG NIXL_REF=0.7.1
ARG NIXL_GDRCOPY_REF=v2.5.1
# Python configuration
ARG PYTHON_VERSION=3.12
##################################
########## Base Image ############
##################################
......
......@@ -2,14 +2,17 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
ARG CUDA_VERSION=12.9.1
# This section contains build arguments that are common and shared with
# the plain Dockerfile, so they should NOT have a default. The source of truth is build.sh.
# Runtime image and build-time configuration (aligned with other backends)
# TODO: OPS-<number>: Use the same runtime image as the other backends
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.9.1-cudnn-runtime-ubuntu24.04"
ARG BASE_IMAGE
ARG BASE_IMAGE_TAG
ARG FRAMEWORK_IMAGE
ARG FRAMEWORK_IMAGE_TAG
ARG PYTHON_VERSION
ARG CUDA_VERSION
ARG PYTHON_VERSION=3.10
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
ARG CARGO_BUILD_JOBS
......@@ -39,7 +42,8 @@ FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
# - Develop or debug framework-level components
# - Create custom builds with specific optimization flags
#
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS framework
#FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04 AS framework
FROM ${FRAMEWORK_IMAGE}:${FRAMEWORK_IMAGE_TAG} AS framework
# Declare all ARGs
ARG BUILD_TYPE=all
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG BASE_IMAGE_TAG="25.10-cuda13.0-devel-ubuntu24.04"
# This section contains build arguments that are common and shared with
# the plain Dockerfile, so they should NOT have a default. The source of truth is build.sh.
ARG BASE_IMAGE
ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG PYTORCH_BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG PYTORCH_BASE_IMAGE_TAG="25.10-py3"
ARG ENABLE_KVBM=false
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda-dl-base"
ARG RUNTIME_IMAGE_TAG="25.10-cuda13.0-runtime-ubuntu24.04"
......@@ -30,8 +34,6 @@ ARG GITHUB_TRTLLM_COMMIT
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
# Python configuration
ARG PYTHON_VERSION=3.12
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
......
......@@ -2,13 +2,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG ENABLE_KVBM=false
# This section contains build arguments that are common and shared with
# the plain Dockerfile, so they should NOT have a default. The source of truth is build.sh.
ARG BASE_IMAGE
ARG BASE_IMAGE_TAG
ARG PYTHON_VERSION
ARG ENABLE_KVBM
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG CUDA_VERSION="12.8"
......@@ -41,8 +42,6 @@ ARG SCCACHE_REGION=""
# without adding if statements everywhere, so just define both as ARGs for now.
ARG ARCH=amd64
ARG ARCH_ALT=x86_64
# Python configuration
ARG PYTHON_VERSION=3.12
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base
......
......@@ -102,7 +102,7 @@ DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc2"
TENSORRTLLM_PIP_WHEEL=""
VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: NCCL will hang with 25.03, so use 25.01 for now
# FIXME: OPS-612 NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
......@@ -111,8 +111,13 @@ VLLM_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
SGLANG_CUDA_VERSION="12.9.1"
# This is for Dockerfile
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
# This is for Dockerfile.sglang. Unlike the other frameworks, it uses a different base image.
SGLANG_FRAMEWORK_IMAGE="nvcr.io/nvidia/cuda"
SGLANG_FRAMEWORK_IMAGE_TAG="${SGLANG_CUDA_VERSION}-cudnn-devel-ubuntu24.04"
NIXL_REF=0.7.1
NIXL_UCX_REF=v1.19.0
......@@ -324,6 +329,9 @@ get_options() {
missing_requirement "$1"
fi
;;
--no-tag-latest)
NO_TAG_LATEST=true
;;
-?*)
error 'ERROR: Unknown option: ' "$1"
;;
......@@ -465,6 +473,7 @@ show_help() {
echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
echo " [--sccache-region S3 region for sccache (required with --use-sccache)]"
echo " [--vllm-max-jobs number of parallel jobs for compilation (only used by vLLM framework)]"
echo " [--no-tag-latest do not add latest-{framework} tag to built image]"
echo ""
echo " Note: When using --use-sccache, AWS credentials must be set:"
echo " export AWS_ACCESS_KEY_ID=your_access_key"
......@@ -792,6 +801,8 @@ fi
# Select the KVBM setting from the framework being built: it is forced on for
# the VLLM and TRTLLM frameworks and explicitly set to false for every other
# framework, so ENABLE_KVBM is always assigned once this block has run.
if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]]; then
echo "Forcing enable_kvbm to true in ${FRAMEWORK} image build"
ENABLE_KVBM=true
else
ENABLE_KVBM=false
fi
if [ ! -z ${ENABLE_KVBM} ]; then
......@@ -809,9 +820,16 @@ fi
if [ -n "${MAX_JOBS}" ]; then
BUILD_ARGS+=" --build-arg MAX_JOBS=${MAX_JOBS} "
fi
# Append the framework-specific Python/CUDA/image build arguments to BUILD_ARGS.
# SGLANG builds pin Python 3.10 and pass the CUDA version and framework image
# explicitly; every other framework only receives Python 3.12.
# NOTE(review): the two echo lines below look like the pre-change and
# post-change messages shown side by side in this diff view — confirm that only
# one of them remains in build.sh.
if [[ $FRAMEWORK == "SGLANG" ]]; then
echo "Forcing Python version to 3.10 for sglang image build"
echo "Customizing Python, CUDA, and framework images for sglang images"
BUILD_ARGS+=" --build-arg PYTHON_VERSION=3.10"
BUILD_ARGS+=" --build-arg CUDA_VERSION=${SGLANG_CUDA_VERSION}"
# Unlike the other two frameworks, SGLang's framework image is different from the base image, so we need to set it explicitly.
BUILD_ARGS+=" --build-arg FRAMEWORK_IMAGE=${SGLANG_FRAMEWORK_IMAGE}"
BUILD_ARGS+=" --build-arg FRAMEWORK_IMAGE_TAG=${SGLANG_FRAMEWORK_IMAGE_TAG}"
else
# All non-SGLang frameworks build against Python 3.12.
BUILD_ARGS+=" --build-arg PYTHON_VERSION=3.12"
fi
# Add sccache build arguments
if [ "$USE_SCCACHE" = true ]; then
......@@ -825,9 +843,12 @@ if [[ "$PLATFORM" == *"linux/arm64"* && "${FRAMEWORK}" == "SGLANG" ]]; then
# Add arguments required for sglang blackwell build
BUILD_ARGS+=" --build-arg GRACE_BLACKWELL=true --build-arg BUILD_TYPE=blackwell_aarch64"
fi
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
LATEST_TAG=""
if [ -z "${NO_TAG_LATEST}" ]; then
LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
LATEST_TAG="${LATEST_TAG}-${TARGET}"
fi
fi
show_image_options
......@@ -841,8 +862,13 @@ fi
if [[ -z "${DEV_IMAGE_INPUT:-}" ]]; then
# Follow 2-step build process for all frameworks
if [[ $FRAMEWORK != "NONE" ]]; then
# Define base image tag before using it
DYNAMO_BASE_IMAGE="dynamo-base:${VERSION}"
# Define base image tag with framework suffix to prevent clobbering
# Different frameworks require different base configurations:
# - VLLM: Python 3.12, ENABLE_KVBM=true, BASE_IMAGE=cuda-dl-base
# - SGLANG: Python 3.10, BASE_IMAGE=cuda-dl-base
# - TRTLLM: Python 3.12, ENABLE_KVBM=true, BASE_IMAGE=pytorch
# Without unique tags, builds for different frameworks would overwrite each other's base image
DYNAMO_BASE_IMAGE="dynamo-base:${VERSION}-${FRAMEWORK,,}"
# Start base image build
echo "======================================"
echo "Starting Build 1: Base Image"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment