#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16

17
18
19
20
21
# The script relies on associative arrays and ${var^^}/${var,,} case
# conversion, both of which need Bash 4 or newer; refuse to run on older shells.
if (( BASH_VERSINFO[0] < 4 )); then
    printf '%s\n' "Error: Bash version 4.0 or higher is required. Current version: ${BASH_VERSINFO[0]}.${BASH_VERSINFO[1]}"
    exit 1
fi

Carsten Csiky's avatar
Carsten Csiky committed
22
# Abort on the first unhandled command failure.
set -e

# Image tagging state, populated by get_options():
#   TAG          accumulated "--tag <name>" arguments for docker build
#   PRIMARY_TAG  the first tag supplied (or the generated default tag)
TAG=
PRIMARY_TAG=

# Command prefix; --dry-run sets it to "echo" so commands are printed, not run.
RUN_PREFIX=

# Default target platform; override with --platform.
PLATFORM=linux/amd64
28
29

# Get short commit hash (can be pre-seeded through the commit_id environment variable)
commit_id=${commit_id:-$(git rev-parse --short HEAD)}

# If HEAD sits exactly on a tag, use that tag with its leading "v" stripped.
# Stays empty when there is no exact match.
current_tag=${current_tag:-$(git describe --tags --exact-match 2>/dev/null | sed 's/^v//' || true)}

# Get latest version from release branches or tags
# Strategy:
# 1. Check for release/X.Y.Z branches (most reliable for development)
# 2. Fall back to git tags, excluding test-rc tags
# 3. Default to 0.0.1 if nothing found

# Try to find the latest release branch first
latest_release_branch=$(git branch -r 2>/dev/null | grep -E 'origin/release/[0-9]+\.[0-9]+\.[0-9]+$' | sed 's|.*/||' | sort -V | tail -1 || true)

if [[ -n ${latest_release_branch} ]]; then
    latest_tag=${latest_tag:-$latest_release_branch}
    echo "INFO: Using version from latest release branch: ${latest_tag}"
else
    # Fall back to tags, excluding test-rc tags
    latest_tag=${latest_tag:-$(git tag -l 'v*' --sort=-version:refname | grep -v 'test-rc' | head -1 | sed 's/^v//' || true)}
fi

if [[ -z ${latest_tag} ]]; then
    latest_tag="0.0.1"
    echo "No git release tag or branch found, setting to unknown version: ${latest_tag}"
fi

# Use tag if available, otherwise use latest_tag.dev.commit_id
VERSION=v${current_tag:-$latest_tag.dev.$commit_id}

# Same scheme for the Python package, but with "+" before the commit id.
PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id}
61
62
63
64
65
66

# Frameworks
#
# Each framework has a corresponding base image.  Additional
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.

# Keys are the accepted --framework values (matched after upper-casing).
declare -A FRAMEWORKS=(["VLLM"]=1 ["TRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)

DEFAULT_FRAMEWORK=VLLM

# Directory containing this script (symlinks resolved), the default Dockerfile
# next to it, and the parent directory used as the docker build context.
SOURCE_DIR=$(dirname "$(readlink -f "$0")")
DOCKERFILE=${SOURCE_DIR}/Dockerfile
BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")

# Base Images
TRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
TRTLLM_BASE_IMAGE_TAG=25.12-py3

# Important Note: Because of ABI compatibility issues between TensorRT-LLM and NGC PyTorch,
# we need to build the TensorRT-LLM wheel from source.
#
# There are two ways to build the dynamo image with TensorRT-LLM.
# 1. Use the local TensorRT-LLM wheel directory.
# 2. Use the TensorRT-LLM wheel on artifactory.
#
# If using option 1, the TENSORRTLLM_PIP_WHEEL_DIR must be a path to a directory
# containing TensorRT-LLM wheel file along with commit.txt file with the
# <arch>_<commit ID> as contents. If no valid trtllm wheel is found, the script
# will attempt to build the wheel from source and store the built wheel in the
# specified directory. TRTLLM_COMMIT from the TensorRT-LLM main branch will be
# used to build the wheel.
#
# If using option 2, the TENSORRTLLM_PIP_WHEEL must be the TensorRT-LLM wheel
# package that will be installed from the specified TensorRT-LLM PyPI Index URL.
# This option will ignore the TRTLLM_COMMIT option. As the TensorRT-LLM wheel from PyPI
# is not ABI compatible with NGC PyTorch, you can use TENSORRTLLM_INDEX_URL to specify
# a private PyPI index URL which has your pre-built TensorRT-LLM wheel.
#
# By default, we will use option 1. If you want to use option 2, you can set
# TENSORRTLLM_PIP_WHEEL to the TensorRT-LLM wheel on artifactory.
#
DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"

# TensorRT-LLM commit to use for building the trtllm wheel if not provided.
# Important Note: This commit is not used in our CI pipeline. See the CI
# variables to learn how to run a pipeline with a specific commit.
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="45d7022cc33903509fd8045bbc577d77dd1d3e2f" # 1.3.0rc1
TRTLLM_COMMIT=""
TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
TRTLLM_GIT_URL=""

# TensorRT-LLM PyPI index URL
DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.nvidia.com/"
# TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
# Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.3.0rc1"
# TensorRT-LLM wheels on PyPI might not be compatible with the NGC PyTorch.
# For incompatible versions, we install the wheel from the NGC image during the Docker build.
# The following versions are not ABI compatible with the NGC PyTorch.
TRTLLM_ABI_INCOMPATIBLE_VERSIONS=("1.3.0rc1")
TENSORRTLLM_PIP_WHEEL=""
TRTLLM_WHEEL_IMAGE=""
124

125
VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
# FIXME: OPS-612 NCCL will hang with 25.03, so use 25.01 for now
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
# NOTE(review): the tag below is 25.06, not 25.01 - the FIXME above looks stale; confirm and update.
VLLM_BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
# *_CU13 variants are selected by get_options when --cuda-version starts with "13."
VLLM_BASE_IMAGE_TAG_CU13="25.11-cuda13.0-devel-ubuntu24.04"
VLLM_RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
VLLM_RUNTIME_IMAGE_TAG="12.9.1-runtime-ubuntu24.04"
VLLM_RUNTIME_IMAGE_TAG_CU13="13.0.2-runtime-ubuntu24.04"

NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"

SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
SGLANG_BASE_IMAGE_TAG_CU13="25.11-cuda13.0-devel-ubuntu24.04"
SGLANG_CUDA_VERSION="12.9.1"
SGLANG_CUDA_VERSION_CU13="13.0.1"
SGLANG_RUNTIME_IMAGE_TAG_CU13="v0.5.8-cu130-runtime"

PYTHON_VERSION="3.12"

# NIXL and related component refs (NIXL_REF can be overridden with --nixl-ref)
NIXL_REF=0.9.0
NIXL_UCX_REF=v1.20.0
NIXL_GDRCOPY_REF=v2.5.1
NIXL_LIBFABRIC_REF=v2.3.0

# AWS EFA installer version
EFA_VERSION=1.45.1

# docker build behaviour flags, set by --no-cache / --no-load / --push
NO_CACHE=""
NO_LOAD=""
PUSH=""

# KVBM (KV Cache Block Manager) - default disabled, enabled automatically for VLLM/TRTLLM
# or can be explicitly enabled via --enable-kvbm flag
ENABLE_KVBM=false

# GPU Memory Service - default disabled, enabled automatically for VLLM/SGLANG
# or can be explicitly enabled via --enable-gpu-memory-service flag
ENABLE_GPU_MEMORY_SERVICE=false

# sccache configuration for S3
USE_SCCACHE=""
SCCACHE_BUCKET=""
SCCACHE_REGION=""

174
175
176
177
178
179
180
#######################################
# Parse the command line into the script's global settings, then derive the
# framework, base image, tag, platform and target defaults from them.
# Globals read:
#   FRAMEWORKS, DEFAULT_FRAMEWORK, VERSION, <FRAMEWORK>_BASE_IMAGE,
#   <FRAMEWORK>_BASE_IMAGE_TAG, <FRAMEWORK>_BASE_VERSION (via indirection),
#   VLLM_*_CU13 and SGLANG_*_CU13 defaults
# Globals written:
#   one variable per option (PLATFORM, FRAMEWORK, CUDA_VERSION, NIXL_REF,
#   TENSORRTLLM_*, TRTLLM_*, BASE_IMAGE, BASE_IMAGE_TAG, TARGET, CUSTOM_UID,
#   CUSTOM_GID, BUILD_ARGS, TAG, PRIMARY_TAG, RUN_PREFIX, DRY_RUN, NO_CACHE,
#   NO_LOAD, PUSH, CACHE_FROM, CACHE_TO, BUILD_CONTEXT_ARG, ENABLE_*, MAKE_EFA,
#   USE_SCCACHE, SCCACHE_*, MAX_JOBS, EFA_VERSION, NO_TAG_LATEST) plus the
#   derived RUNTIME_IMAGE_TAG, SGLANG_CUDA_VERSION, BASE_VERSION, TARGET_STR
# Arguments:
#   the script's command-line arguments ("$@")
# Exits:
#   via show_help for -h/--help; via error()/missing_requirement() for unknown
#   options, missing option values and failed validation
#######################################
get_options() {
    while :; do
        case $1 in
        -h | -\? | --help)
            show_help
            exit
            ;;
        --platform)
            if [ "$2" ]; then
                PLATFORM=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --framework)
            if [ "$2" ]; then
                FRAMEWORK=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --cuda-version)
            if [ "$2" ]; then
                echo "INFO: Setting CUDA_VERSION to $2"
                CUDA_VERSION=$2
                BUILD_ARGS+=" --build-arg CUDA_VERSION=$2 "
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --nixl-ref)
            if [ "$2" ]; then
                NIXL_REF=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --tensorrtllm-pip-wheel-dir)
            if [ "$2" ]; then
                TENSORRTLLM_PIP_WHEEL_DIR=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --tensorrtllm-commit)
            if [ "$2" ]; then
                TRTLLM_COMMIT=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --tensorrtllm-pip-wheel)
            if [ "$2" ]; then
                TENSORRTLLM_PIP_WHEEL=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --tensorrtllm-index-url)
            if [ "$2" ]; then
                TENSORRTLLM_INDEX_URL=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --tensorrtllm-git-url)
            if [ "$2" ]; then
                TRTLLM_GIT_URL=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --base-image)
            if [ "$2" ]; then
                BASE_IMAGE=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --base-image-tag)
            if [ "$2" ]; then
                BASE_IMAGE_TAG=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --target)
            if [ "$2" ]; then
                TARGET=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --uid)
            if [ "$2" ]; then
                CUSTOM_UID=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --gid)
            if [ "$2" ]; then
                CUSTOM_GID=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --build-arg)
            if [ "$2" ]; then
                BUILD_ARGS+="--build-arg $2 "
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --tag)
            if [ "$2" ]; then
                # The first --tag also becomes PRIMARY_TAG; later ones are only appended.
                if [ -z "$TAG" ]; then
                    TAG="--tag $2"
                    PRIMARY_TAG="$2"
                else
                    TAG+=" --tag $2"
                fi
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --dry-run)
            RUN_PREFIX="echo"
            DRY_RUN="true"
            echo ""
            echo "=============================="
            echo "DRY RUN: COMMANDS PRINTED ONLY"
            echo "=============================="
            echo ""
            ;;
        --no-cache)
            NO_CACHE=" --no-cache"
            ;;
        --no-load)
            NO_LOAD=true
            ;;
        --push)
            PUSH=" --push"
            ;;
        --cache-from)
            if [ "$2" ]; then
                CACHE_FROM+="--cache-from $2 "
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --cache-to)
            if [ "$2" ]; then
                CACHE_TO+="--cache-to $2 "
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --build-context)
            if [ "$2" ]; then
                BUILD_CONTEXT_ARG="--build-context $2"
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --enable-kvbm)
            ENABLE_KVBM=true
            ;;
        --enable-gpu-memory-service)
            ENABLE_GPU_MEMORY_SERVICE=true
            ;;
        --enable-media-nixl)
            ENABLE_MEDIA_NIXL=true
            ;;
        --enable-media-ffmpeg)
            ENABLE_MEDIA_FFMPEG=true
            ;;
        --make-efa)
            MAKE_EFA=true
            ;;
        --use-sccache)
            USE_SCCACHE=true
            ;;
        --sccache-bucket)
            if [ "$2" ]; then
                SCCACHE_BUCKET=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --sccache-region)
            if [ "$2" ]; then
                SCCACHE_REGION=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --vllm-max-jobs)
            # Set MAX_JOBS for vLLM compilation (only used by Dockerfile.vllm)
            if [ "$2" ]; then
                MAX_JOBS=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --efa-version)
            if [ "$2" ]; then
                EFA_VERSION=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --no-tag-latest)
            NO_TAG_LATEST=true
            ;;
        -?*)
            error 'ERROR: Unknown option: ' "$1"
            ;;
        ?*)
            # Bare positional arguments are not accepted either.
            error 'ERROR: Unknown option: ' "$1"
            ;;
        *)
            # $1 is empty: every argument has been consumed.
            break
            ;;
        esac
        shift
    done

    # Validate that --uid and --gid are only used with local-dev target
    if [[ -n "${CUSTOM_UID:-}" || -n "${CUSTOM_GID:-}" ]]; then
        if [[ "${TARGET:-}" != "local-dev" && "${TARGET:-}" != "local-dev-aws" ]]; then
            error "ERROR: --uid and --gid can only be used with --target local-dev or --target local-dev-aws"
        fi
    fi

    if [ -z "$FRAMEWORK" ]; then
        FRAMEWORK=$DEFAULT_FRAMEWORK
    fi

    if [ -n "$FRAMEWORK" ]; then
        # Framework names are matched case-insensitively.
        FRAMEWORK=${FRAMEWORK^^}

        if [[ -z "${FRAMEWORKS[$FRAMEWORK]}" ]]; then
            error 'ERROR: Unknown framework: ' "$FRAMEWORK"
        fi

        # Look up <FRAMEWORK>_BASE_IMAGE_TAG / <FRAMEWORK>_BASE_IMAGE by name
        # unless the user supplied explicit values.
        if [ -z "$BASE_IMAGE_TAG" ]; then
            BASE_IMAGE_TAG=${FRAMEWORK}_BASE_IMAGE_TAG
            BASE_IMAGE_TAG=${!BASE_IMAGE_TAG}
            echo "INFO: Using default base image tag for $FRAMEWORK: $BASE_IMAGE_TAG"
        fi

        if [ -z "$BASE_IMAGE" ]; then
            BASE_IMAGE=${FRAMEWORK}_BASE_IMAGE
            BASE_IMAGE=${!BASE_IMAGE}
        fi

        # CUDA 13 builds replace the base/runtime image tags, even ones given explicitly.
        if [[ $FRAMEWORK == "VLLM" ]] && [[ $CUDA_VERSION == "13."* ]]; then
            BASE_IMAGE_TAG=$VLLM_BASE_IMAGE_TAG_CU13
            BUILD_ARGS+=" --build-arg BASE_IMAGE_TAG=${VLLM_BASE_IMAGE_TAG_CU13} "
            RUNTIME_IMAGE_TAG=$VLLM_RUNTIME_IMAGE_TAG_CU13
            BUILD_ARGS+=" --build-arg RUNTIME_IMAGE_TAG=${VLLM_RUNTIME_IMAGE_TAG_CU13} "
            echo "INFO: Overriding base image tag for vLLM with CUDA 13: $BASE_IMAGE_TAG AND RUNTIME_IMAGE_TAG: $RUNTIME_IMAGE_TAG"
        fi

        if [[ $FRAMEWORK == "SGLANG" ]] && [[ $CUDA_VERSION == "13."* ]]; then
            BASE_IMAGE_TAG=$SGLANG_BASE_IMAGE_TAG_CU13
            BUILD_ARGS+=" --build-arg BASE_IMAGE_TAG=${SGLANG_BASE_IMAGE_TAG_CU13} "
            SGLANG_CUDA_VERSION="${SGLANG_CUDA_VERSION_CU13}"
            RUNTIME_IMAGE_TAG="${SGLANG_RUNTIME_IMAGE_TAG_CU13}"
            BUILD_ARGS+=" --build-arg RUNTIME_IMAGE_TAG=${RUNTIME_IMAGE_TAG} "
            echo "INFO: Overriding base image tag for SGLang with CUDA 13: $BASE_IMAGE_TAG AND RUNTIME_IMAGE_TAG: $RUNTIME_IMAGE_TAG"
        fi

        if [ -z "$BASE_IMAGE" ]; then
            error "ERROR: Framework $FRAMEWORK without BASE_IMAGE"
        fi

        BASE_VERSION=${FRAMEWORK}_BASE_VERSION
        BASE_VERSION=${!BASE_VERSION}
    fi

    # No --tag given: generate dynamo:<version>-<framework>[-<target>].
    if [ -z "$TAG" ]; then
        TAG="--tag dynamo:${VERSION}-${FRAMEWORK,,}"
        PRIMARY_TAG="dynamo:${VERSION}-${FRAMEWORK,,}"
        if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
            TAG="${TAG}-${TARGET}"
            PRIMARY_TAG="${PRIMARY_TAG}-${TARGET}"
        fi
    fi

    # From here on PLATFORM holds the complete "--platform <value>" argument.
    if [ -n "$PLATFORM" ]; then
        PLATFORM="--platform ${PLATFORM}"
    fi

    if [ -n "$TARGET" ]; then
        TARGET_STR="--target ${TARGET}"
    else
        TARGET_STR="--target dev"
    fi

    # Validate sccache configuration
    if [ "$USE_SCCACHE" = true ]; then
        if [ -z "$SCCACHE_BUCKET" ]; then
            error "ERROR: --sccache-bucket is required when --use-sccache is specified"
        fi
        if [ -z "$SCCACHE_REGION" ]; then
            error "ERROR: --sccache-region is required when --use-sccache is specified"
        fi
    fi
}


#######################################
# Print a summary of the image about to be built.
# Globals read:
#   TAG, BASE_IMAGE, BASE_IMAGE_TAG, FRAMEWORK, PRINT_TRTLLM_WHEEL_FILE,
#   BUILD_CONTEXT, BUILD_ARGS, USE_SCCACHE, SCCACHE_BUCKET, SCCACHE_REGION,
#   SCCACHE_S3_KEY_PREFIX
# Outputs:
#   the summary on stdout
#######################################
show_image_options() {
    echo ""
    echo "Building Dynamo Image: '${TAG}'"
    echo ""
    echo "   Base: '${BASE_IMAGE}'"
    echo "   Base_Image_Tag: '${BASE_IMAGE_TAG}'"
    if [[ $FRAMEWORK == "TRTLLM" ]]; then
        echo "   Tensorrtllm_Pip_Wheel: '${PRINT_TRTLLM_WHEEL_FILE}'"
    fi
    echo "   Build Context: '${BUILD_CONTEXT}'"
    # NOTE(review): BUILD_ARGS is printed verbatim; if it carries token build
    # args they end up in the build log - confirm that is acceptable.
    echo "   Build Arguments: '${BUILD_ARGS}'"
    echo "   Framework: '${FRAMEWORK}'"
    if [ "$USE_SCCACHE" = true ]; then
        echo "   sccache: Enabled"
        echo "   sccache Bucket: '${SCCACHE_BUCKET}'"
        echo "   sccache Region: '${SCCACHE_REGION}'"

        if [ -n "$SCCACHE_S3_KEY_PREFIX" ]; then
            echo "   sccache S3 Key Prefix: '${SCCACHE_S3_KEY_PREFIX}'"
        fi
    fi
    echo ""
}

#######################################
# Print the usage summary and exit successfully.
# The option list mirrors the flags accepted by get_options: --target,
# --cuda-version and --nixl-ref are listed, and --release-build is not,
# because get_options rejects it as an unknown option.
# Globals read:
#   FRAMEWORKS (its keys are listed as the valid --framework values)
# Outputs:
#   usage text on stdout
# Exits:
#   always, with status 0
#######################################
show_help() {
    echo "usage: build.sh"
    echo "  [--base-image base image]"
    echo "  [--base-image-tag base image tag]"
    echo "  [--platform platform for docker build]"
    echo "  [--framework framework one of ${!FRAMEWORKS[*]}]"
    echo "  [--target docker build target stage (default: dev)]"
    echo "  [--cuda-version CUDA version passed to the build as CUDA_VERSION (13.x selects the CUDA 13 image tags for vllm/sglang)]"
    echo "  [--nixl-ref NIXL version/ref passed to the build as NIXL_REF]"
    echo "  [--tensorrtllm-pip-wheel-dir path to tensorrtllm pip wheel directory]"
    echo "  [--tensorrtllm-commit tensorrtllm commit/tag/branch to use for building the trtllm wheel if the wheel is not provided]"
    echo "  [--tensorrtllm-pip-wheel tensorrtllm pip wheel on artifactory]"
    echo "  [--tensorrtllm-index-url tensorrtllm PyPI index URL if providing the wheel from artifactory]"
    echo "  [--tensorrtllm-git-url tensorrtllm git repository URL for cloning]"
    echo "  [--build-arg additional build args to pass to docker build]"
    echo "  [--cache-from cache location to start from]"
    echo "  [--cache-to location where to cache the build output]"
    echo "  [--tag tag for image (can be specified multiple times)]"
    echo "  [--uid user ID for local-dev images (only with --target local-dev or local-dev-aws)]"
    echo "  [--gid group ID for local-dev images (only with --target local-dev or local-dev-aws)]"
    echo "  [--no-cache disable docker build cache]"
    echo "  [--no-load do not load the image into docker (disables default --load)]"
    echo "  [--push push the image to the registry]"
    echo "  [--dry-run print docker commands without running]"
    echo "  [--build-context name=path to add build context]"
    echo "  [--make-efa Adds AWS EFA layer on top of the built image (works with any target)]"
    echo "  [--enable-kvbm Enables KVBM support in Python 3.12]"
    echo "  [--enable-gpu-memory-service Enables GPU Memory Service support]"
    echo "  [--enable-media-nixl Enable media processing with NIXL support (default: true for frameworks, false for none)]"
    echo "  [--enable-media-ffmpeg Enable media processing with FFMPEG support (default: true for frameworks, false for none)]"
    echo "  [--use-sccache enable sccache for Rust/C/C++ compilation caching]"
    echo "  [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]"
    echo "  [--sccache-region S3 region for sccache (required with --use-sccache)]"
    echo "  [--vllm-max-jobs number of parallel jobs for compilation (only used by vLLM framework)]"
    echo "  [--efa-version AWS EFA installer version (default: 1.45.1)]"
    echo "  [--no-tag-latest do not add latest-{framework} tag to built image]"
    echo ""
    echo "  Note: When using --use-sccache, AWS credentials must be set:"
    echo "        export AWS_ACCESS_KEY_ID=your_access_key"
    echo "        export AWS_SECRET_ACCESS_KEY=your_secret_key"
    exit 0
}

# Abort because the option named in $1 was given without its value.
# Delegates to error(), which prints the message and exits.
missing_requirement() {
    local flag=$1
    error "ERROR: ${flag} requires an argument."
}

# Print "$1 $2" (joined by a single space) to stderr and exit with status 1.
error() {
    local message=$1
    local detail=$2
    printf '%s %s\n' "$message" "$detail" 1>&2
    exit 1
}

# Parse the command line; this populates the option globals (FRAMEWORK, TAG,
# PLATFORM, BASE_IMAGE, ...) that the rest of the script reads.
get_options "$@"

589
# Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64
# (substring match, because get_options stores PLATFORM as "--platform <value>")
ARCH="amd64"
if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
    ARCH="arm64"
    BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
fi

# Set the commit sha in the container so we can inspect what build this relates to
DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA:-$(git rev-parse HEAD)}
BUILD_ARGS+=" --build-arg DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA "

# Pick the framework-specific Dockerfile; any other value leaves DOCKERFILE untouched
case $FRAMEWORK in
    VLLM)
        DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm
        ;;
    TRTLLM)
        DOCKERFILE=${SOURCE_DIR}/Dockerfile.trtllm
        ;;
    NONE)
        DOCKERFILE=${SOURCE_DIR}/Dockerfile
        ;;
    SGLANG)
        DOCKERFILE=${SOURCE_DIR}/Dockerfile.sglang
        ;;
esac

# Add NIXL_REF as a build argument
BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "
# Add NIXL_LIBFABRIC_REF as a build argument
BUILD_ARGS+=" --build-arg NIXL_LIBFABRIC_REF=${NIXL_LIBFABRIC_REF} "
# Add EFA_VERSION as a build argument
BUILD_ARGS+=" --build-arg EFA_VERSION=${EFA_VERSION} "
617

Joe Chandler's avatar
Joe Chandler committed
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
# Function to build AWS EFA images from base runtime or dev images
#######################################
# Build an AWS EFA image from a base runtime or dev image using Dockerfile.aws.
# Globals:
#   SOURCE_DIR, RUN_PREFIX, EFA_VERSION, PLATFORM (read); DOCKERFILE_AWS (written)
# Arguments:
#   $1 - base image, passed to the build as BASE_IMAGE
#   $2 - tag arguments; deliberately word-split onto the docker command line
#   $3 - target stage in Dockerfile.aws (runtime-aws or dev-aws)
#   $4 - message echoed once the build has succeeded
# Exits:
#   with status 1 if Dockerfile.aws is missing or the docker build fails
#######################################
build_aws_with_header() {
    local base_ref="$1" tag_args="$2" stage="$3" done_msg="$4"

    DOCKERFILE_AWS="${SOURCE_DIR}/Dockerfile.aws"

    if ! [[ -f "$DOCKERFILE_AWS" ]]; then
        echo "ERROR: Dockerfile.aws not found at: $DOCKERFILE_AWS"
        exit 1
    fi

    echo ""
    echo "Building AWS EFA image from base: $base_ref"
    echo "Target stage: $stage"

    # Trace the real docker command; in dry-run mode RUN_PREFIX already prints it
    [ -n "$RUN_PREFIX" ] || set -x

    if ! $RUN_PREFIX docker build --progress=plain \
        --build-arg BASE_IMAGE="$base_ref" \
        --build-arg EFA_VERSION="${EFA_VERSION}" \
        --target "$stage" \
        --file "$DOCKERFILE_AWS" \
        $PLATFORM \
        $tag_args \
        "$SOURCE_DIR"; then
        # Turn tracing off quietly before reporting the failure
        { set +x; } 2>/dev/null
        echo "ERROR: Failed to build AWS EFA image"
        exit 1
    fi

    { set +x; } 2>/dev/null
    echo "$done_msg"
}

658

659
# Pass the resolved base image and tag to the Dockerfile
BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG"

# Forward repository tokens from the environment when present.
# NOTE(review): values passed with --build-arg can be recovered from the image
# history and are echoed with BUILD_ARGS; consider docker build secrets instead.
if [ -n "${GITHUB_TOKEN}" ]; then
    BUILD_ARGS+=" --build-arg GITHUB_TOKEN=${GITHUB_TOKEN} "
fi

if [ -n "${GITLAB_TOKEN}" ]; then
    BUILD_ARGS+=" --build-arg GITLAB_TOKEN=${GITLAB_TOKEN} "
fi

669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688

#######################################
# Check whether a directory contains at least one wheel (*.whl) file.
# Arguments:
#   $1 - directory to search (find descends into subdirectories)
# Outputs:
#   a status message on stdout; when several wheels exist, also the path of
#   the first one reported by find
# Returns:
#   0 if one or more wheels were found, 1 if the directory is missing or
#   holds no wheel
#######################################
check_wheel_file() {
    local wheel_dir="$1"
    local wheel_count

    # Check if directory exists
    if [ ! -d "$wheel_dir" ]; then
        echo "Error: Directory '$wheel_dir' does not exist"
        return 1
    fi

    # Look for .whl files
    wheel_count=$(find "$wheel_dir" -name "*.whl" | wc -l)
    # Some wc implementations pad the number with blanks; reduce it to a bare integer
    wheel_count=$((wheel_count))

    if [ "$wheel_count" -eq 0 ]; then
        echo "WARN: No .whl files found in '$wheel_dir'"
        return 1
    elif [ "$wheel_count" -gt 1 ]; then
        echo "Warning: Multiple wheel files found in '$wheel_dir'. Will use first one found."
        find "$wheel_dir" -name "*.whl" | head -n 1
        return 0
    fi

    echo "Found $wheel_count wheel in $wheel_dir"
    return 0
}

693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
#######################################
# Extract the pinned version from a pip requirement such as "pkg==1.2.3".
# Arguments:
#   $1 - pip wheel specification
# Outputs:
#   the version without a leading "v" when the spec pins one with "==" and
#   _is_semver_ref accepts it; an empty line otherwise
# Returns:
#   always 0
#######################################
get_trtllm_version_from_pip_wheel() {
    local spec="$1"
    local pinned=""

    if [[ $spec == *==* ]]; then
        pinned=$(echo "$spec" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p')
        # Discard anything that does not look like a version
        if ! _is_semver_ref "$pinned"; then
            pinned=""
        fi
    fi

    echo "${pinned#v}"
    return 0
}

#######################################
# Tell whether a TensorRT-LLM version is on the ABI-incompatible list.
# Globals:
#   TRTLLM_ABI_INCOMPATIBLE_VERSIONS (read)
# Arguments:
#   $1 - version string, compared verbatim against each list entry
# Returns:
#   0 if the version is listed, 1 otherwise
#######################################
trtllm_version_incompatible() {
    local version="$1"
    # Keep the loop variable local so it does not leak into the script's globals
    local incompatible_version
    for incompatible_version in "${TRTLLM_ABI_INCOMPATIBLE_VERSIONS[@]}"; do
        if [[ "$version" == "$incompatible_version" ]]; then
            return 0
        fi
    done
    return 1
}

# Succeed (return 0) when $1 looks like a version: MAJOR.MINOR.PATCH with an
# optional leading "v" and an optional suffix that either starts with "-" or
# "+", or is a letter followed by at least one more character (e.g. "rc1").
# Returns 1 otherwise.
_is_semver_ref() {
    local candidate="$1"
    local -r pattern='^v?(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)\.(0|[1-9][0-9]*)([-+][0-9A-Za-z.-]+|[A-Za-z][0-9A-Za-z.-]+)?$'

    if [[ $candidate =~ $pattern ]]; then
        return 0
    fi
    return 1
}

#######################################
# Turn a TensorRT-LLM commit/tag/branch value into the ref to use on GitHub.
# Version-like values (as judged by _is_semver_ref) get a leading "v" if they
# lack one; everything else is passed through unchanged.
# Arguments:
#   $1 - commit hash, branch name or version
# Outputs:
#   the resulting ref on stdout
# Returns:
#   always 0
#######################################
get_github_trtllm_ref() {
    local ref="$1"

    # Only version-like refs without a "v" prefix need rewriting
    if _is_semver_ref "$ref" && [[ $ref != v* ]]; then
        ref="v${ref}"
    fi

    echo "$ref"
    return 0
}

737
function determine_user_intention_trtllm() {
    # The tensorrt llm installation flags are not quite mutually exclusive
    # since the user should be able to point at a directory of their choosing
    # for storing a trtllm wheel built from source.
    #
    # This function attempts to discern the intention of the user by
    # applying checks, or rules, for each of the scenarios.
    #
    # Globals read:    TENSORRTLLM_PIP_WHEEL_DIR, TRTLLM_GIT_URL,
    #                  TENSORRTLLM_INDEX_URL, TENSORRTLLM_PIP_WHEEL
    # Globals written: TRTLLM_INTENTION
    #
    # /return: Calculated intention. One of "download", "install", "build".
    #          Exits the script with status 1 when the flags do not resolve
    #          to exactly one intention.
    #
    # The three different methods of installing TRTLLM with build.sh are:
    # 1. Download
    # required: --tensorrtllm-pip-wheel
    # optional: --tensorrtllm-index-url
    # optional: --tensorrtllm-commit
    #
    # 2. Install from pre-built
    # required: --tensorrtllm-pip-wheel-dir
    # optional: --tensorrtllm-commit
    #
    # 3. Build from source
    # required: --tensorrtllm-git-url
    # optional: --tensorrtllm-commit
    # optional: --tensorrtllm-pip-wheel-dir
    local intention_download="false"
    local intention_install="false"
    local intention_build="false"
    local intention_count=0
    # Make sure the result variable is defined even when no rule matches.
    TRTLLM_INTENTION="${TRTLLM_INTENTION:-}"

    # Install from pre-built: a wheel dir without a git URL
    # (with a git URL the wheel dir is the build output directory instead).
    if [[ -n "$TENSORRTLLM_PIP_WHEEL_DIR" && -z "$TRTLLM_GIT_URL" ]]; then
        intention_install="true"
        intention_count=$((intention_count+1))
        TRTLLM_INTENTION="install"
    fi
    echo "  Intent to Install TRTLLM: $intention_install"

    # Build from source
    if [[ -n "$TRTLLM_GIT_URL" ]]; then
        intention_build="true"
        intention_count=$((intention_count+1))
        TRTLLM_INTENTION="build"
    fi
    echo "  Intent to Build TRTLLM: $intention_build"

    # Download from repository: the pip wheel spec is the trigger, the index
    # URL only changes where the wheel is fetched from.
    if [[ -n "$TENSORRTLLM_PIP_WHEEL" ]]; then
        intention_download="true"
        intention_count=$((intention_count+1))
        TRTLLM_INTENTION="download"
        if [[ -n "$TENSORRTLLM_INDEX_URL" ]]; then
            echo "INFO: Installing $TENSORRTLLM_PIP_WHEEL trtllm version from index: $TENSORRTLLM_INDEX_URL"
        else
            echo "INFO: Installing $TENSORRTLLM_PIP_WHEEL trtllm version from default pip index."
        fi
    fi

    # If nothing is set then we default to downloading the wheel
    # with the defaults specified at the top of this file.
    if [[ -z "${TENSORRTLLM_INDEX_URL}" ]] && [[ -z "${TENSORRTLLM_PIP_WHEEL}" ]] && [[ "${intention_count}" -eq 0 ]]; then
        intention_download="true"
        intention_count=$((intention_count+1))
        TRTLLM_INTENTION="download"
        echo "INFO: Inferring download because both TENSORRTLLM_PIP_WHEEL and TENSORRTLLM_INDEX_URL are not set."
    fi
    echo "  Intent to Download TRTLLM: $intention_download"

    # Zero matches or conflicting matches are both unrecoverable.
    if [[ "$intention_count" -ne 1 ]]; then
        echo -e "[ERROR] Could not figure out the trtllm installation intent from the current flags. Please check your build.sh command against the following"
        echo -e "  The grouped flags are mutually exclusive:"
        echo -e "  To download and install use both: --tensorrtllm-index-url, --tensorrtllm-pip-wheel"
        echo -e "  To install from a pre-built wheel use: --tensorrtllm-pip-wheel-dir"
        echo -e "  To build from source and install use both: --tensorrtllm-commit, --tensorrtllm-git-url"
        exit 1
    fi
}

816

817
818
819
820
821
822
823
# TRTLLM only: work out how TensorRT-LLM is provided (download / install from
# a local wheel dir / build from source) and translate that into docker build
# args and the "trtllm_wheel" build context. Also resolves TRTLLM_COMMIT so the
# matching GitHub ref can be handed to the Dockerfile.
if [[ $FRAMEWORK == "TRTLLM" ]]; then
    echo -e "Determining the user's TRTLLM installation intent..."
    determine_user_intention_trtllm   # From this point forward, can assume correct TRTLLM flags

    if [[ "$TRTLLM_INTENTION" == "download" ]]; then
        TENSORRTLLM_INDEX_URL=${TENSORRTLLM_INDEX_URL:-$DEFAULT_TENSORRTLLM_INDEX_URL}
        TENSORRTLLM_PIP_WHEEL=${TENSORRTLLM_PIP_WHEEL:-$DEFAULT_TENSORRTLLM_PIP_WHEEL}
        TRTLLM_WHEEL_VERSION=$(get_trtllm_version_from_pip_wheel "${TENSORRTLLM_PIP_WHEEL}")
        if trtllm_version_incompatible "${TRTLLM_WHEEL_VERSION}"; then
            # Versions flagged as ABI-incompatible are taken from the release
            # image instead of being pip-installed.
            TRTLLM_WHEEL_IMAGE="nvcr.io/nvidia/tensorrt-llm/release:${TRTLLM_WHEEL_VERSION}"
            BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=0"
            BUILD_ARGS+=" --build-arg TRTLLM_WHEEL_IMAGE=${TRTLLM_WHEEL_IMAGE}"
            PRINT_TRTLLM_WHEEL_FILE=${TRTLLM_WHEEL_IMAGE}
        else
            BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=0"
            BUILD_ARGS+=" --build-arg TENSORRTLLM_PIP_WHEEL=${TENSORRTLLM_PIP_WHEEL}"
            BUILD_ARGS+=" --build-arg TENSORRTLLM_INDEX_URL=${TENSORRTLLM_INDEX_URL}"
            PRINT_TRTLLM_WHEEL_FILE=${TENSORRTLLM_PIP_WHEEL}
        fi
        # Create a dummy directory to satisfy the build context requirement
        # There is no way to conditionally copy the build context in dockerfile.
        mkdir -p /tmp/trtllm_wheel_context
        BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=/tmp/trtllm_wheel_context"
    elif [[ "$TRTLLM_INTENTION" == "install" ]]; then
        echo "Checking for TensorRT-LLM wheel in ${TENSORRTLLM_PIP_WHEEL_DIR}"
        if ! check_wheel_file "${TENSORRTLLM_PIP_WHEEL_DIR}"; then
            echo "ERROR: Valid trtllm wheel file not found in ${TENSORRTLLM_PIP_WHEEL_DIR}"
            echo "      If this is not intended you can try building from source with the following variables set instead:"
            echo ""
            echo "      --tensorrtllm-git-url https://github.com/NVIDIA/TensorRT-LLM --tensorrtllm-commit $TRTLLM_COMMIT"
            exit 1
        fi
        echo "Installing TensorRT-LLM from local wheel directory"
        BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=1"
        BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=${TENSORRTLLM_PIP_WHEEL_DIR}"
        PRINT_TRTLLM_WHEEL_FILE=$(find "$TENSORRTLLM_PIP_WHEEL_DIR" -name "*.whl" | head -n 1)
    elif [[ "$TRTLLM_INTENTION" == "build" ]]; then
        TENSORRTLLM_PIP_WHEEL_DIR=${TENSORRTLLM_PIP_WHEEL_DIR:-$DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR}
        echo "TRTLLM pip wheel output directory is: ${TENSORRTLLM_PIP_WHEEL_DIR}"
        if [ "$DRY_RUN" != "true" ]; then
            # Assemble the wheel-build command as an array so that values with
            # whitespace (or empty values) stay attached to their option.
            trtllm_wheel_build_cmd=(
                env -i "${SOURCE_DIR}/build_trtllm_wheel.sh"
                -o "${TENSORRTLLM_PIP_WHEEL_DIR}"
                -c "${TRTLLM_COMMIT}"
                -a "${ARCH}"
                -n "${NIXL_REF}"
            )
            if [ -n "${TRTLLM_GIT_URL}" ]; then
                trtllm_wheel_build_cmd+=(-u "${TRTLLM_GIT_URL}")
            fi
            if ! "${trtllm_wheel_build_cmd[@]}"; then
                error "ERROR: Failed to build TensorRT-LLM wheel"
            fi
            BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=1"
            BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=${TENSORRTLLM_PIP_WHEEL_DIR}"
            PRINT_TRTLLM_WHEEL_FILE=$(find "$TENSORRTLLM_PIP_WHEEL_DIR" -name "*.whl" | head -n 1)
        fi
    else
        echo 'No intention was set. This error should have been detected in "determine_user_intention_trtllm()". Exiting...'
        exit 1
    fi

    # Need to know the commit of TRTLLM so we can determine the
    # TensorRT installation associated with TRTLLM.
    if [[ -z "$TRTLLM_COMMIT" ]]; then
        # Attempt to default since the commit will work with a hash or a tag/branch
        if [[ -n "$TENSORRTLLM_PIP_WHEEL" ]]; then
            TRTLLM_COMMIT=$(get_trtllm_version_from_pip_wheel "${TENSORRTLLM_PIP_WHEEL}")
            if [[ -z "$TRTLLM_COMMIT" ]]; then
                echo -e "[ERROR] Could not parse a semver version from TENSORRTLLM_PIP_WHEEL: ${TENSORRTLLM_PIP_WHEEL}"
                exit 1
            fi
            echo "Attempting to default TRTLLM_COMMIT to \"$TRTLLM_COMMIT\" for installation of TensorRT."
        else
            echo -e "[ERROR] TRTLLM framework was set as a target but the TRTLLM_COMMIT variable was not set."
            echo -e "  Could not find a suitible default by infering from TENSORRTLLM_PIP_WHEEL."
            echo -e "  TRTLLM_COMMIT is needed to install the correct version of TensorRT associated with TensorRT-LLM."
            exit 1
        fi
    fi

    GITHUB_TRTLLM_REF=$(get_github_trtllm_ref "${TRTLLM_COMMIT}")
    BUILD_ARGS+=" --build-arg GITHUB_TRTLLM_COMMIT=${GITHUB_TRTLLM_REF}"
fi

897
898
# ENABLE_KVBM: Used in base Dockerfile for block-manager feature.
#              Declared but not currently used in Dockerfile.{vllm,trtllm}.
# Force KVBM to be enabled for VLLM and TRTLLM frameworks
if [[ $FRAMEWORK == "VLLM" ]] || [[ $FRAMEWORK == "TRTLLM" ]]; then
    echo "Forcing enable_kvbm to true in ${FRAMEWORK} image build"
    ENABLE_KVBM=true
fi
# For other frameworks, ENABLE_KVBM defaults to false unless --enable-kvbm flag was provided

# Only forward the build arg when KVBM ended up enabled.
if [[ ${ENABLE_KVBM} == "true" ]]; then
    echo "Enabling KVBM in the dynamo image"
    BUILD_ARGS+=" --build-arg ENABLE_KVBM=${ENABLE_KVBM} "
fi

911
912
913
914
915
916
917
918
919
920
921
922
923
924
# ENABLE_GPU_MEMORY_SERVICE: consumed by the Dockerfiles for the gpu_memory_service wheel
#                            (declared but not currently used in Dockerfile.trtllm).
# VLLM and SGLANG builds always turn the GPU Memory Service on. Any other
# framework keeps whatever --enable-gpu-memory-service left in the variable.
case "${FRAMEWORK}" in
    VLLM | SGLANG)
        echo "Forcing enable_gpu_memory_service to true in ${FRAMEWORK} image build"
        ENABLE_GPU_MEMORY_SERVICE=true
        ;;
esac

# Forward the build arg only when the service is enabled.
if [ "${ENABLE_GPU_MEMORY_SERVICE}" = "true" ]; then
    echo "Enabling GPU Memory Service in the dynamo image"
    BUILD_ARGS+=" --build-arg ENABLE_GPU_MEMORY_SERVICE=${ENABLE_GPU_MEMORY_SERVICE} "
fi

925
926
927
928
929
930
931
932
933
934
935
936
# ENABLE_MEDIA_NIXL: media processing with NIXL support (maturin build feature
# flag in the base Dockerfile). An explicit --enable-media-nixl value wins;
# otherwise it is on for VLLM/TRTLLM/SGLANG and off for everything else.
if [[ -z "${ENABLE_MEDIA_NIXL}" ]]; then
    case "${FRAMEWORK}" in
        VLLM | TRTLLM | SGLANG) ENABLE_MEDIA_NIXL=true ;;
        *) ENABLE_MEDIA_NIXL=false ;;
    esac
fi
# The build arg is always passed, whatever the resolved value.
BUILD_ARGS+=" --build-arg ENABLE_MEDIA_NIXL=${ENABLE_MEDIA_NIXL} "

937
938
939
940
941
942
943
944
945
946
947
948
949
# ENABLE_MEDIA_FFMPEG: media processing with FFMPEG support (maturin build
# feature flag in the base Dockerfile). An explicit --enable-media-ffmpeg value
# wins; otherwise it is on for VLLM/TRTLLM/SGLANG and off for everything else.
if [[ -z "${ENABLE_MEDIA_FFMPEG}" ]]; then
    case "${FRAMEWORK}" in
        VLLM | TRTLLM | SGLANG) ENABLE_MEDIA_FFMPEG=true ;;
        *) ENABLE_MEDIA_FFMPEG=false ;;
    esac
fi
# The build arg is always passed, whatever the resolved value.
BUILD_ARGS+=" --build-arg ENABLE_MEDIA_FFMPEG=${ENABLE_MEDIA_FFMPEG} "

# NIXL_UCX_REF: Used in base Dockerfile only.
950
951
952
953
# Forward NIXL_UCX_REF to the image build only when a value was supplied.
if [[ -n "${NIXL_UCX_REF}" ]]; then
    BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} "
fi

954
955
956
957
958
959
# NIXL_GDRCOPY_REF: used in dynamo base stages; forwarded only when set.
if [[ -n "${NIXL_GDRCOPY_REF}" ]]; then
    BUILD_ARGS+=" --build-arg NIXL_GDRCOPY_REF=${NIXL_GDRCOPY_REF} "
fi

960
# MAX_JOBS is only used by Dockerfile.vllm
961
962
963
# Forward MAX_JOBS to the image build only when a value was supplied.
if [[ -n "${MAX_JOBS}" ]]; then
    BUILD_ARGS+=" --build-arg MAX_JOBS=${MAX_JOBS} "
fi
964

965
# SGLANG images get their own CUDA version.
if [[ $FRAMEWORK == "SGLANG" ]]; then
    echo "Customizing Python, CUDA, and framework images for sglang images"
    BUILD_ARGS+=" --build-arg CUDA_VERSION=${SGLANG_CUDA_VERSION}"
fi

# The Python version is passed to every image build.
BUILD_ARGS+=" --build-arg PYTHON_VERSION=${PYTHON_VERSION}"

972
973
974
975
976
# Add sccache build arguments
# The AWS credentials are handed over as BuildKit secrets sourced from the
# environment (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY), not as build args.
if [ "$USE_SCCACHE" = true ]; then
    BUILD_ARGS+=" --build-arg USE_SCCACHE=true"
    BUILD_ARGS+=" --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET}"
    BUILD_ARGS+=" --build-arg SCCACHE_REGION=${SCCACHE_REGION}"
    BUILD_ARGS+=" --secret id=aws-key-id,env=AWS_ACCESS_KEY_ID"
    BUILD_ARGS+=" --secret id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY"
fi
980
981
982
983
# SGLANG on an arm64 platform needs the extra blackwell build arguments.
if [[ "${FRAMEWORK}" == "SGLANG" ]] && [[ "$PLATFORM" == *"linux/arm64"* ]]; then
    BUILD_ARGS+=" --build-arg GRACE_BLACKWELL=true --build-arg BUILD_TYPE=blackwell_aarch64"
fi
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020

# Dev/local-dev targets: build from a concatenated Dockerfile:
#   <framework Dockerfile> + container/dev/Dockerfile.dev
# An unset TARGET is treated like "dev".
if [[ -z "${TARGET:-}" || "${TARGET:-}" == "dev" || "${TARGET:-}" == "local-dev" ]]; then
    # Concatenate the framework Dockerfile ($1) and ${SOURCE_DIR}/dev/Dockerfile.dev
    # into a fresh temp file and print that file's path on stdout.
    _gen_dev_dockerfile_temp() {
        local fw_df dev_df out
        fw_df="$1"
        dev_df="${SOURCE_DIR}/dev/Dockerfile.dev"
        if [[ ! -f "${fw_df}" ]]; then
            error "ERROR:" "Framework Dockerfile not found: ${fw_df}"
        fi
        if [[ ! -f "${dev_df}" ]]; then
            error "ERROR:" "Dev Dockerfile not found: ${dev_df}"
        fi

        out="$(mktemp -t dynamo-dev-combined.XXXXXX.Dockerfile)"
        cat "${fw_df}" "${dev_df}" > "${out}"
        printf '\n' >> "${out}"

        if [[ ! -s "${out}" ]]; then
            rm -f "${out}"
            error "ERROR:" "Temp Dockerfile was generated but is empty"
        fi
        printf '%s\n' "${out}"
    }

    # From here on DOCKERFILE points at the combined temp file.
    DOCKERFILE="$(_gen_dev_dockerfile_temp "${DOCKERFILE}")"

    # Ensure we clean up the temp Dockerfile (opt-out with KEEP_DEV_DOCKERFILE_TEMP=1 for debugging).
    if [[ "${KEEP_DEV_DOCKERFILE_TEMP:-}" != "1" ]]; then
        trap 'rm -f "${DOCKERFILE}" 2>/dev/null || true' EXIT
    fi

    # Dockerfile.dev expects a lowercase framework string.
    BUILD_ARGS+=" --build-arg FRAMEWORK=${FRAMEWORK,,} "

    # Preserve historical tagging behavior for dev/local-dev (build.sh used to delegate out).
    # Strip any known target suffix from the primary tag before re-appending
    # the suffix that matches the current target.
    base="${PRIMARY_TAG}"
    base="${base%-runtime}"
    base="${base%-local-dev}"
    base="${base%-dev}"
    if [[ -z "${TARGET:-}" || "${TARGET}" == "dev" ]]; then
        TAG="--tag ${base}-dev"
    else
        TAG="--tag ${base}-local-dev"
        # Default UID/GID behavior: current user if not specified.
        if [[ -z "${CUSTOM_UID:-}" ]]; then
            CUSTOM_UID="$(id -u)"
        fi
        if [[ -z "${CUSTOM_GID:-}" ]]; then
            CUSTOM_GID="$(id -g)"
        fi
        BUILD_ARGS+=" --build-arg USER_UID=${CUSTOM_UID} --build-arg USER_GID=${CUSTOM_GID} "
    fi
fi

1040
1041
# LATEST_TAG: extra "latest" tag for the built image, skipped when
# NO_TAG_LATEST is set.
#   unset TARGET / dev -> dynamo:latest-<framework>
#   any other target   -> dynamo:latest-<framework>-<target>
LATEST_TAG=""
if [ -z "${NO_TAG_LATEST}" ]; then
    LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
    case "${TARGET:-}" in
        "" | dev) ;;
        *) LATEST_TAG="${LATEST_TAG}-${TARGET}" ;;
    esac
fi
1053

1054
1055
show_image_options

1056
1057
1058
1059
# Handle FRONTEND target: build EPP image first
# The TARGET comparison is case-insensitive.
if [[ ${TARGET^^} == "FRONTEND" ]]; then
    echo "Building FRONTEND image - requires EPP image"
    echo ""
    echo "Building EPP image for Frontend using Makefile..."

    # EPP directory with the new self-contained build
    EPP_DIR="${BUILD_CONTEXT}/deploy/inference-gateway/epp"

    # Set DOCKER_PROXY from ECR_HOSTNAME if available (for pulling base images through proxy)
    # This prevents rate-limiting when building in CI across multiple PRs
    DOCKER_PROXY_ARG=""
    if [[ -n "${ECR_HOSTNAME}" ]]; then
        DOCKER_PROXY="${ECR_HOSTNAME}/dockerhub/"
        DOCKER_PROXY_ARG="DOCKER_PROXY=${DOCKER_PROXY}"
        echo "Using DOCKER_PROXY: ${DOCKER_PROXY}"
    fi

    # Build EPP image using the Makefile
    # The Makefile handles: building Dynamo library, building Docker image, loading it locally
    # DOCKER_PROXY_ARG is left unquoted on purpose so that an empty value adds no argument.
    $RUN_PREFIX make -C "${EPP_DIR}" all DYNAMO_DIR="${BUILD_CONTEXT}" ${DOCKER_PROXY_ARG}

    # Compute EPP image tag (must match Makefile's IMAGE_TAG)
    # IMAGE_TAG = $(IMAGE_REPO):$(GIT_TAG)
    # IMAGE_REPO = $(DOCKER_SERVER)/$(IMAGE_NAME)
    # Image lives in local cache only, not pushed to any registry
    EPP_DOCKER_SERVER="dynamo"
    EPP_IMAGE_NAME="dynamo-epp"
    EPP_GIT_TAG=$(git describe --tags --dirty --always 2>/dev/null || echo "dev")
    EPP_IMAGE_TAG="${EPP_DOCKER_SERVER}/${EPP_IMAGE_NAME}:${EPP_GIT_TAG}"

    echo "Successfully built EPP image: ${EPP_IMAGE_TAG}"

    # Add build args for frontend image
    BUILD_ARGS+=" --build-arg EPP_IMAGE=${EPP_IMAGE_TAG}"
fi

1093
1094
1095
1096
1097
# Always build the main image first
# Create build log directory for BuildKit reports
BUILD_LOG_DIR="${BUILD_CONTEXT}/build-logs"
mkdir -p "${BUILD_LOG_DIR}"
SINGLE_BUILD_LOG="${BUILD_LOG_DIR}/single-stage-build.log"

# Determine --load flag (default on unless --no-load or --push specified)
LOAD_FLAG=""
if [ "$NO_LOAD" != "true" ] && [ -z "$PUSH" ]; then
    LOAD_FLAG=" --load"
fi

# Use BuildKit for enhanced metadata
# The option variables on the build command lines are intentionally unquoted:
# each one holds zero or more whitespace-separated arguments.
# The pipeline status is tee's, so the build's own exit code is taken from
# PIPESTATUS and checked explicitly afterwards.
if docker buildx version &>/dev/null; then
    $RUN_PREFIX docker buildx build --progress=plain ${LOAD_FLAG} ${PUSH} -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
    BUILD_EXIT_CODE=${PIPESTATUS[0]}
else
    # A VAR=value word that follows an expansion is not parsed as an
    # assignment; with an empty RUN_PREFIX it would become the command name.
    # Going through env sets the variable regardless of RUN_PREFIX.
    $RUN_PREFIX env DOCKER_BUILDKIT=1 docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
    BUILD_EXIT_CODE=${PIPESTATUS[0]}
fi

# Propagate a failed build as the script's exit status.
if [ "${BUILD_EXIT_CODE}" -ne 0 ]; then
    exit "${BUILD_EXIT_CODE}"
fi
1117

Joe Chandler's avatar
Joe Chandler committed
1118
1119
1120
# Handle --make-efa flag: add AWS EFA layer on top of the built image
# This runs BEFORE local-dev so the flow is: dev -> dev-aws -> local-dev-aws
if [[ "${MAKE_EFA:-}" == "true" ]]; then
    # Get the base image that was just built (use PRIMARY_TAG to avoid parsing issues)
    BASE_IMAGE_FOR_EFA="${PRIMARY_TAG}"

    # Determine the EFA stage based on the target
    # runtime target -> runtime-aws stage
    # dev/local-dev target -> dev-aws stage
    if [[ "${TARGET:-dev}" == "runtime" ]]; then
        EFA_STAGE="runtime-aws"
    else
        EFA_STAGE="dev-aws"
    fi

    # Build AWS tags by appending -aws to existing tags
    # (the first "--tag " prefix is dropped and re-added around the new name).
    AWS_TAGS=""
    if [[ -n "$TAG" ]]; then
        AWS_TAG="${TAG/--tag /}"
        AWS_TAGS+=" --tag ${AWS_TAG}-aws"
    fi
    if [[ -n "$LATEST_TAG" ]]; then
        AWS_LATEST_TAG="${LATEST_TAG/--tag /}"
        AWS_TAGS+=" --tag ${AWS_LATEST_TAG}-aws"
    fi

    build_aws_with_header "$BASE_IMAGE_FOR_EFA" "$AWS_TAGS" "$EFA_STAGE" "Successfully built ${EFA_STAGE} image"
fi

Ran Rubin's avatar
Ran Rubin committed
1147
{ set +x; } 2>/dev/null