"lib/vscode:/vscode.git/clone" did not exist on "7aa8e0e6fbc28634cb1065334439f396704f6c08"
build.sh 32.3 KB
Newer Older
Carsten Csiky's avatar
Carsten Csiky committed
1
#!/usr/bin/env bash
Neelay Shah's avatar
Neelay Shah committed
2
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
# SPDX-License-Identifier: Apache-2.0
4
5
6
7
8
9
10
11
12
13
14
15
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16

17
18
19
20
21
# Associative arrays and ${var^^}/${var,,} are used below; both need Bash >= 4.
if (( BASH_VERSINFO[0] < 4 )); then
    printf '%s\n' "Error: Bash version 4.0 or higher is required. Current version: ${BASH_VERSINFO[0]}.${BASH_VERSINFO[1]}"
    exit 1
fi

Carsten Csiky's avatar
Carsten Csiky committed
22
set -e
23

24
25
26
TAG=
RUN_PREFIX=
PLATFORM=linux/amd64
27
28

# Version information.
#
# commit_id, current_tag and latest_tag may each be pre-set by the caller;
# otherwise they are derived from the git checkout.

# Short hash of the commit being built.
commit_id=${commit_id:-$(git rev-parse --short HEAD)}

# If HEAD sits exactly on a tag, use that tag (leading "v" stripped).
# `git describe --exact-match` fails on an untagged HEAD; stderr is silenced
# and `|| true` keeps the substitution's status zero, leaving current_tag empty.
# Use a single command substitution here: nesting a second one would execute
# the tag text as a command.
current_tag=${current_tag:-$(git describe --tags --exact-match 2>/dev/null | sed 's/^v//' || true)}

# Most recent tag reachable from any tagged commit (leading "v" stripped).
latest_tag=${latest_tag:-$(git describe --tags --abbrev=0 "$(git rev-list --tags --max-count=1)" | sed 's/^v//' || true)}
if [[ -z ${latest_tag} ]]; then
    latest_tag="0.0.1"
    echo "No git release tag found, setting to unknown version: ${latest_tag}"
fi

# Use the exact tag if available, otherwise <latest_tag>.dev.<commit_id>.
VERSION=v${current_tag:-$latest_tag.dev.$commit_id}

# Same as VERSION without the "v" prefix, with "+" before the commit id.
PYTHON_PACKAGE_VERSION=${current_tag:-$latest_tag.dev+$commit_id}
45
46
47
48
49
50

# Frameworks
#
# Each framework has a corresponding base image.  Additional
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.
51

52
declare -A FRAMEWORKS=(["VLLM"]=1 ["TRTLLM"]=2 ["NONE"]=3 ["SGLANG"]=4)
Ryan Olson's avatar
Ryan Olson committed
53

54
DEFAULT_FRAMEWORK=VLLM
55
56
57
58
59
60

SOURCE_DIR=$(dirname "$(readlink -f "$0")")
DOCKERFILE=${SOURCE_DIR}/Dockerfile
BUILD_CONTEXT=$(dirname "$(readlink -f "$SOURCE_DIR")")

# Base Images
61
TRTLLM_BASE_IMAGE=nvcr.io/nvidia/pytorch
62
TRTLLM_BASE_IMAGE_TAG=25.10-py3
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86

# Important Note: Because of ABI compatibility issues between TensorRT-LLM and NGC PyTorch,
# we need to build the TensorRT-LLM wheel from source.
#
# There are two ways to build the dynamo image with TensorRT-LLM.
# 1. Use the local TensorRT-LLM wheel directory.
# 2. Use the TensorRT-LLM wheel on artifactory.
#
# If using option 1, the TENSORRTLLM_PIP_WHEEL_DIR must be a path to a directory
# containing TensorRT-LLM wheel file along with commit.txt file with the
# <arch>_<commit ID> as contents. If no valid trtllm wheel is found, the script
# will attempt to build the wheel from source and store the built wheel in the
# specified directory. TRTLLM_COMMIT from the TensorRT-LLM main branch will be
# used to build the wheel.
#
# If using option 2, the TENSORRTLLM_PIP_WHEEL must be the TensorRT-LLM wheel
# package that will be installed from the specified TensorRT-LLM PyPI Index URL.
# This option will ignore the TRTLLM_COMMIT option. As the TensorRT-LLM wheel from PyPI
# is not ABI compatible with NGC PyTorch, you can use TENSORRTLLM_INDEX_URL to specify
# a private PyPI index URL which has your pre-built TensorRT-LLM wheel.
#
# By default, we will use option 1. If you want to use option 2, you can set
# TENSORRTLLM_PIP_WHEEL to the TensorRT-LLM wheel on artifactory.
#
87
DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR="/tmp/trtllm_wheel/"
88

89
90
91
# TensorRT-LLM commit to use for building the trtllm wheel if not provided.
# Important Note: This commit is not used in our CI pipeline. See the CI
# variables to learn how to run a pipeline with a specific commit.
92
DEFAULT_EXPERIMENTAL_TRTLLM_COMMIT="e4c707845ff58fcc0b1d87afb4dd0e64885c780a" # 1.2.0rc5
93
TRTLLM_COMMIT=""
94
TRTLLM_USE_NIXL_KVCACHE_EXPERIMENTAL="0"
95
TRTLLM_GIT_URL=""
96

97
# TensorRT-LLM PyPI index URL
98
DEFAULT_TENSORRTLLM_INDEX_URL="https://pypi.nvidia.com/"
99
# TODO: Remove the version specification from here and use the ai-dynamo[trtllm] package.
100
# Need to update the Dockerfile.trtllm to use the ai-dynamo[trtllm] package.
101
DEFAULT_TENSORRTLLM_PIP_WHEEL="tensorrt-llm==1.2.0rc5"
102
103
TENSORRTLLM_PIP_WHEEL=""

104
VLLM_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
105
# FIXME: OPS-612 NCCL will hang with 25.03, so use 25.01 for now
106
107
108
# Please check https://github.com/ai-dynamo/dynamo/pull/1065
# for details and reproducer to manually test if the image
# can be updated to later versions.
109
VLLM_BASE_IMAGE_TAG="25.04-cuda12.9-devel-ubuntu24.04"
110

111
112
NONE_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
NONE_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
113

114
115
SGLANG_CUDA_VERSION="12.9.1"
# This is for Dockerfile
116
117
SGLANG_BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base"
SGLANG_BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
118
119
120
# This is for Dockerfile.sglang. Unlike the other frameworks, it is using a different base image
SGLANG_FRAMEWORK_IMAGE="nvcr.io/nvidia/cuda"
SGLANG_FRAMEWORK_IMAGE_TAG="${SGLANG_CUDA_VERSION}-cudnn-devel-ubuntu24.04"
121

122
NIXL_REF=0.7.1
123
124
NIXL_UCX_REF=v1.19.0
NIXL_UCX_EFA_REF=9d2b88a1f67faf9876f267658bd077b379b8bb76
125
NIXL_GDRCOPY_REF=v2.5.1
126

127
128
NO_CACHE=""

129
130
131
132
# KVBM (KV Cache Block Manager) - default disabled, enabled automatically for VLLM/TRTLLM
# or can be explicitly enabled via --enable-kvbm flag
ENABLE_KVBM=false

133
134
135
136
137
# sccache configuration for S3
USE_SCCACHE=""
SCCACHE_BUCKET=""
SCCACHE_REGION=""

138
139
140
141
142
143
144
# Parse the command line into the script's global configuration variables and
# then derive defaults from them.
#
# Arguments: the script's positional parameters ("$@").
# Globals written: one variable per option (see the table and the case arms
#   below), plus FRAMEWORK, BASE_IMAGE, BASE_IMAGE_TAG, BASE_VERSION, TAG,
#   PLATFORM and TARGET_STR after post-processing.
# Exits (via error / missing_requirement) on an unknown option, on a missing
#   option value, or on an invalid combination. -h/--help prints usage and exits.
get_options() {
    # Options that take exactly one value and store it verbatim, mapped to the
    # global variable they set.
    local -A single_value_opts=(
        ["--platform"]=PLATFORM
        ["--framework"]=FRAMEWORK
        ["--tensorrtllm-pip-wheel-dir"]=TENSORRTLLM_PIP_WHEEL_DIR
        ["--tensorrtllm-commit"]=TRTLLM_COMMIT
        ["--tensorrtllm-pip-wheel"]=TENSORRTLLM_PIP_WHEEL
        ["--tensorrtllm-index-url"]=TENSORRTLLM_INDEX_URL
        ["--tensorrtllm-git-url"]=TRTLLM_GIT_URL
        ["--base-image"]=BASE_IMAGE
        ["--base-image-tag"]=BASE_IMAGE_TAG
        ["--target"]=TARGET
        ["--uid"]=CUSTOM_UID
        ["--gid"]=CUSTOM_GID
        ["--sccache-bucket"]=SCCACHE_BUCKET
        ["--sccache-region"]=SCCACHE_REGION
        # MAX_JOBS is described as only used by Dockerfile.vllm.
        ["--vllm-max-jobs"]=MAX_JOBS
    )
    local target_var indirect_name

    while :; do
        # Table-driven handling of the simple "--opt VALUE" options.
        target_var=""
        if [[ -n "${1:-}" ]]; then
            target_var=${single_value_opts[$1]:-}
        fi
        if [[ -n "$target_var" ]]; then
            [ "$2" ] || missing_requirement "$1"
            printf -v "$target_var" '%s' "$2"
            shift 2
            continue
        fi

        case $1 in
        -h | -\? | --help)
            show_help
            exit
            ;;
        --build-arg)
            # Repeatable: each occurrence is appended.
            [ "$2" ] || missing_requirement "$1"
            BUILD_ARGS+="--build-arg $2 "
            shift
            ;;
        --tag)
            [ "$2" ] || missing_requirement "$1"
            TAG="--tag $2"
            shift
            ;;
        --cache-from)
            [ "$2" ] || missing_requirement "$1"
            CACHE_FROM+="--cache-from $2 "
            shift
            ;;
        --cache-to)
            [ "$2" ] || missing_requirement "$1"
            CACHE_TO+="--cache-to $2 "
            shift
            ;;
        --build-context)
            [ "$2" ] || missing_requirement "$1"
            BUILD_CONTEXT_ARG="--build-context $2"
            shift
            ;;
        --dry-run)
            # Commands are prefixed with "echo" so they are printed, not run.
            RUN_PREFIX="echo"
            DRY_RUN="true"
            echo ""
            echo "=============================="
            echo "DRY RUN: COMMANDS PRINTED ONLY"
            echo "=============================="
            echo ""
            ;;
        --no-cache)
            NO_CACHE=" --no-cache"
            ;;
        --enable-kvbm)
            ENABLE_KVBM=true
            ;;
        --enable-media-nixl)
            ENABLE_MEDIA_NIXL=true
            ;;
        --make-efa)
            # Switch the UCX ref to the EFA one.
            NIXL_UCX_REF=$NIXL_UCX_EFA_REF
            ;;
        --use-sccache)
            USE_SCCACHE=true
            ;;
        --no-tag-latest)
            NO_TAG_LATEST=true
            ;;
        ?*)
            # Anything else that is non-empty, dashed or not, is rejected.
            error 'ERROR: Unknown option: ' "$1"
            ;;
        *)
            # Empty $1: no arguments left.
            break
            ;;
        esac
        shift
    done

    # Validate that --uid and --gid are only used with local-dev target
    if [[ -n "${CUSTOM_UID:-}" || -n "${CUSTOM_GID:-}" ]]; then
        if [[ "${TARGET:-}" != "local-dev" ]]; then
            error "ERROR: --uid and --gid can only be used with --target local-dev"
        fi
    fi

    FRAMEWORK=${FRAMEWORK:-$DEFAULT_FRAMEWORK}

    if [ -n "$FRAMEWORK" ]; then
        # Framework names are matched case-insensitively.
        FRAMEWORK=${FRAMEWORK^^}

        if [[ -z "${FRAMEWORKS[$FRAMEWORK]}" ]]; then
            error 'ERROR: Unknown framework: ' "$FRAMEWORK"
        fi

        # Unless overridden on the command line, take the base image and tag
        # from the <FRAMEWORK>_BASE_IMAGE / <FRAMEWORK>_BASE_IMAGE_TAG globals.
        if [ -z "$BASE_IMAGE_TAG" ]; then
            indirect_name=${FRAMEWORK}_BASE_IMAGE_TAG
            BASE_IMAGE_TAG=${!indirect_name}
        fi

        if [ -z "$BASE_IMAGE" ]; then
            indirect_name=${FRAMEWORK}_BASE_IMAGE
            BASE_IMAGE=${!indirect_name}
        fi

        if [ -z "$BASE_IMAGE" ]; then
            error "ERROR: Framework $FRAMEWORK without BASE_IMAGE"
        fi

        # Empty when no <FRAMEWORK>_BASE_VERSION variable is defined.
        indirect_name=${FRAMEWORK}_BASE_VERSION
        BASE_VERSION=${!indirect_name}
    fi

    # Default tag: dynamo:<VERSION>-<framework>[-<target>]; local-dev gets no suffix.
    if [ -z "$TAG" ]; then
        TAG="--tag dynamo:${VERSION}-${FRAMEWORK,,}"
        if [ -n "${TARGET}" ] && [ "${TARGET}" != "local-dev" ]; then
            TAG="${TAG}-${TARGET}"
        fi
    fi

    # From here on PLATFORM holds the complete docker flag, not the bare value.
    if [ -n "$PLATFORM" ]; then
        PLATFORM="--platform ${PLATFORM}"
    fi

    if [ -n "$TARGET" ]; then
        TARGET_STR="--target ${TARGET}"
    else
        TARGET_STR="--target dev"
    fi

    # Validate sccache configuration
    if [ "$USE_SCCACHE" = true ]; then
        if [ -z "$SCCACHE_BUCKET" ]; then
            error "ERROR: --sccache-bucket is required when --use-sccache is specified"
        fi
        if [ -z "$SCCACHE_REGION" ]; then
            error "ERROR: --sccache-region is required when --use-sccache is specified"
        fi
    fi
}


# Print a summary of the image about to be built.
# Reads TAG, BASE_IMAGE, BASE_IMAGE_TAG, BUILD_CONTEXT, BUILD_ARGS and
# FRAMEWORK. Also prints PRINT_TRTLLM_WHEEL_FILE for TRTLLM, and the
# SCCACHE_* settings when USE_SCCACHE is "true".
show_image_options() {
    # printf re-applies the format to each label/value pair.
    local row_fmt="   %s: '%s'\n"

    printf '\n%s\n\n' "Building Dynamo Image: '${TAG}'"
    printf "$row_fmt" \
        "Base" "${BASE_IMAGE}" \
        "Base_Image_Tag" "${BASE_IMAGE_TAG}"
    if [[ $FRAMEWORK == "TRTLLM" ]]; then
        printf "$row_fmt" "Tensorrtllm_Pip_Wheel" "${PRINT_TRTLLM_WHEEL_FILE}"
    fi
    printf "$row_fmt" \
        "Build Context" "${BUILD_CONTEXT}" \
        "Build Arguments" "${BUILD_ARGS}" \
        "Framework" "${FRAMEWORK}"

    if [ "$USE_SCCACHE" = true ]; then
        printf '%s\n' "   sccache: Enabled"
        printf "$row_fmt" \
            "sccache Bucket" "${SCCACHE_BUCKET}" \
            "sccache Region" "${SCCACHE_REGION}"

        if [ -n "$SCCACHE_S3_KEY_PREFIX" ]; then
            printf "$row_fmt" "sccache S3 Key Prefix" "${SCCACHE_S3_KEY_PREFIX}"
        fi
    fi
    printf '\n'
}

# Print command-line usage to stdout and exit with status 0.
# Reads FRAMEWORKS to list the accepted --framework values.
#
# Keep this list in sync with the option parser: only options the parser
# accepts are listed here.
show_help() {
    cat <<EOF
usage: build.sh
  [--base-image base image]
  [--base-image-tag base image tag]
  [--platform platform for docker build]
  [--framework framework one of ${!FRAMEWORKS[*]}]
  [--target docker build target stage (default: dev)]
  [--tensorrtllm-pip-wheel-dir path to tensorrtllm pip wheel directory]
  [--tensorrtllm-commit tensorrtllm commit/tag/branch to use for building the trtllm wheel if the wheel is not provided]
  [--tensorrtllm-pip-wheel tensorrtllm pip wheel on artifactory]
  [--tensorrtllm-index-url tensorrtllm PyPI index URL if providing the wheel from artifactory]
  [--tensorrtllm-git-url tensorrtllm git repository URL for cloning]
  [--build-arg additional build args to pass to docker build]
  [--cache-from cache location to start from]
  [--cache-to location where to cache the build output]
  [--tag tag for image]
  [--uid user ID for local-dev images (only with --target local-dev)]
  [--gid group ID for local-dev images (only with --target local-dev)]
  [--no-cache disable docker build cache]
  [--dry-run print docker commands without running]
  [--build-context name=path to add build context]
  [--make-efa Enables EFA support for NIXL]
  [--enable-kvbm Enables KVBM support in Python 3.12]
  [--enable-media-nixl Enable media processing with NIXL support (default: true for frameworks, false for none)]
  [--use-sccache enable sccache for Rust/C/C++ compilation caching]
  [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]
  [--sccache-region S3 region for sccache (required with --use-sccache)]
  [--vllm-max-jobs number of parallel jobs for compilation (only used by vLLM framework)]
  [--no-tag-latest do not add latest-{framework} tag to built image]
  [-h | --help print this message and exit]

  Note: When using --use-sccache, AWS credentials must be set:
        export AWS_ACCESS_KEY_ID=your_access_key
        export AWS_SECRET_ACCESS_KEY=your_secret_key
EOF
    exit 0
}

# Report that option $1 was given without its value; delegates to error.
missing_requirement() {
    local option_name=$1
    error "ERROR: ${option_name} requires an argument."
}

# Print an error message to stderr and exit with status 1.
# Arguments: one or more message fragments, joined with single spaces.
# A single-argument call prints no trailing space, and extra arguments are
# not dropped.
error() {
    local IFS=' ' # "$*" joins on the first character of IFS
    printf '%s\n' "$*" >&2
    exit 1
}

get_options "$@"

484
# Derive ARCH from PLATFORM. For linux/arm64 also pass ARCH and ARCH_ALT to
# the docker build; every other platform is treated as amd64.
case "$PLATFORM" in
*linux/arm64*)
    ARCH="arm64"
    BUILD_ARGS+=" --build-arg ARCH=arm64 --build-arg ARCH_ALT=aarch64 "
    ;;
*)
    ARCH="amd64"
    ;;
esac

491
492
493
494
# Set the commit sha in the container so we can inspect what build this relates to
DYNAMO_COMMIT_SHA=${DYNAMO_COMMIT_SHA:-$(git rev-parse HEAD)}
BUILD_ARGS+=" --build-arg DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA "

495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
# vLLM on ARM64: fill in required defaults unless the user already chose them.
if [[ $FRAMEWORK == "VLLM" && "$PLATFORM" == *"linux/arm64"* ]]; then
    # Only replace the base image tag if it is still the stock vLLM default.
    if [ "$BASE_IMAGE_TAG" == "$VLLM_BASE_IMAGE_TAG" ]; then
        BASE_IMAGE_TAG="25.06-cuda12.9-devel-ubuntu24.04"
        echo "INFO: Automatically setting base-image-tag to $BASE_IMAGE_TAG for vLLM ARM64"
    fi

    # Add each NAME=VALUE build arg unless NAME already appears in BUILD_ARGS.
    for arm64_default in \
        "RUNTIME_IMAGE_TAG=12.9.0-runtime-ubuntu24.04" \
        "CUDA_VERSION=129" \
        "TORCH_BACKEND=cu129"; do
        if [[ "$BUILD_ARGS" != *"${arm64_default%%=*}"* ]]; then
            BUILD_ARGS+=" --build-arg ${arm64_default} "
            echo "INFO: Automatically setting ${arm64_default} for vLLM ARM64"
        fi
    done
    unset arm64_default
fi

521
522
523
# Select the Dockerfile that matches the chosen framework.
# An unrecognised value leaves DOCKERFILE untouched.
case "$FRAMEWORK" in
VLLM)
    DOCKERFILE=${SOURCE_DIR}/Dockerfile.vllm
    ;;
TRTLLM)
    DOCKERFILE=${SOURCE_DIR}/Dockerfile.trtllm
    ;;
NONE)
    DOCKERFILE=${SOURCE_DIR}/Dockerfile
    ;;
SGLANG)
    DOCKERFILE=${SOURCE_DIR}/Dockerfile.sglang
    ;;
esac

532
533
# Add NIXL_REF as a build argument
BUILD_ARGS+=" --build-arg NIXL_REF=${NIXL_REF} "
534

535
# Function to build local-dev image
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
# Build the local-dev image on top of an already-built dev image and print
# instructions for running it.
#
# Arguments:
#   $1 - dev image to use as the base (passed as build arg DEV_BASE)
#   $2 - docker tag flags, e.g. "--tag a:b --tag a:latest" (word-split on purpose)
#   $3 - message printed after a successful build
#   $4 - header title (accepted for interface compatibility; unused here)
# Globals read:    CUSTOM_UID, CUSTOM_GID, SOURCE_DIR, ARCH, RUN_PREFIX
# Globals written: USER_UID, USER_GID, DOCKERFILE_LOCAL_DEV
# Exits with status 1 if Dockerfile.local_dev is missing or the build fails.
build_local_dev_with_header() {
    local dev_base_image="$1"
    local tags="$2"
    local success_msg="$3"
    local header_title="$4"
    local last_tag build_dir run_abs_path run_path

    # Get user info right before using it
    USER_UID=${CUSTOM_UID:-$(id -u)}
    USER_GID=${CUSTOM_GID:-$(id -g)}

    # Set up dockerfile path
    DOCKERFILE_LOCAL_DEV="${SOURCE_DIR}/Dockerfile.local_dev"

    if [[ ! -f "$DOCKERFILE_LOCAL_DEV" ]]; then
        echo "ERROR: Dockerfile.local_dev not found at: $DOCKERFILE_LOCAL_DEV" >&2
        exit 1
    fi

    echo ""
    echo "Now building new local-dev image from: $dev_base_image"
    echo "User 'dynamo' will have UID: $USER_UID, GID: $USER_GID"

    # Trace the real docker command; in dry-run mode RUN_PREFIX already echoes it.
    if [ -z "$RUN_PREFIX" ]; then
        set -x
    fi

    # $tags is intentionally unquoted: it holds several "--tag NAME" words.
    # shellcheck disable=SC2086
    $RUN_PREFIX docker build \
        --build-arg DEV_BASE="$dev_base_image" \
        --build-arg USER_UID="$USER_UID" \
        --build-arg USER_GID="$USER_GID" \
        --build-arg ARCH="$ARCH" \
        --file "$DOCKERFILE_LOCAL_DEV" \
        $tags \
        "$SOURCE_DIR" || {
        { set +x; } 2>/dev/null
        echo "ERROR: Failed to build local_dev image" >&2
        exit 1
    }

    { set +x; } 2>/dev/null
    echo "$success_msg"

    # Show usage instructions
    echo ""
    echo "To run the local-dev image as the local user ($USER_UID/$USER_GID):"

    # First --tag value in $tags (the full version tag, not the latest tag).
    last_tag=$(echo "$tags" | grep -o -- '--tag [^ ]*' | head -1 | cut -d' ' -f2)

    # run.sh lives next to this script; show its path relative to the caller's
    # working directory.
    build_dir="$(dirname "${BASH_SOURCE[0]}")"
    run_abs_path="$(realpath "$build_dir/run.sh")"
    # Pass both paths as arguments, not as Python source text, so quotes or
    # backslashes in a directory name cannot break or alter the snippet.
    run_path="$(python3 -c 'import os, sys; print(os.path.relpath(sys.argv[1], sys.argv[2]))' "$run_abs_path" "$PWD")"
    echo "  $run_path --image $last_tag --mount-workspace ..."
}


# Handle local-dev target
596
if [[ $TARGET == "local-dev" ]]; then
597
598
    LOCAL_DEV_BUILD=true
    TARGET_STR="--target dev"
599
600
fi

601
602
# BUILD DEV IMAGE

603
BUILD_ARGS+=" --build-arg BASE_IMAGE=$BASE_IMAGE --build-arg BASE_IMAGE_TAG=$BASE_IMAGE_TAG"
604

Carsten Csiky's avatar
Carsten Csiky committed
605
if [ -n "${GITHUB_TOKEN}" ]; then
606
607
608
    BUILD_ARGS+=" --build-arg GITHUB_TOKEN=${GITHUB_TOKEN} "
fi

Carsten Csiky's avatar
Carsten Csiky committed
609
if [ -n "${GITLAB_TOKEN}" ]; then
610
611
612
    BUILD_ARGS+=" --build-arg GITLAB_TOKEN=${GITLAB_TOKEN} "
fi

613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632

# Check that a directory contains at least one Python wheel (*.whl, searched
# recursively).
#
# Arguments: $1 - directory to inspect
# Outputs:   a status message on stdout; when several wheels exist, a warning
#            followed by the path of the first one found
# Globals written: wheel_count (number of wheels found)
# Returns:   0 if at least one wheel was found, 1 otherwise
check_wheel_file() {
    local wheel_dir="$1"
    local -a wheel_files=()

    # Check if directory exists
    if [ ! -d "$wheel_dir" ]; then
        echo "Error: Directory '$wheel_dir' does not exist"
        return 1
    fi

    # Scan once so the count and the reported file come from the same listing.
    mapfile -t wheel_files < <(find "$wheel_dir" -name "*.whl")
    wheel_count=${#wheel_files[@]}

    if [ "$wheel_count" -eq 0 ]; then
        echo "WARN: No .whl files found in '$wheel_dir'"
        return 1
    elif [ "$wheel_count" -gt 1 ]; then
        echo "Warning: Multiple wheel files found in '$wheel_dir'. Will use first one found."
        printf '%s\n' "${wheel_files[0]}"
        return 0
    fi

    echo "Found $wheel_count wheel in $wheel_dir"
    return 0
}

637
determine_user_intention_trtllm() {
    # Decide how TensorRT-LLM should be installed and publish the decision in
    # the global TRTLLM_INTENTION ("download", "install" or "build").
    #
    # The installation flags are not strictly mutually exclusive, because a
    # wheel directory can be given together with a git URL (it then serves as
    # the place where the wheel built from source is stored). Each mode is
    # therefore detected by its own rule:
    #
    #   install  - TENSORRTLLM_PIP_WHEEL_DIR is set and TRTLLM_GIT_URL is not.
    #              flags: --tensorrtllm-pip-wheel-dir (required),
    #                     --tensorrtllm-commit (optional)
    #   build    - TRTLLM_GIT_URL is set.
    #              flags: --tensorrtllm-git-url (required),
    #                     --tensorrtllm-commit, --tensorrtllm-pip-wheel-dir (optional)
    #   download - TENSORRTLLM_PIP_WHEEL is set, with or without
    #              TENSORRTLLM_INDEX_URL; also the fallback when no other rule
    #              matched and neither of those two variables is set.
    #              flags: --tensorrtllm-pip-wheel (required),
    #                     --tensorrtllm-index-url, --tensorrtllm-commit (optional)
    #
    # Outputs: one progress line per rule on stdout.
    # Exits the script with status 1 unless exactly one rule matched.
    local wants_install="false"
    local wants_build="false"
    local wants_download="false"
    local matched=0
    TRTLLM_INTENTION="${TRTLLM_INTENTION}"

    # Rule: install from a pre-built wheel directory
    if [[ -n "$TENSORRTLLM_PIP_WHEEL_DIR" && -z "$TRTLLM_GIT_URL" ]]; then
        wants_install="true"
        TRTLLM_INTENTION="install"
        matched=$((matched + 1))
    fi
    echo "  Intent to Install TRTLLM: $wants_install"

    # Rule: build from source
    if [[ -n "$TRTLLM_GIT_URL" ]]; then
        wants_build="true"
        TRTLLM_INTENTION="build"
        matched=$((matched + 1))
    fi
    echo "  Intent to Build TRTLLM: $wants_build"

    # Rule: download a named wheel, from a custom index when one is given
    if [[ -n "$TENSORRTLLM_PIP_WHEEL" ]]; then
        wants_download="true"
        TRTLLM_INTENTION="download"
        matched=$((matched + 1))
        if [[ -n "$TENSORRTLLM_INDEX_URL" ]]; then
            echo "INFO: Installing $TENSORRTLLM_PIP_WHEEL trtllm version from index: $TENSORRTLLM_INDEX_URL"
        else
            echo "INFO: Installing $TENSORRTLLM_PIP_WHEEL trtllm version from default pip index."
        fi
    fi

    # Fallback: nothing matched and no wheel/index was given, so download
    # the wheel using the defaults specified at the top of this file.
    if [[ "$matched" -eq 0 && -z "${TENSORRTLLM_INDEX_URL}" && -z "${TENSORRTLLM_PIP_WHEEL}" ]]; then
        wants_download="true"
        TRTLLM_INTENTION="download"
        matched=$((matched + 1))
        echo "INFO: Inferring download because both TENSORRTLLM_PIP_WHEEL and TENSORRTLLM_INDEX_URL are not set."
    fi
    echo "  Intent to Download TRTLLM: $wants_download"

    # Exactly one rule must have matched.
    if [[ "$matched" -eq 1 ]]; then
        return 0
    fi

    printf '%s\n' \
        "[ERROR] Could not figure out the trtllm installation intent from the current flags. Please check your build.sh command against the following" \
        "  The grouped flags are mutually exclusive:" \
        "  To download and install use both: --tensorrtllm-index-url, --tensorrtllm-pip-wheel" \
        "  To install from a pre-built wheel use: --tensorrtllm-pip-wheel-dir" \
        "  To build from source and install use both: --tensorrtllm-commit, --tensorrtllm-git-url"
    exit 1
}

716

717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
# TensorRT-LLM specific setup: resolve the installation mode, then add the
# matching --build-arg / --build-context options and the TRTLLM commit.
if [[ $FRAMEWORK == "TRTLLM" ]]; then
    echo -e "Determining the user's TRTLLM installation intent..."
    determine_user_intention_trtllm   # From this point forward, can assume correct TRTLLM flags

    if [[ "$TRTLLM_INTENTION" == "download" ]]; then
        # Fall back to the file-level defaults for anything not given.
        TENSORRTLLM_INDEX_URL=${TENSORRTLLM_INDEX_URL:-$DEFAULT_TENSORRTLLM_INDEX_URL}
        TENSORRTLLM_PIP_WHEEL=${TENSORRTLLM_PIP_WHEEL:-$DEFAULT_TENSORRTLLM_PIP_WHEEL}
        BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=0"
        BUILD_ARGS+=" --build-arg TENSORRTLLM_PIP_WHEEL=${TENSORRTLLM_PIP_WHEEL}"
        BUILD_ARGS+=" --build-arg TENSORRTLLM_INDEX_URL=${TENSORRTLLM_INDEX_URL}"

        # Create a dummy directory to satisfy the build context requirement
        # There is no way to conditionally copy the build context in dockerfile.
        mkdir -p /tmp/dummy_dir
        BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=/tmp/dummy_dir"
        PRINT_TRTLLM_WHEEL_FILE=${TENSORRTLLM_PIP_WHEEL}
    elif [[ "$TRTLLM_INTENTION" == "install" ]]; then
        echo "Checking for TensorRT-LLM wheel in ${TENSORRTLLM_PIP_WHEEL_DIR}"
        if ! check_wheel_file "${TENSORRTLLM_PIP_WHEEL_DIR}"; then
            echo "ERROR: Valid trtllm wheel file not found in ${TENSORRTLLM_PIP_WHEEL_DIR}"
            echo "      If this is not intended you can try building from source with the following variables set instead:"
            echo ""
            echo "      --tensorrtllm-git-url https://github.com/NVIDIA/TensorRT-LLM --tensorrtllm-commit $TRTLLM_COMMIT"
            exit 1
        fi
        echo "Installing TensorRT-LLM from local wheel directory"
        BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=1"
        BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=${TENSORRTLLM_PIP_WHEEL_DIR}"
        # Quoted so a directory containing whitespace or glob characters is
        # passed to find as a single path.
        PRINT_TRTLLM_WHEEL_FILE=$(find "$TENSORRTLLM_PIP_WHEEL_DIR" -name "*.whl" | head -n 1)
    elif [[ "$TRTLLM_INTENTION" == "build" ]]; then
        TENSORRTLLM_PIP_WHEEL_DIR=${TENSORRTLLM_PIP_WHEEL_DIR:-$DEFAULT_TENSORRTLLM_PIP_WHEEL_DIR}
        echo "TRTLLM pip wheel output directory is: ${TENSORRTLLM_PIP_WHEEL_DIR}"
        if [ "$DRY_RUN" != "true" ]; then
            # Assemble the wheel-build command as an array so every value is
            # passed as exactly one argument; an empty value can then no longer
            # cause the next flag to be consumed as that option's argument.
            # NOTE(review): TRTLLM_COMMIT is only validated further below,
            # after it has been used here - confirm it always has a value
            # in "build" mode.
            TRTLLM_WHEEL_BUILD_CMD=(
                "${SOURCE_DIR}/build_trtllm_wheel.sh"
                -o "${TENSORRTLLM_PIP_WHEEL_DIR}"
                -c "${TRTLLM_COMMIT}"
                -a "${ARCH}"
                -n "${NIXL_REF}"
            )
            if [ -n "${TRTLLM_GIT_URL}" ]; then
                TRTLLM_WHEEL_BUILD_CMD+=(-u "${TRTLLM_GIT_URL}")
            fi
            # env -i runs the wheel build with an empty environment.
            if ! env -i "${TRTLLM_WHEEL_BUILD_CMD[@]}"; then
                error "ERROR: Failed to build TensorRT-LLM wheel"
            fi
            BUILD_ARGS+=" --build-arg HAS_TRTLLM_CONTEXT=1"
            BUILD_CONTEXT_ARG+=" --build-context trtllm_wheel=${TENSORRTLLM_PIP_WHEEL_DIR}"
            PRINT_TRTLLM_WHEEL_FILE=$(find "$TENSORRTLLM_PIP_WHEEL_DIR" -name "*.whl" | head -n 1)
        fi
    else
        echo 'No intention was set. This error should have been detected in "determine_user_intention_trtllm()". Exiting...'
        exit 1
    fi

    # Need to know the commit of TRTLLM so we can determine the
    # TensorRT installation associated with TRTLLM.
    if [[ -z "$TRTLLM_COMMIT" ]]; then
        # Attempt to default since the commit will work with a hash or a tag/branch
        if [[ -n "$TENSORRTLLM_PIP_WHEEL" ]]; then
            # Take the version that follows "==" in the pip requirement string.
            TRTLLM_COMMIT=$(echo "${TENSORRTLLM_PIP_WHEEL}" | sed -n 's/.*==\([0-9a-zA-Z\.\-]*\).*/\1/p')
            echo "Attempting to default TRTLLM_COMMIT to \"$TRTLLM_COMMIT\" for installation of TensorRT."
        else
            echo -e "[ERROR] TRTLLM framework was set as a target but the TRTLLM_COMMIT variable was not set."
            echo -e "  Could not find a suitible default by infering from TENSORRTLLM_PIP_WHEEL."
            echo -e "  TRTLLM_COMMIT is needed to install the correct version of TensorRT associated with TensorRT-LLM."
            exit 1
        fi
    fi
    BUILD_ARGS+=" --build-arg GITHUB_TRTLLM_COMMIT=${TRTLLM_COMMIT}"
fi

785
786
# ENABLE_KVBM: Used in base Dockerfile for block-manager feature.
#              Declared but not currently used in Dockerfile.{vllm,trtllm}.
# KVBM is always switched on for the VLLM and TRTLLM frameworks; for any other
# framework it stays off unless the --enable-kvbm flag was provided.
case "$FRAMEWORK" in
    VLLM | TRTLLM)
        echo "Forcing enable_kvbm to true in ${FRAMEWORK} image build"
        ENABLE_KVBM=true
        ;;
esac

if [ "${ENABLE_KVBM}" = "true" ]; then
    echo "Enabling KVBM in the dynamo image"
    BUILD_ARGS="${BUILD_ARGS} --build-arg ENABLE_KVBM=${ENABLE_KVBM} "
fi

799
800
801
802
803
804
805
806
807
808
809
810
# ENABLE_MEDIA_NIXL: Enable media processing with NIXL support
# Used in base Dockerfile for maturin build feature flag.
# An explicit value (--enable-media-nixl) wins; otherwise the default depends
# on the framework being built.
if [[ -z "${ENABLE_MEDIA_NIXL}" ]]; then
    case "$FRAMEWORK" in
        VLLM | TRTLLM | SGLANG) ENABLE_MEDIA_NIXL=true ;;
        *) ENABLE_MEDIA_NIXL=false ;;
    esac
fi
BUILD_ARGS="${BUILD_ARGS} --build-arg ENABLE_MEDIA_NIXL=${ENABLE_MEDIA_NIXL} "

811
# Optional overrides: each one is forwarded as a build argument only when the
# corresponding variable is set and non-empty.

# NIXL_UCX_REF: Used in dynamo base stages.
[[ -z "${NIXL_UCX_REF}" ]] || BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} "

# NIXL_GDRCOPY_REF: Used in dynamo base stages.
[[ -z "${NIXL_GDRCOPY_REF}" ]] || BUILD_ARGS+=" --build-arg NIXL_GDRCOPY_REF=${NIXL_GDRCOPY_REF} "

# MAX_JOBS is only used by Dockerfile.vllm
[[ -z "${MAX_JOBS}" ]] || BUILD_ARGS+=" --build-arg MAX_JOBS=${MAX_JOBS} "
826

827
# Python/CUDA/framework image selection: SGLang pins its own values, every
# other framework only sets the Python version.
case "$FRAMEWORK" in
    SGLANG)
        echo "Customizing Python, CUDA, and framework images for sglang images"
        BUILD_ARGS="${BUILD_ARGS} --build-arg PYTHON_VERSION=3.10"
        BUILD_ARGS="${BUILD_ARGS} --build-arg CUDA_VERSION=${SGLANG_CUDA_VERSION}"
        # Unlike the other two frameworks, SGLang's framework image is different from the base image, so we need to set it explicitly.
        BUILD_ARGS="${BUILD_ARGS} --build-arg FRAMEWORK_IMAGE=${SGLANG_FRAMEWORK_IMAGE}"
        BUILD_ARGS="${BUILD_ARGS} --build-arg FRAMEWORK_IMAGE_TAG=${SGLANG_FRAMEWORK_IMAGE_TAG}"
        ;;
    *)
        BUILD_ARGS="${BUILD_ARGS} --build-arg PYTHON_VERSION=3.12"
        ;;
esac
837
838
839
840
841
# Add sccache build arguments: the bucket/region go in as build args, the AWS
# credentials are handed over as BuildKit secrets read from the environment.
if [[ "${USE_SCCACHE}" == "true" ]]; then
    BUILD_ARGS="${BUILD_ARGS} --build-arg USE_SCCACHE=true --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET} --build-arg SCCACHE_REGION=${SCCACHE_REGION}"
    BUILD_ARGS="${BUILD_ARGS} --secret id=aws-key-id,env=AWS_ACCESS_KEY_ID --secret id=aws-secret-id,env=AWS_SECRET_ACCESS_KEY"
fi

if [[ "${FRAMEWORK}" == "SGLANG" ]]; then
    case "$PLATFORM" in
        *linux/arm64*)
            # Add arguments required for sglang blackwell build
            BUILD_ARGS="${BUILD_ARGS} --build-arg GRACE_BLACKWELL=true --build-arg BUILD_TYPE=blackwell_aarch64"
            ;;
    esac
fi
849
850
851
852
853
854
# Unless NO_TAG_LATEST is set, also tag the image as
# dynamo:latest-<framework>[-<target>]; the local-dev target gets no suffix.
if [[ -n "${NO_TAG_LATEST}" ]]; then
    LATEST_TAG=""
else
    LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}"
    case "${TARGET}" in
        "" | local-dev) ;;
        *) LATEST_TAG+="-${TARGET}" ;;
    esac
fi

show_image_options

859
860
861
862
863
# Always build the main image first
# Create build log directory for BuildKit reports
BUILD_LOG_DIR="${BUILD_CONTEXT}/build-logs"
mkdir -p "${BUILD_LOG_DIR}"
SINGLE_BUILD_LOG="${BUILD_LOG_DIR}/single-stage-build.log"

# Use BuildKit for enhanced metadata.
# The option variables below are intentionally left unquoted: each holds a
# space-separated list of flags that must be word-split into separate
# arguments (an empty one then disappears entirely).
# The build output is mirrored into SINGLE_BUILD_LOG through tee, so the exit
# status of the build itself is taken from PIPESTATUS[0], not from the pipeline.
if docker buildx version &>/dev/null; then
    $RUN_PREFIX docker buildx build --progress=plain --load -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
    BUILD_EXIT_CODE=${PIPESTATUS[0]}
else
    # DOCKER_BUILDKIT=1 is set through `env`: after `$RUN_PREFIX` the shell no
    # longer treats NAME=value as an assignment, so with an empty RUN_PREFIX a
    # bare `DOCKER_BUILDKIT=1` would be executed as the command name.
    $RUN_PREFIX env DOCKER_BUILDKIT=1 docker build --progress=plain -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE 2>&1 | tee "${SINGLE_BUILD_LOG}"
    BUILD_EXIT_CODE=${PIPESTATUS[0]}
fi

# Propagate a failed build as the script's own exit status.
if [ "${BUILD_EXIT_CODE}" -ne 0 ]; then
    exit "${BUILD_EXIT_CODE}"
fi
877

878
879
# Handle local-dev target: derive the base image name and the *-local-dev
# tags from the tags of the image built above, then build the local-dev image.
if [[ "${LOCAL_DEV_BUILD:-}" == "true" ]]; then
    LOCAL_DEV_TAGS=""

    if [[ -n "$TAG" ]]; then
        # Drop the "--tag " flag and any existing -local-dev suffix. The bare
        # name is the image to build from, and gets -local-dev re-appended
        # for the new tag.
        TAG_NAME=$(echo "$TAG" | sed -e 's/--tag //' -e 's/-local-dev$//')
        DEV_IMAGE="${TAG_NAME}"
        LOCAL_DEV_TAGS="${LOCAL_DEV_TAGS} --tag ${TAG_NAME}-local-dev"
    else
        # No explicit tag: fall back to the latest tag of this framework.
        DEV_IMAGE="dynamo:latest-${FRAMEWORK,,}"
    fi

    if [[ -n "$LATEST_TAG" ]]; then
        # Same normalisation for the "latest" tag.
        LATEST_TAG_NAME=$(echo "$LATEST_TAG" | sed -e 's/--tag //' -e 's/-local-dev$//')
        LOCAL_DEV_TAGS="${LOCAL_DEV_TAGS} --tag ${LATEST_TAG_NAME}-local-dev"
    fi

    # Extract first tag for success message
    FIRST_TAG=$(echo "$LOCAL_DEV_TAGS" | grep -o -- '--tag [^ ]*' | head -1 | cut -d' ' -f2)
    build_local_dev_with_header "$DEV_IMAGE" "$LOCAL_DEV_TAGS" "Successfully built $FIRST_TAG" "Building Local-Dev Image"
fi
905

906

907
# Turn command tracing (xtrace) off; the group's stderr is discarded so the
# `set +x` call itself does not show up in the trace output.
{ set +x; } 2>/dev/null