#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

# Prefix placed in front of the docker command; --dry-run sets it to "echo"
# so the command is printed instead of executed.
RUN_PREFIX=

# Frameworks
#
# Each framework has a corresponding base image.  Additional
# dependencies are specified in the /container/deps folder and
# installed within framework specific sections of the Dockerfile.

declare -A FRAMEWORKS=(["VLLM"]=1 ["TENSORRTLLM"]=2 ["SGLANG"]=3 ["VLLM_V1"]=4)
DEFAULT_FRAMEWORK=VLLM

# Directory containing this script (symlinks resolved).
SOURCE_DIR=$(dirname "$(readlink -f "$0")")

# Option defaults; get_options overrides them from the command line.
IMAGE=
HF_CACHE=
DEFAULT_HF_CACHE=${SOURCE_DIR}/.cache/huggingface
GPUS="all"
PRIVILEGED=
VOLUME_MOUNTS=
MOUNT_WORKSPACE=
ENVIRONMENT_VARIABLES=
# Array: get_options stores the arguments that follow `--` here.
REMAINING_ARGS=()
INTERACTIVE=
USE_NIXL_GDS=
RUNTIME=nvidia
WORKDIR=/workspace

# Parse the command line into the script's global settings, then derive the
# option fragments consumed by the final `docker run` invocation.
#
# Globals read:    FRAMEWORKS, DEFAULT_FRAMEWORK, DEFAULT_HF_CACHE, SOURCE_DIR
# Globals written: FRAMEWORK, IMAGE, TARGET, NAME, HF_CACHE, GPUS, RUNTIME,
#                  ENTRYPOINT, WORKDIR, PRIVILEGED, RM, VOLUME_MOUNTS,
#                  ENVIRONMENT_VARIABLES, INTERACTIVE, MOUNT_WORKSPACE,
#                  USE_NIXL_GDS, RUN_PREFIX, GPU_STRING, NAME_STRING,
#                  ENTRYPOINT_STRING, PRIVILEGED_STRING, RM_STRING,
#                  NIXL_GDS_CAPS, REMAINING_ARGS
# Arguments:       the script's command-line arguments ("$@")
# Side effects:    creates the HF cache directory when one is configured
# Exits:           0 after printing help; 1 (through error) on an unknown
#                  option, an unknown framework, or a missing option value
get_options() {
    while :; do
        case $1 in
        -h | -\? | --help)
            show_help
            exit
            ;;
        --framework)
            if [ "$2" ]; then
                FRAMEWORK=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --image)
            if [ "$2" ]; then
                IMAGE=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --target)
            if [ "$2" ]; then
                TARGET=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --name)
            if [ "$2" ]; then
                NAME=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --hf-cache)
            if [ "$2" ]; then
                HF_CACHE=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --gpus)
            if [ "$2" ]; then
                GPUS=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --runtime)
            if [ "$2" ]; then
                RUNTIME=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --entrypoint)
            if [ "$2" ]; then
                ENTRYPOINT=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --workdir)
            if [ "$2" ]; then
                WORKDIR="$2"
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --privileged)
            if [ "$2" ]; then
                PRIVILEGED=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        --rm)
            if [ "$2" ]; then
                RM=$2
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        -v)
            if [ "$2" ]; then
                VOLUME_MOUNTS+=" -v $2 "
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        -e)
            if [ "$2" ]; then
                ENVIRONMENT_VARIABLES+=" -e $2 "
                shift
            else
                missing_requirement "$1"
            fi
            ;;
        -it)
            INTERACTIVE=" -it "
            ;;
        --mount-workspace)
            MOUNT_WORKSPACE=TRUE
            ;;
        --use-nixl-gds)
            USE_NIXL_GDS=TRUE
            ;;
        --dry-run)
            RUN_PREFIX="echo"
            echo ""
            echo "=============================="
            echo "DRY RUN: COMMANDS PRINTED ONLY"
            echo "=============================="
            echo ""
            ;;
        --)
            # Everything after `--` is the command passed to the container.
            shift
            break
            ;;
        ?*)
            # Any other non-empty word (dash-prefixed or not) is rejected.
            error 'ERROR: Unknown option: ' "$1"
            ;;
        *)
            # Empty word: no arguments left to parse.
            break
            ;;
        esac

        shift
    done

    if [ -z "$FRAMEWORK" ]; then
        FRAMEWORK=$DEFAULT_FRAMEWORK
    fi

    if [ -n "$FRAMEWORK" ]; then
        # Framework names are matched case-insensitively.
        FRAMEWORK=${FRAMEWORK^^}
        if [[ -z "${FRAMEWORKS[$FRAMEWORK]}" ]]; then
            error 'ERROR: Unknown framework: ' "$FRAMEWORK"
        fi
    fi

    # Default image tag: dynamo:latest-<framework>[-<target>]
    if [ -z "$IMAGE" ]; then
        IMAGE="dynamo:latest-${FRAMEWORK,,}"
        if [ -n "${TARGET}" ]; then
            IMAGE="${IMAGE}-${TARGET}"
        fi
    fi

    # "none" in any letter case disables GPU support: drop both --gpus and
    # the container runtime so the two settings cannot disagree.
    if [[ ${GPUS^^} == "NONE" ]]; then
        GPU_STRING=""
        RUNTIME=""
    else
        GPU_STRING="--gpus ${GPUS}"
    fi

    if [[ -z "${NAME}" ]]; then
        NAME_STRING=""
    else
        NAME_STRING="--name ${NAME}"
    fi

    if [[ -z "${ENTRYPOINT}" ]]; then
        ENTRYPOINT_STRING=""
    else
        ENTRYPOINT_STRING="--entrypoint ${ENTRYPOINT}"
    fi

    # Local-development setup: mount the parent of the script directory plus
    # /tmp and /mnt, default the HF cache and privileged mode, forward
    # HF_TOKEN, and force an interactive session.
    if [ -n "$MOUNT_WORKSPACE" ]; then
        VOLUME_MOUNTS+=" -v ${SOURCE_DIR}/..:/workspace "
        VOLUME_MOUNTS+=" -v /tmp:/tmp "
        VOLUME_MOUNTS+=" -v /mnt/:/mnt "

        if [ -z "$HF_CACHE" ]; then
            HF_CACHE=$DEFAULT_HF_CACHE
        fi

        if [ -z "${PRIVILEGED}" ]; then
            PRIVILEGED="TRUE"
        fi

        ENVIRONMENT_VARIABLES+=" -e HF_TOKEN"

        INTERACTIVE=" -it "
    fi

    # An explicit "none" (any case) disables the HF cache mount.
    if [[ ${HF_CACHE^^} == "NONE" ]]; then
        HF_CACHE=
    fi

    if [ -n "$HF_CACHE" ]; then
        mkdir -p "$HF_CACHE"
        VOLUME_MOUNTS+=" -v $HF_CACHE:/root/.cache/huggingface"
    fi

    if [ -z "${PRIVILEGED}" ]; then
        PRIVILEGED="FALSE"
    fi

    if [ -z "${RM}" ]; then
        RM="TRUE"
    fi

    # Any value other than FALSE (case-insensitive) enables the flag.
    if [[ ${PRIVILEGED^^} == "FALSE" ]]; then
        PRIVILEGED_STRING=""
    else
        PRIVILEGED_STRING="--privileged"
    fi

    if [[ ${RM^^} == "FALSE" ]]; then
        RM_STRING=""
    else
        RM_STRING=" --rm "
    fi

    if [ -n "$USE_NIXL_GDS" ]; then
        VOLUME_MOUNTS+=" -v /run/udev:/run/udev:ro "
        NIXL_GDS_CAPS="--cap-add=IPC_LOCK"
    else
        NIXL_GDS_CAPS=""
    fi

    REMAINING_ARGS=("$@")
}

# Print the usage summary to stdout and exit with status 0.
#
# Globals read: FRAMEWORKS (its keys are listed as the valid frameworks)
show_help() {
    cat <<EOF
usage: run.sh
  [-h | --help print this message and exit]
  [--image image]
  [--framework framework one of ${!FRAMEWORKS[*]}]
  [--target target suffix appended to the default image tag]
  [--name name for launched container, default NONE]
  [--entrypoint entrypoint to use for the container]
  [--privileged whether to launch in privileged mode, default FALSE unless mounting workspace]
  [--rm whether to remove the container when it exits, default TRUE]
  [--dry-run print docker commands without running]
  [--hf-cache directory to volume mount as the hf cache, default is NONE unless mounting workspace]
  [--gpus gpus to enable, default is 'all', 'none' disables gpu support]
  [--use-nixl-gds add volume mounts and capabilities needed for NVIDIA GPUDirect Storage]
  [-v add volume mount]
  [-e add environment variable]
  [-it pass -it to docker run]
  [--mount-workspace set up for local development]
  [-- stop processing and pass remaining args as command to docker run]
  [--workdir set the working directory inside the container]
  [--runtime container runtime passed to docker run --runtime, default 'nvidia']
EOF
    exit 0
}

# Abort because an option that needs a value was given without one.
# Arguments: $1 - the option as the user typed it
# Does not return: error terminates the script.
missing_requirement() {
    local option_name=$1
    error "ERROR: ${option_name} requires an argument."
}

# Print a diagnostic to stderr and terminate the script with status 1.
# Arguments: $1 - message
#            $2 - optional detail, printed after the message and a space
error() {
    if (($# > 1)); then
        printf '%s %s\n' "$1" "$2" >&2
    else
        # Single-argument call: avoid emitting a dangling trailing space.
        printf '%s\n' "$1" >&2
    fi
    exit 1
}

get_options "$@"

# RUN the image

# Trace the real command so the exact docker invocation is visible. In
# dry-run mode RUN_PREFIX is "echo", which prints the command instead.
if [ -z "$RUN_PREFIX" ]; then
    set -x
fi

# The *_STRING, INTERACTIVE, ENVIRONMENT_VARIABLES, VOLUME_MOUNTS and
# NIXL_GDS_CAPS values are space-separated option fragments built by
# get_options; they are left unquoted on purpose so they split into words
# (and vanish entirely when empty).
# shellcheck disable=SC2086
${RUN_PREFIX} docker run \
    ${GPU_STRING} \
    ${INTERACTIVE} \
    ${RM_STRING} \
    --network host \
    ${RUNTIME:+--runtime "$RUNTIME"} \
    --shm-size=10G \
    --ulimit memlock=-1 \
    --ulimit stack=67108864 \
    --ulimit nofile=65536:65536 \
    ${ENVIRONMENT_VARIABLES} \
    ${VOLUME_MOUNTS} \
    -w "$WORKDIR" \
    --cap-add CAP_SYS_PTRACE \
    ${NIXL_GDS_CAPS} \
    --ipc host \
    ${PRIVILEGED_STRING} \
    ${NAME_STRING} \
    ${ENTRYPOINT_STRING} \
    "${IMAGE}" \
    "${REMAINING_ARGS[@]}"

# Turn tracing back off without the `set +x` itself appearing in the trace.
{ set +x; } 2>/dev/null