start_trtllm_worker.sh 1.54 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# MODEL_PATH is mandatory: report the problem on stderr and stop with exit
# status 1 when it is unset or empty.
if [[ -z "${MODEL_PATH}" ]]; then
    {
        echo "ERROR: MODEL_PATH was not set."
        # echo joins its arguments with a single space, so the pieces carry no
        # trailing spaces of their own (that would print double spaces).
        echo "ERROR: MODEL_PATH must be set to either the HuggingFace ID or locally" \
             "downloaded path to the model weights. Since Deepseek R1 is large, it is" \
             "recommended to pre-download them to a shared location and provide the path."
    } >&2
    exit 1
fi

13
14
15
16
17
18
# SERVED_MODEL_NAME is not mandatory: an unset or empty value only produces a
# warning (on stderr, so it does not mix with regular output) and the script
# keeps going.
if [[ -z "${SERVED_MODEL_NAME}" ]]; then
    echo "WARNING: SERVED_MODEL_NAME was not set. It will be derived from MODEL_PATH." >&2
fi



19
20
21
22
23
24
# ENGINE_CONFIG is mandatory: report the problem on stderr and stop with exit
# status 1 when it is unset or empty.
if [[ -z "${ENGINE_CONFIG}" ]]; then
    {
        echo "ERROR: ENGINE_CONFIG was not set."
        echo "ERROR: ENGINE_CONFIG must be set to a valid Dynamo+TRTLLM engine config file."
    } >&2
    exit 1
fi

25
26
27
28
29
30
31
# NOTE: The TRTLLM KV cache environment variables are not set when srun runs
# this script directly, so a default is chosen here: TRTLLM_USE_UCX_KVCACHE is
# exported as 1 unless either TRTLLM_USE_UCX_KVCACHE or TRTLLM_USE_NIXL_KVCACHE
# already has a non-empty value.
# Related issue: https://github.com/ai-dynamo/dynamo/issues/1743
if [[ -z "${TRTLLM_USE_UCX_KVCACHE}${TRTLLM_USE_NIXL_KVCACHE}" ]]; then
    export TRTLLM_USE_UCX_KVCACHE=1
fi

32
33
34
35
36
37
38
39
40
#######################################
# Collect the optional disaggregation flags for the worker command line.
# An array is used (instead of a space-joined string) so that every value is
# passed on as exactly one argument, without word-splitting or globbing.
# Globals:
#   DISAGGREGATION_MODE      (read) adds --disaggregation-mode when non-empty
#   DISAGGREGATION_STRATEGY  (read) adds --disaggregation-strategy when non-empty
#   EXTRA_ARGS               (written) array of flag/value pairs, possibly empty
# Arguments: none
#######################################
build_extra_args() {
  EXTRA_ARGS=()
  if [[ -n "${DISAGGREGATION_MODE}" ]]; then
    EXTRA_ARGS+=(--disaggregation-mode "${DISAGGREGATION_MODE}")
  fi

  if [[ -n "${DISAGGREGATION_STRATEGY}" ]]; then
    EXTRA_ARGS+=(--disaggregation-strategy "${DISAGGREGATION_STRATEGY}")
  fi
}

build_extra_args

# Start the Dynamo TRTLLM worker module through trtllm-llmapi-launch.
# "${EXTRA_ARGS[@]}" expands to nothing when no optional flag was collected.
trtllm-llmapi-launch \
  python3 -m dynamo.trtllm \
    --model-path "${MODEL_PATH}" \
    --served-model-name "${SERVED_MODEL_NAME}" \
    --extra-engine-args "${ENGINE_CONFIG}" \
    "${EXTRA_ARGS[@]}"