Unverified commit da783ce0, authored by KrishnanPrash, committed by GitHub
Browse files

fix: prevent DYN_SYSTEM_PORT collisions in TRT-LLM EPD launch scripts (#7366)


Signed-off-by: Krishnan Prashanth <kprashanth@nvidia.com>
parent 39310c5a
......@@ -27,6 +27,10 @@ export MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-50}
# Extra arguments forwarded to the PD worker (e.g. --multimodal-embedding-cache-capacity-gb 10)
EXTRA_PD_ARGS=("$@")
# Prevent port collisions: the test framework exports DYN_SYSTEM_PORT which all
# child processes would inherit. Unset it so only workers that need it set their own.
unset DYN_SYSTEM_PORT
HTTP_PORT="${DYN_HTTP_PORT:-8000}"
print_launch_banner --multimodal "Launching Multimodal E/PD" "$MODEL_PATH" "$HTTP_PORT"
......@@ -34,7 +38,8 @@ print_launch_banner --multimodal "Launching Multimodal E/PD" "$MODEL_PATH" "$HTT
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
python3 -m dynamo.frontend &
# run encode worker (vision encoder on GPU 0)
# run encode worker (vision encoder on GPU 0) with metrics on port 8081
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT1:-8081} \
CUDA_VISIBLE_DEVICES=$ENCODE_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
--model-path "$MODEL_PATH" \
--served-model-name "$SERVED_MODEL_NAME" \
......@@ -44,7 +49,8 @@ CUDA_VISIBLE_DEVICES=$ENCODE_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
--max-file-size-mb "$MAX_FILE_SIZE_MB" \
--disaggregation-mode encode &
# run PD worker 1 (GPU 0)
# run PD worker 1 (GPU 0) with metrics on port 8082
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT2:-8082} \
CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.trtllm \
--model-path "$MODEL_PATH" \
--served-model-name "$SERVED_MODEL_NAME" \
......
......@@ -23,6 +23,10 @@ export MODALITY=${MODALITY:-"multimodal"}
export ALLOWED_LOCAL_MEDIA_PATH=${ALLOWED_LOCAL_MEDIA_PATH:-"/tmp"}
export MAX_FILE_SIZE_MB=${MAX_FILE_SIZE_MB:-50}
# Prevent port collisions: the test framework exports DYN_SYSTEM_PORT which all
# child processes would inherit. Unset it so only workers that need it set their own.
unset DYN_SYSTEM_PORT
HTTP_PORT="${DYN_HTTP_PORT:-8000}"
print_launch_banner --multimodal "Launching Multimodal E/P/D" "$MODEL_PATH" "$HTTP_PORT"
......@@ -30,7 +34,8 @@ print_launch_banner --multimodal "Launching Multimodal E/P/D" "$MODEL_PATH" "$HT
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
python3 -m dynamo.frontend &
# run encode worker
# run encode worker with metrics on port 8081
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT1:-8081} \
CUDA_VISIBLE_DEVICES=$ENCODE_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
--model-path "$MODEL_PATH" \
--served-model-name "$SERVED_MODEL_NAME" \
......@@ -40,7 +45,8 @@ CUDA_VISIBLE_DEVICES=$ENCODE_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
--max-file-size-mb "$MAX_FILE_SIZE_MB" \
--disaggregation-mode encode &
# run prefill worker
# run prefill worker with metrics on port 8082
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT2:-8082} \
CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
--model-path "$MODEL_PATH" \
--served-model-name "$SERVED_MODEL_NAME" \
......@@ -49,7 +55,8 @@ CUDA_VISIBLE_DEVICES=$PREFILL_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
--disaggregation-mode prefill \
--encode-endpoint "$ENCODE_ENDPOINT" &
# run decode worker
# run decode worker with metrics on port 8083
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT3:-8083} \
CUDA_VISIBLE_DEVICES=$DECODE_CUDA_VISIBLE_DEVICES python3 -m dynamo.trtllm \
--model-path "$MODEL_PATH" \
--served-model-name "$SERVED_MODEL_NAME" \
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment