feat: Add vars to multi-node trtllm slurm scripts to support xP yD deployments (#2429)

cebe9219 · Ryan McCormick · GitHub · dcfa87be · cebe9219 · cebe9219
Unverified Commit cebe9219 authored Aug 13, 2025 by Ryan McCormick Committed by GitHub Aug 13, 2025
2 changed files
--- a/components/backends/trtllm/multinode/multinode-examples.md
+++ b/components/backends/trtllm/multinode/multinode-examples.md
@@ -186,6 +186,10 @@ deployment across 8 nodes:
 ./srun_disaggregated.sh
 ```
+> [!Tip]
+> To launch multiple replicas of the configured prefill/decode workers, you can set
+> `NUM_PREFILL_WORKERS` and `NUM_DECODE_WORKERS` respectively (default: 1).
 ## Understanding the Output
 1. The `srun_aggregated.sh` launches two `srun` jobs. The first launches

--- a/components/backends/trtllm/multinode/srun_disaggregated.sh
+++ b/components/backends/trtllm/multinode/srun_disaggregated.sh
@@ -16,9 +16,11 @@ MOUNTS="${MOUNTS:-${DEFAULT_MOUNT}}"
 NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE:-4}
 NUM_PREFILL_NODES=${NUM_PREFILL_NODES:-4}
+NUM_PREFILL_WORKERS=${NUM_PREFILL_WORKERS:-1}
 PREFILL_ENGINE_CONFIG="${PREFILL_ENGINE_CONFIG:-/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml}"
 NUM_DECODE_NODES=${NUM_DECODE_NODES:-4}
+NUM_DECODE_WORKERS=${NUM_DECODE_WORKERS:-1}
 DECODE_ENGINE_CONFIG="${DECODE_ENGINE_CONFIG:-/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml}"
 DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
@@ -59,38 +61,42 @@ srun \
 # NOTE: Output streamed to stdout for ease of understanding the example, but
 # in practice you would probably set `srun --output ... --error ...` to pipe
 # the stdout/stderr to files.
-echo "Launching multi-node prefill worker in background."
+for ((i=1; i<=${NUM_PREFILL_WORKERS}; i++)); do
-DISAGGREGATION_MODE=prefill \
+  echo "Launching multi-node prefill worker in background."
-ENGINE_CONFIG=${PREFILL_ENGINE_CONFIG} \
+  DISAGGREGATION_MODE=prefill \
-srun \
+  ENGINE_CONFIG=${PREFILL_ENGINE_CONFIG} \
-  --mpi pmix \
+  srun \
-  --oversubscribe \
+    --mpi pmix \
-  --container-image "${IMAGE}" \
+    --oversubscribe \
-  --container-mounts "${MOUNTS}" \
+    --container-image "${IMAGE}" \
-  --container-env ETCD_ENDPOINTS,NATS_SERVER,HEAD_NODE_IP,HEAD_NODE,DISAGGREGATION_MODE,DISAGGREGATION_STRATEGY,ENGINE_CONFIG \
+    --container-mounts "${MOUNTS}" \
-  --verbose \
+    --container-env ETCD_ENDPOINTS,NATS_SERVER,HEAD_NODE_IP,HEAD_NODE,DISAGGREGATION_MODE,DISAGGREGATION_STRATEGY,ENGINE_CONFIG \
-  --label \
+    --verbose \
-  -A "${ACCOUNT}" \
+    --label \
-  -J "${ACCOUNT}-dynamo.trtllm" \
+    -A "${ACCOUNT}" \
-  --nodes "${NUM_PREFILL_NODES}" \
+    -J "${ACCOUNT}-dynamo.trtllm" \
-  --ntasks-per-node "${NUM_GPUS_PER_NODE}" \
+    --nodes "${NUM_PREFILL_NODES}" \
-  --jobid "${SLURM_JOB_ID}" \
+    --ntasks-per-node "${NUM_GPUS_PER_NODE}" \
-  /mnt/multinode/start_trtllm_worker.sh &
+    --jobid "${SLURM_JOB_ID}" \
+    /mnt/multinode/start_trtllm_worker.sh &
+done
-echo "Launching multi-node decode worker in background."
+for ((i=1; i<=${NUM_DECODE_WORKERS}; i++)); do
-DISAGGREGATION_MODE=decode \
+  echo "Launching multi-node decode worker in background."
-ENGINE_CONFIG=${DECODE_ENGINE_CONFIG} \
+  DISAGGREGATION_MODE=decode \
-srun \
+  ENGINE_CONFIG=${DECODE_ENGINE_CONFIG} \
-  --mpi pmix \
+  srun \
-  --oversubscribe \
+    --mpi pmix \
-  --container-image "${IMAGE}" \
+    --oversubscribe \
-  --container-mounts "${MOUNTS}" \
+    --container-image "${IMAGE}" \
-  --container-env ETCD_ENDPOINTS,NATS_SERVER,HEAD_NODE_IP,HEAD_NODE,DISAGGREGATION_MODE,DISAGGREGATION_STRATEGY,ENGINE_CONFIG \
+    --container-mounts "${MOUNTS}" \
-  --verbose \
+    --container-env ETCD_ENDPOINTS,NATS_SERVER,HEAD_NODE_IP,HEAD_NODE,DISAGGREGATION_MODE,DISAGGREGATION_STRATEGY,ENGINE_CONFIG \
-  --label \
+    --verbose \
-  -A "${ACCOUNT}" \
+    --label \
-  -J "${ACCOUNT}-dynamo.trtllm" \
+    -A "${ACCOUNT}" \
-  --nodes "${NUM_DECODE_NODES}" \
+    -J "${ACCOUNT}-dynamo.trtllm" \
-  --ntasks-per-node "${NUM_GPUS_PER_NODE}" \
+    --nodes "${NUM_DECODE_NODES}" \
-  --jobid "${SLURM_JOB_ID}" \
+    --ntasks-per-node "${NUM_GPUS_PER_NODE}" \
-  /mnt/multinode/start_trtllm_worker.sh &
+    --jobid "${SLURM_JOB_ID}" \
\ No newline at end of file
+    /mnt/multinode/start_trtllm_worker.sh &
+done
\ No newline at end of file