Unverified commit cebe9219, authored by Ryan McCormick, committed by GitHub
Browse files

feat: Add vars to multi-node trtllm slurm scripts to support xP yD deployments (#2429)

parent dcfa87be
...@@ -186,6 +186,10 @@ deployment across 8 nodes: ...@@ -186,6 +186,10 @@ deployment across 8 nodes:
./srun_disaggregated.sh ./srun_disaggregated.sh
``` ```
> [!Tip]
> To launch multiple replicas of the configured prefill and decode workers, set
> `NUM_PREFILL_WORKERS` and `NUM_DECODE_WORKERS`, respectively (each defaults to 1).
## Understanding the Output ## Understanding the Output
1. The `srun_aggregated.sh` launches two `srun` jobs. The first launches 1. The `srun_aggregated.sh` launches two `srun` jobs. The first launches
......
...@@ -16,9 +16,11 @@ MOUNTS="${MOUNTS:-${DEFAULT_MOUNT}}" ...@@ -16,9 +16,11 @@ MOUNTS="${MOUNTS:-${DEFAULT_MOUNT}}"
NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE:-4} NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE:-4}
NUM_PREFILL_NODES=${NUM_PREFILL_NODES:-4} NUM_PREFILL_NODES=${NUM_PREFILL_NODES:-4}
NUM_PREFILL_WORKERS=${NUM_PREFILL_WORKERS:-1}
PREFILL_ENGINE_CONFIG="${PREFILL_ENGINE_CONFIG:-/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml}" PREFILL_ENGINE_CONFIG="${PREFILL_ENGINE_CONFIG:-/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_prefill.yaml}"
NUM_DECODE_NODES=${NUM_DECODE_NODES:-4} NUM_DECODE_NODES=${NUM_DECODE_NODES:-4}
NUM_DECODE_WORKERS=${NUM_DECODE_WORKERS:-1}
DECODE_ENGINE_CONFIG="${DECODE_ENGINE_CONFIG:-/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml}" DECODE_ENGINE_CONFIG="${DECODE_ENGINE_CONFIG:-/mnt/engine_configs/deepseek_r1/wide_ep/wide_ep_decode.yaml}"
DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"} DISAGGREGATION_STRATEGY=${DISAGGREGATION_STRATEGY:-"decode_first"}
...@@ -59,38 +61,42 @@ srun \ ...@@ -59,38 +61,42 @@ srun \
# NOTE: Output streamed to stdout for ease of understanding the example, but # NOTE: Output streamed to stdout for ease of understanding the example, but
# in practice you would probably set `srun --output ... --error ...` to pipe # in practice you would probably set `srun --output ... --error ...` to pipe
# the stdout/stderr to files. # the stdout/stderr to files.
echo "Launching multi-node prefill worker in background." for ((i=1; i<=${NUM_PREFILL_WORKERS}; i++)); do
DISAGGREGATION_MODE=prefill \ echo "Launching multi-node prefill worker in background."
ENGINE_CONFIG=${PREFILL_ENGINE_CONFIG} \ DISAGGREGATION_MODE=prefill \
srun \ ENGINE_CONFIG=${PREFILL_ENGINE_CONFIG} \
--mpi pmix \ srun \
--oversubscribe \ --mpi pmix \
--container-image "${IMAGE}" \ --oversubscribe \
--container-mounts "${MOUNTS}" \ --container-image "${IMAGE}" \
--container-env ETCD_ENDPOINTS,NATS_SERVER,HEAD_NODE_IP,HEAD_NODE,DISAGGREGATION_MODE,DISAGGREGATION_STRATEGY,ENGINE_CONFIG \ --container-mounts "${MOUNTS}" \
--verbose \ --container-env ETCD_ENDPOINTS,NATS_SERVER,HEAD_NODE_IP,HEAD_NODE,DISAGGREGATION_MODE,DISAGGREGATION_STRATEGY,ENGINE_CONFIG \
--label \ --verbose \
-A "${ACCOUNT}" \ --label \
-J "${ACCOUNT}-dynamo.trtllm" \ -A "${ACCOUNT}" \
--nodes "${NUM_PREFILL_NODES}" \ -J "${ACCOUNT}-dynamo.trtllm" \
--ntasks-per-node "${NUM_GPUS_PER_NODE}" \ --nodes "${NUM_PREFILL_NODES}" \
--jobid "${SLURM_JOB_ID}" \ --ntasks-per-node "${NUM_GPUS_PER_NODE}" \
/mnt/multinode/start_trtllm_worker.sh & --jobid "${SLURM_JOB_ID}" \
/mnt/multinode/start_trtllm_worker.sh &
done
echo "Launching multi-node decode worker in background." for ((i=1; i<=${NUM_DECODE_WORKERS}; i++)); do
DISAGGREGATION_MODE=decode \ echo "Launching multi-node decode worker in background."
ENGINE_CONFIG=${DECODE_ENGINE_CONFIG} \ DISAGGREGATION_MODE=decode \
srun \ ENGINE_CONFIG=${DECODE_ENGINE_CONFIG} \
--mpi pmix \ srun \
--oversubscribe \ --mpi pmix \
--container-image "${IMAGE}" \ --oversubscribe \
--container-mounts "${MOUNTS}" \ --container-image "${IMAGE}" \
--container-env ETCD_ENDPOINTS,NATS_SERVER,HEAD_NODE_IP,HEAD_NODE,DISAGGREGATION_MODE,DISAGGREGATION_STRATEGY,ENGINE_CONFIG \ --container-mounts "${MOUNTS}" \
--verbose \ --container-env ETCD_ENDPOINTS,NATS_SERVER,HEAD_NODE_IP,HEAD_NODE,DISAGGREGATION_MODE,DISAGGREGATION_STRATEGY,ENGINE_CONFIG \
--label \ --verbose \
-A "${ACCOUNT}" \ --label \
-J "${ACCOUNT}-dynamo.trtllm" \ -A "${ACCOUNT}" \
--nodes "${NUM_DECODE_NODES}" \ -J "${ACCOUNT}-dynamo.trtllm" \
--ntasks-per-node "${NUM_GPUS_PER_NODE}" \ --nodes "${NUM_DECODE_NODES}" \
--jobid "${SLURM_JOB_ID}" \ --ntasks-per-node "${NUM_GPUS_PER_NODE}" \
/mnt/multinode/start_trtllm_worker.sh & --jobid "${SLURM_JOB_ID}" \
\ No newline at end of file /mnt/multinode/start_trtllm_worker.sh &
done
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment