Unverified commit 29f5b822, authored by Kris Hung and committed by GitHub
Browse files

fix: Reduce memory usage to avoid vLLM dsr1 OOM (#3660)

parent 43d687e8
...@@ -101,10 +101,10 @@ for ((i=0; i<GPUS_PER_NODE; i++)); do ...@@ -101,10 +101,10 @@ for ((i=0; i<GPUS_PER_NODE; i++)); do
--data_parallel_size $DATA_PARALLEL_SIZE \ --data_parallel_size $DATA_PARALLEL_SIZE \
--data-parallel-rank $dp_rank \ --data-parallel-rank $dp_rank \
--enable-expert-parallel \ --enable-expert-parallel \
--max-model-len 10240 \ --max-model-len 4096 \
--data-parallel-address $MASTER_ADDR \ --data-parallel-address $MASTER_ADDR \
--data-parallel-rpc-port 13345 \ --data-parallel-rpc-port 13345 \
--gpu-memory-utilization 0.95 \ --gpu-memory-utilization 0.9 \
--enforce-eager 2>&1 | tee $LOG_DIR/dsr1_dep_${dp_rank}.log & --enforce-eager 2>&1 | tee $LOG_DIR/dsr1_dep_${dp_rank}.log &
done done
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment