Unverified Commit 31fe8423 authored by HELSON's avatar HELSON Committed by GitHub
Browse files

[example] fix benchmark.sh for gpt example (#2229)

parent 78483a9f
for MODEL_NAME in "GPT2small" for MODEL_TYPE in "gpt2_medium"
do do
for BATCH_SIZE in 8 for BATCH_SIZE in 16
do do
for GPUNUM in 1 2 4 8 for GPUNUM in 1 2 4 8
do do
...@@ -11,8 +11,8 @@ then ...@@ -11,8 +11,8 @@ then
continue continue
fi fi
echo "****************** Begin ***************************" echo "****************** Begin ***************************"
echo "* benchmrking MODEL_NAME ${MODEL_NAME} BS ${BATCH_SIZE} BS ${BS} GPUNUM ${GPUNUM} TPDEGREE ${TPDEGREE}" echo "* benchmrking MODEL_TYPE ${MODEL_TYPE} BS ${BATCH_SIZE} BS ${BS} GPUNUM ${GPUNUM} TPDEGREE ${TPDEGREE}"
bash ./run.sh MODEL_TYPE=${MODEL_TYPE} BATCH_SIZE=${BATCH_SIZE} GPUNUM=${GPUNUM} TPDEGREE=${TPDEGREE} bash ./run.sh
echo "****************** Finished ***************************" echo "****************** Finished ***************************"
echo "" echo ""
echo "" echo ""
......
# run.sh — launch the GPT training demo via torchrun.
# Every knob is an env var with a default, so callers (e.g. benchmark.sh)
# can override any subset: MODEL_TYPE=gpt2_xl GPUNUM=4 bash ./run.sh

# distplan in ["colossalai", "zero1", "zero2", "torch_ddp", "torch_zero"]
# NOTE: ${VAR:-default} is the correct default-value expansion; the broken
# forms {$VAR:-x} / ${VAR:x} / ${VAR:'x'} silently produce wrong values.
export DISTPAN=${DISTPAN:-"colossalai"}

# The following options are only valid when DISTPAN="colossalai".
export GPUNUM=${GPUNUM:-1}                      # processes launched by torchrun
export TPDEGREE=${TPDEGREE:-1}                  # tensor-parallel degree
export PLACEMENT=${PLACEMENT:-"const"}          # parameter placement policy
export USE_SHARD_INIT=${USE_SHARD_INIT:-False}  # shard params at init time
export BATCH_SIZE=${BATCH_SIZE:-16}
export MODEL_TYPE=${MODEL_TYPE:-"gpt2_medium"}

mkdir -p logs
torchrun --standalone --nproc_per_node="${GPUNUM}" train_gpt_demo.py \
  --tp_degree="${TPDEGREE}" \
  --model_type="${MODEL_TYPE}" \
  --batch_size="${BATCH_SIZE}" \
  --placement "${PLACEMENT}" \
  --shardinit "${USE_SHARD_INIT}" \
  --distplan "${DISTPAN}" \
  2>&1 | tee "./logs/${MODEL_TYPE}_${DISTPAN}_gpu_${GPUNUM}_bs_${BATCH_SIZE}_tp_${TPDEGREE}.log"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment