run_gpt_567B_multinodes.sh 436 Bytes
Newer Older
1
2
3
4
5
6
7
# Scan the given arguments for a --profiling=<value> option and store <value>
# in the global variable `profiling`.
#
# Arguments: the launcher's positional parameters.
# Globals:   profiling -- written only when a matching argument is present;
#            when several arguments match, the last one wins.
parse_profiling_arg() {
    local arg
    # "$@" hands over every argument as exactly one word. Iterating over an
    # unquoted $* would word-split values containing spaces and glob-expand
    # characters such as '*' against the current directory.
    for arg in "$@"; do
        if [[ $arg == --profiling* ]]; then
            # Drop everything up to and including the first '='. An argument
            # without '=' (a bare --profiling) is stored unchanged.
            profiling=${arg#*=}
        fi
    done
}

parse_profiling_arg "$@"

silencealiang's avatar
silencealiang committed
8
# Launch train_gpt_567B_multinodes.sh under mpirun.
# The options live in an array so that each one reaches mpirun as exactly one
# word and the command line stays readable.
mpirun_args=(
    -np 1024
    --hostfile hostfile_gpt_567B    # resolved relative to the current directory
    --allow-run-as-root
    --bind-to none
    --mca plm_rsh_no_tree_spawn 1
)

# Log name carries a launch timestamp (date format %F-%H%M, i.e. YYYY-MM-DD-HHMM).
log_file="log-1024nodes-$(date +%F-%H%M).log"

# node059 is forwarded as the training script's first positional argument
# (presumably the master/rendezvous host -- confirm against the training script).
# The profiling value is quoted so it is forwarded as a single word;
# ${profiling-} expands to an empty string when no --profiling option was given.
# stdout and stderr are both captured in the log file.
mpirun "${mpirun_args[@]}" \
    train_gpt_567B_multinodes.sh node059 "--profiling=${profiling-}" > "$log_file" 2>&1
13
14
15
16
17

# Block until every background child of this shell has exited. Nothing in the
# visible script is started in the background, so this is expected to return
# immediately.
wait

# Delete these two paths (relative to the current directory) once the launch
# has returned. Presumably checkpoint output and the prepared dataset --
# confirm before relying on that.
for stale_path in CKPT gpt_dataset/redpajama_text_document; do
    rm -rf "$stale_path"
done