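#!/bin/bash
# run_gpt_567B_multinodes.sh
#
# Launches train_gpt_567B_multinodes.sh through mpirun on the hosts listed in
# hostfile_gpt_567B, then removes CKPT from the working directory.
#
# Usage: run_gpt_567B_multinodes.sh [--profiling=<value>]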
#######################################
# Pick the --profiling option out of an argument list.
# For every argument that starts with "--profiling", the text after its first
# "=" is stored in the global `profiling`; when several match, the last one
# wins. `profiling` is left untouched when no argument matches.
# Globals:   profiling (written)
# Arguments: the command-line arguments to scan
#######################################
parse_profiling_arg() {
    local para
    # "$@" keeps each argument intact; an unquoted $* would re-split arguments
    # on whitespace and expand glob characters before they are inspected.
    for para in "$@"; do
        if [[ $para == --profiling* ]]; then
            profiling=${para#*=}
        fi
    done
}

parse_profiling_arg "$@"

# ---- Launch the training job ----
# Start the multi-node training run: mpirun launches
# train_gpt_567B_multinodes.sh with -np 512 on the hosts listed in
# hostfile_gpt_567B. Stdout and stderr of the launch are both written to
# output.log, which is truncated on every run.
# "node002" is handed to the training script as its first argument —
# presumably the master node's host name; confirm against
# train_gpt_567B_multinodes.sh.
# The --profiling option is quoted so that its value always reaches the
# training script as exactly one argument, even when it is empty or contains
# whitespace or glob characters.
mpirun -np 512 --hostfile hostfile_gpt_567B \
              --allow-run-as-root \
              --bind-to none \
              --mca plm_rsh_no_tree_spawn 1 \
              train_gpt_567B_multinodes.sh node002 "--profiling=${profiling}" > output.log 2>&1
# ---- Post-run cleanup ----

# Barrier for any background jobs owned by this shell. The mpirun launch
# earlier in this script runs in the foreground, so nothing started here is
# still pending when this line is reached.
wait

# Recursively and unconditionally delete CKPT from the current working
# directory; -f keeps this quiet when CKPT does not exist.
rm -r -f CKPT