# run_gpt_567B_multinodes.sh
# Scan command-line arguments for a `--profiling=<value>` option and store
# <value> in the global variable `profiling`.
#
# Arguments: the argument list to scan (the script passes its own "$@").
# Globals:   profiling - written only when a matching argument is found; when
#            several arguments match, the last one wins.
# Note:      an argument that starts with `--profiling` but contains no `=`
#            is stored unchanged, because the prefix-strip pattern `*=` does
#            not match it.
parse_profiling() {
    local para
    # "$@" keeps every argument as one word. The previous unquoted $* re-split
    # arguments on whitespace and glob-expanded them, corrupting values that
    # contain spaces or wildcard characters.
    for para in "$@"; do
        if [[ $para == --profiling* ]]; then
            # Drop everything up to and including the first '='.
            profiling=${para#*=}
        fi
    done
}

parse_profiling "$@"

# Launch configuration for the GPT 567B run.

# Load the environment script first, so the assignments below take effect
# after anything it sets.
. /opt/dtk/env.sh

# Forwarded as the first two positional arguments of the training script.
# NOTE(review): presumably the master node address and port - confirm.
HOST=''  # modify this variable
PORT='25900'

# Placeholder values; replace them with real paths before launching.
DATA_PATH='path to redpajama_text_document'
TOKENIZER_MODEL_PATH='path to tokenizer.model'
CHECKPOINT_PATH='path to ckpt'

# Start 1024 MPI processes of train_gpt_567B_multinodes.sh on the hosts listed
# in hostfile_gpt_567B, sending stdout and stderr to a timestamped log file.
#
# Reads: HOST, PORT, DATA_PATH, TOKENIZER_MODEL_PATH, CHECKPOINT_PATH, profiling.
#
# The training-script arguments are collected in an array and expanded quoted,
# so each value stays exactly one argument. Unquoted, an empty HOST vanished
# and shifted every positional argument, and paths containing spaces were
# split into several words.
train_args=(
    "${HOST}"
    "${PORT}"
    "--data_path=${DATA_PATH}"
    "--tokenizer_path=${TOKENIZER_MODEL_PATH}"
    "--checkpoint_path=${CHECKPOINT_PATH}"
    "--profiling=${profiling}"
)

# Log name carries the launch date and time (YYYY-MM-DD-HHMM).
log_file="log-1024nodes-$(date +%F-%H%M).log"

mpirun -np 1024 --hostfile hostfile_gpt_567B \
    --allow-run-as-root \
    --bind-to none \
    --mca plm_rsh_no_tree_spawn 1 \
    train_gpt_567B_multinodes.sh \
    "${train_args[@]}" > "${log_file}" 2>&1
# Block until all background jobs of this shell have finished.
# NOTE(review): the mpirun command above runs in the foreground and no job is
# visibly backgrounded in this script, so this looks like it returns
# immediately - confirm before relying on it as a barrier.
wait