#!/bin/bash

# Parse the --profiling=<value> option from the command-line arguments.
for para in "$@"
do
    if [[ $para == --profiling* ]]; then
        profiling=${para#*=}
    fi
done
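
# Example usage (a sketch only; the accepted --profiling values are defined by
# train_llama2_7b_2nodes.sh, so "true" here is an assumption):
#   bash run_llama2_7B.sh --profiling=true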

# These variables need to be modified
GPUS="16"                   # total number of GPUs to use
HOST="a121"                 # master node hostname (passed to the training script)
PORT="11452"                # master node port (passed to the training script)
DATA_PATH="/data/datasets/oscar-1GB_head-llama2_text_document"   # path prefix of the preprocessed training dataset
TOKENIZER_MODEL_PATH="/data/models/llama2/tokenizer.model"       # path to the Llama2 tokenizer.model file
CHECKPOINT_PATH="./ckpt"                                         # directory for saving/loading checkpoints
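
# The mpirun command below reads the participating nodes from a file named
# "hostfile" in the current directory. A minimal sketch, assuming two
# 8-GPU nodes (the second hostname is hypothetical):
#   a121 slots=8
#   a122 slots=8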

# Runs Llama2 7B model (the log file name below assumes 8 GPUs per node)
mkdir -p ./logs             # make sure the log directory exists before redirecting output into it
mpirun -np ${GPUS} --hostfile hostfile \
                    --allow-run-as-root \
                    --bind-to none \
                    --mca plm_rsh_no_tree_spawn 1 \
                    --mca plm_rsh_args "-p 11451" \
                    bash -c "
                    ./train_llama2_7b_2nodes.sh \
                    ${HOST} \
                    ${PORT} \
                    --data_path=$DATA_PATH \
                    --tokenizer_path=$TOKENIZER_MODEL_PATH \
                    --checkpoint_path=$CHECKPOINT_PATH \
                    --profiling=$profiling" > ./logs/log-$((${GPUS} / 8))nodes-$(date +%F-%H%M).log 2>&1

wait