Commit 7d462a77 authored by wxj
Browse files

Update llama_pretraining.sh

parent b44f3138
Pipeline #2038 passed with stage
......@@ -98,11 +98,29 @@ EVAL_AND_LOGGING_ARGS=(
--tensorboard-dir $TENSORBOARD_LOGS_PATH
)
# Launch-topology defaults (single node, rank 0, rendezvous on localhost).
# Each one can be overridden on the command line with the matching
# --NNODES / --NODE_RANK / --MASTER_ADDR option followed by its value.
NNODES=1
NODE_RANK=0
MASTER_ADDR=localhost

#######################################
# Parse the leading --NNODES / --NODE_RANK / --MASTER_ADDR options.
# Parsing stops at the first word that is not one of these options.
# Globals:
#   NNODES, NODE_RANK, MASTER_ADDR (written when the option is given)
#   LAUNCH_ARGS_CONSUMED (written) - number of words that were consumed
# Arguments:
#   The positional parameters to parse.
# Outputs:
#   A diagnostic on stderr when an option has no value.
# Returns:
#   0 on success, 2 when an option is the last word and has no value.
#######################################
parse_launch_args() {
  LAUNCH_ARGS_CONSUMED=0
  while [ $# -gt 0 ]; do
    case "$1" in
      --NNODES | --NODE_RANK | --MASTER_ADDR)
        # Without this check a trailing option would silently assign an
        # empty string and later expand to a bare launcher flag.
        if [ $# -lt 2 ]; then
          printf 'error: option %s requires a value\n' "$1" >&2
          return 2
        fi
        ;;
      *)
        # First unrecognised word: leave it (and everything after it) alone.
        break
        ;;
    esac
    case "$1" in
      --NNODES) NNODES=$2 ;;
      --NODE_RANK) NODE_RANK=$2 ;;
      --MASTER_ADDR) MASTER_ADDR=$2 ;;
    esac
    shift 2
    LAUNCH_ARGS_CONSUMED=$((LAUNCH_ARGS_CONSUMED + 2))
  done
  return 0
}

parse_launch_args "$@" || exit 2
# Drop the consumed options from the script's own positional parameters,
# so whatever follows sees only the unparsed remainder.
shift "$LAUNCH_ARGS_CONSUMED"
# Options describing the distributed launch topology, collected in an array.
# NOTE(review): this listing looks like a rendered diff whose +/- markers were
# lost. The four literal entries (4 / 1 / 0 / localhost) appear to be the
# pre-change lines, and the entries using $NNODES / $NODE_RANK / $MASTER_ADDR
# their replacements. As written, every one of those flags is listed twice;
# confirm which set belongs in the real script.
# NOTE(review): the variable expansions are unquoted, so an empty value
# contributes no array element and leaves its flag without an argument.
DISTRIBUTED_ARGS=(
--nproc_per_node 4
--nnodes 1
--node_rank 0
--master_addr localhost
--nproc_per_node 2
--nnodes $NNODES
--node_rank $NODE_RANK
--master_addr $MASTER_ADDR
--master_port 29500
)
# Export the list of visible HIP devices (0-3); the trailing comment records
# 4-7 as an alternative set.
# NOTE(review): four devices are exposed while --nproc_per_node above is 2 in
# its later entry — confirm the intended process/device count.
export HIP_VISIBLE_DEVICES=0,1,2,3 #4,5,6,7
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment