Commit 7d462a77 authored by wxj
Browse files

Update llama_pretraining.sh

parent b44f3138
Pipeline #2038 passed with stage
...@@ -98,11 +98,29 @@ EVAL_AND_LOGGING_ARGS=( ...@@ -98,11 +98,29 @@ EVAL_AND_LOGGING_ARGS=(
--tensorboard-dir $TENSORBOARD_LOGS_PATH --tensorboard-dir $TENSORBOARD_LOGS_PATH
) )
# Cluster topology defaults; each can be overridden on the command line.
NNODES=1
NODE_RANK=0
MASTER_ADDR=localhost

#######################################
# Parse the leading --NNODES / --NODE_RANK / --MASTER_ADDR options.
# Parsing stops at the first argument that is not one of these options;
# that argument and everything after it are left unconsumed.
# Globals:   NNODES, NODE_RANK, MASTER_ADDR (written when the option is given)
#            DIST_ARGS_CONSUMED (written) - count of arguments consumed
# Arguments: the script's command-line arguments
# Outputs:   an error message on stderr when an option lacks its value
# Returns:   0 on success, 2 if an option is missing its value
#######################################
parse_distributed_args() {
  DIST_ARGS_CONSUMED=0
  while [ $# -gt 0 ]; do
    case "$1" in
      --NNODES | --NODE_RANK | --MASTER_ADDR)
        # Without this check a trailing option would silently assign an
        # empty string and the launcher would receive an empty value.
        if [ $# -lt 2 ]; then
          printf '%s: option %s requires a value\n' "${0##*/}" "$1" >&2
          return 2
        fi
        case "$1" in
          --NNODES) NNODES=$2 ;;
          --NODE_RANK) NODE_RANK=$2 ;;
          --MASTER_ADDR) MASTER_ADDR=$2 ;;
        esac
        shift 2
        DIST_ARGS_CONSUMED=$((DIST_ARGS_CONSUMED + 2))
        ;;
      *)
        break
        ;;
    esac
  done
}

parse_distributed_args "$@" || exit
# Drop the consumed options so "$@" holds only the unparsed remainder.
shift "$DIST_ARGS_CONSUMED"
DISTRIBUTED_ARGS=( DISTRIBUTED_ARGS=(
--nproc_per_node 4 --nproc_per_node 2
--nnodes 1 --nnodes $NNODES
--node_rank 0 --node_rank $NODE_RANK
--master_addr localhost --master_addr $MASTER_ADDR
--master_port 29500 --master_port 29500
) )
export HIP_VISIBLE_DEVICES=0,1,2,3 #4,5,6,7 export HIP_VISIBLE_DEVICES=0,1,2,3 #4,5,6,7
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment