#!/bin/bash

# For the llama2-7b pretraining test:
#mpirun -np 8 --allow-run-as-root ./llama2_7b.sh localhost . >& llama2_7b.log &

# For the llama2-13b pretraining test:
#mpirun -np 8 --allow-run-as-root ./llama2_13b.sh localhost . >& llama2_13b.log &

# Launch `bash .tmp` through mpirun on the hosts listed in `hostfile`.
# `hostfile` and `.tmp` are relative paths, so this script must be started
# from the directory that contains them.
# NOTE(review): `.tmp` is presumably a generated per-run launch script; its
# contents are not visible here -- confirm it exists before running.
#
# Options are kept in an array so each argument (in particular the
# space-containing "-p 3344") is passed to mpirun as exactly one word.
#   --allow-run-as-root     let mpirun run under the root account
#   --hostfile hostfile     read the target hosts from ./hostfile
#   --map-by node           map processes by node
#   -mca plm_rsh_args ...   extra arguments handed to the rsh/ssh launcher;
#                           "-p 3344" presumably selects the remote SSH port
launcher_opts=(
  --allow-run-as-root
  --hostfile hostfile
  --map-by node
  -mca plm_rsh_args "-p 3344"
)

/opt/mpi/bin/mpirun "${launcher_opts[@]}" bash .tmp
# How to calculate TGS (tokens per second per GPU), with the elapsed time in seconds:
# TGS = sequence_length * global_batchsize / elapsed_time_per_iteration(s) / total_gpu_cards