#!/bin/bash
#
# Launch `bash .tmp` through Open MPI on the hosts listed in ./hostfile.
#
# `hostfile` and `.tmp` are relative paths, so run this script from the
# directory that contains both of them.
# NOTE(review): `.tmp` is presumably a generated per-run launch script
# (e.g. derived from llama2_7b.sh / llama2_13b.sh) -- confirm with whatever
# step creates it.

# For the llama2-7b pretrain test:
#mpirun -np 8 --allow-run-as-root ./llama2_7b.sh localhost . >& llama2_7b.log &

# For the llama2-13b pretrain test:
#mpirun -np 8 --allow-run-as-root ./llama2_13b.sh localhost . >& llama2_13b.log &

# Options:
#   --allow-run-as-root   let mpirun start even when invoked as root
#   --hostfile hostfile   read the list of target hosts from ./hostfile
#   --map-by node         map processes across nodes rather than filling
#                         one node first
#   -mca plm_rsh_args     extra arguments handed to the rsh/ssh launcher;
#                         "-p 3344" selects port 3344
#                         (assumes the remote sshd listens there -- confirm)
/opt/mpi/bin/mpirun \
  --allow-run-as-root \
  --hostfile hostfile \
  --map-by node \
  -mca plm_rsh_args "-p 3344" \
  bash .tmp

# How to calculate TGS (tokens/s/GPU):
#   TGS = sequence_length * global_batchsize / elapsed_time_per_iteration(s) / total_gpu_cards