#!/bin/bash

# runs benchmark and reports time to convergence
# to use the script:
#   run_and_time.sh

set -e

lrank=$OMPI_COMM_WORLD_LOCAL_RANK
comm_rank=$OMPI_COMM_WORLD_RANK
comm_size=$OMPI_COMM_WORLD_SIZE

export HSA_FORCE_FINE_GRAIN_PCIE=1
export MIOPEN_FIND_MODE=5

source config_DGXA100.sh

# run benchmark
set -x

APP="python3 tools/train_mlperf.py ${EXTRA_PARAMS} --local_rank $comm_rank --world_size $comm_size --config-file configs/e2e_mask_rcnn_R_50_FPN_1x.yaml DTYPE 'float16' PATHS_CATALOG 'maskrcnn_benchmark.bak/config/paths_catalog_dbcluster.py' MODEL.WEIGHT './R-50.pkl' DISABLE_REDUCED_LOGGING True ${EXTRA_CONFIG}"

case ${lrank} in
[0])
  export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
  echo $APP
  numactl --cpunodebind=0 --membind=0 ${APP}
  ;;
[1])
  export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
  numactl --cpunodebind=1 --membind=1 ${APP}
  ;;
[2])
  export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
  numactl --cpunodebind=2 --membind=2 ${APP}
  ;;
[3])
  export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
  numactl --cpunodebind=3 --membind=3 ${APP}
  ;;
[4])
  export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
  numactl --cpunodebind=4 --membind=4 ${APP}
  ;;
[5])
  export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
  numactl --cpunodebind=5 --membind=5 ${APP}
  ;;
[6])
  export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
  numactl --cpunodebind=6 --membind=6 ${APP}
  ;;
[7])
  export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
  numactl --cpunodebind=7 --membind=7 ${APP}
  ;;
esac


