#!/usr/bin/env bash
#
# FaceNet Horovod training on DCUs: device selection and run parameters.
#
# Usage: <this script> [NUM [START]]
#   NUM    number of DCUs to use; when omitted it is derived from `rocm-smi`
#   START  ID of the first DCU to use (default: 0)
#
# DCUs START .. START+NUM-1 are exposed through HIP_VISIBLE_DEVICES.
# Only POSIX sh syntax is used below so `sh <this script>` keeps working.

# Left-over manual settings (disabled):
#export HIP_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
#NUM=8

# Succeed when $1 is a non-empty string made of decimal digits only.
is_uint() {
    case "$1" in
        '' | *[!0-9]*) return 1 ;;
        *) return 0 ;;
    esac
}

# Print the DCU count derived from `rocm-smi` output: the number of lines
# from a line containing "DCU" through the next line containing "===",
# minus 2 for those two boundary lines.
# NOTE(review): assumes the device table is framed by exactly such a header
# and separator line - confirm against the installed rocm-smi.
count_dcus() {
    echo $(( $(rocm-smi | sed -n '/DCU/,/===/ p' | wc -l) - 2 ))
}

# Print the IDs first, first+1, ..., first+count-1 joined by commas.
#   $1 - first device ID
#   $2 - number of devices
dcu_id_list() {
    seq -s, "$1" "$(( $1 + $2 - 1 ))"
}

# Report a command-line error on stderr and exit with status 2.
die_usage() {
    echo "error: $1" >&2
    echo "usage: $0 [NUM_DCUS [FIRST_DCU_ID]]" >&2
    exit 2
}

START=0
if [ "$#" -gt 0 ]; then      ## DCU number given explicitly: rocm-smi is not needed
    if ! is_uint "$1" || [ "$1" -lt 1 ]; then
        die_usage "DCU count must be a positive integer, got '$1'"
    fi
    NUM=$1
else
    NUM=$(count_dcus)
    if [ "${NUM}" -lt 1 ]; then
        # Kept non-fatal: the value is passed on unchanged, only reported.
        echo "warning: could not determine the DCU count from rocm-smi (got ${NUM}); pass it as the first argument" >&2
    fi
fi
if [ "$#" -gt 1 ]; then      ## ID of the first DCU
    if ! is_uint "$2"; then
        die_usage "first DCU ID must be a non-negative integer, got '$2'"
    fi
    START=$2
fi
LAST=$(( START + NUM - 1 ))

# Assign and export separately so a failing command substitution is not
# hidden behind the exit status of `export`.
HIP_VISIBLE_DEVICES=$(dcu_id_list "${START}" "${NUM}")
export HIP_VISIBLE_DEVICES
export HSA_FORCE_FINE_GRAIN_PCIE=1

# Optional rocBLAS / MIOpen logging switches (disabled):
#export ROCBLAS_LAYER=3
#export MIOPEN_ENABLE_LOGGING=1
#export MIOPEN_ENABLE_LOGGING_CMD=1
#export MIOPEN_LOG_LEVEL=6

BS=128
# Integer division, so any remainder is dropped.
# An earlier assignment using 462413 instead of 426208 was immediately
# overwritten by this one and has been removed as dead code.
EPOCH_SIZE=$(( 426208 / BS ))
#EPOCH_SIZE=1000

# Launch src/train_softmax_horovod.py through horovodrun with NUM processes
# on localhost; stdout and stderr are merged and mirrored by tee into a
# timestamped log file in the current directory.
# Reads NUM, BS and EPOCH_SIZE, which must be set before this point.

# Refuse to launch with a process count that is not a positive integer
# (for example when the count could not be determined).
case "${NUM:-}" in
    '' | *[!0-9]*)
        echo "error: DCU count '${NUM:-}' is not a positive integer; aborting launch" >&2
        exit 1
        ;;
esac
if [ "${NUM}" -lt 1 ]; then
    echo "error: DCU count must be at least 1, got '${NUM}'; aborting launch" >&2
    exit 1
fi

# Without pipefail the pipeline's status is tee's, so a failed training run
# would look successful to the caller. Probe in a subshell first because
# some sh implementations reject the option.
if (set -o pipefail) 2>/dev/null; then
    set -o pipefail
fi

LOG_FILE="facenet_${NUM}dcu_${BS}_$(date +%Y%m%d%H%M%S).log"

# NOTE(review): --learning_rate -1 together with --learning_rate_schedule_file
# presumably makes the trainer take its rate from the schedule file; confirm
# against src/train_softmax_horovod.py.
horovodrun -np "${NUM}" -H "localhost:${NUM}" python src/train_softmax_horovod.py \
    --logs_base_dir ./logs_m/facenet/ \
    --models_base_dir ./models_m/facenet/ \
    --data_dir /datasets/facenet/casia_maxpy_mtcnnpy_182_clean/ \
    --image_size 160 \
    --model_def models.inception_resnet_v1 \
    --lfw_dir /datasets/facenet/lfw_mtcnnpy_160/ \
    --optimizer ADAM \
    --learning_rate -1 \
    --max_nrof_epochs 100 \
    --keep_probability 0.8 \
    --random_crop \
    --random_flip \
    --use_fixed_image_standardization \
    --learning_rate_schedule_file data/casia2.txt \
    --weight_decay 5e-4 \
    --embedding_size 512 \
    --lfw_distance_metric 1 \
    --lfw_use_flipped_images \
    --lfw_subtract_mean \
    --validation_set_split_ratio 0.05 \
    --validate_every_n_epochs 1 \
    --batch_size "${BS}" \
    --epoch_size "${EPOCH_SIZE}" \
    --evaluate_every_n_epochs 1 \
    --best_evaluate_lag 10 \
    --evaluate_start_epoch 0 \
    --lfw_accuracy_threshold 0.99 \
    --prelogits_norm_loss_factor 5e-4 2>&1 | tee "${LOG_FILE}"
