#!/bin/bash
#
# Validates the architecture described by SUBMIT_FILE and, if validation
# succeeds, starts sugon/imagenet/train.py with distributed-training options.
#
# Usage:
#   <this script> DIST_URL WORLD_SIZE RANK SUBMIT_FILE SAVE_DIR [DATA_DIR]
#
# Arguments:
#   DIST_URL     forwarded to train.py as --ip
#   WORLD_SIZE   forwarded to train.py as --world-size
#   RANK         forwarded to train.py as --rank
#   SUBMIT_FILE  forwarded to train.py as -g
#   SAVE_DIR     forwarded as --save; --resume points at
#                SAVE_DIR/checkpoint.pth.tar
#   DATA_DIR     optional; positional dataset argument of the training run.
#                Defaults to the placeholder path that must otherwise be
#                edited in this file.
#
# The script resolves train.py relative to the current working directory, so
# it must be invoked from the directory that contains sugon/.
#
# Exit status:
#   2  fewer than five arguments were given
#   1  the --validate-architecture run failed
#   otherwise the exit status of the training run

if (( $# < 5 )); then
  printf 'Usage: %s DIST_URL WORLD_SIZE RANK SUBMIT_FILE SAVE_DIR [DATA_DIR]\n' \
    "${0##*/}" >&2
  exit 2
fi

# Resolve the working directory once instead of re-running pwd per use.
ROOT_DIR=$(pwd)
TRAIN_SCRIPT="${ROOT_DIR}/sugon/imagenet/train.py"

export PYTHONPATH="${ROOT_DIR}/sugon/imagenet/:${PYTHONPATH:-}"
export GLOO_SOCKET_IFNAME=ib0,ib1,ib2,ib3

DIST_URL=$1
WORLD_SIZE=$2
RANK=$3
SUBMIT_FILE=$4
SAVE_DIR=$5

# Single-quoted so the braces of the placeholder are kept literally.
DEFAULT_DATA_DIR='/path/to/{ImageNet-pytorch-data-dir}'
DATA_DIR=${6:-$DEFAULT_DATA_DIR}

printf '++%s++%s\n' "$SUBMIT_FILE" "$SAVE_DIR"

# First pass: only check the architecture. The working directory is passed as
# the positional argument, as in the original invocation.
if ! python3 "$TRAIN_SCRIPT" \
  -a nasnet \
  -g "$SUBMIT_FILE" \
  --auxiliary \
  --validate-architecture \
  "$ROOT_DIR"; then
  # Message stays on stdout, matching the script's previous behaviour.
  echo 'Invalid architecture'
  exit 1
fi

# Second pass: the actual training run. Its exit status becomes the exit
# status of this script.
python3 "$TRAIN_SCRIPT" \
  --batch-size 512 \
  --learning-rate=2.0 \
  -a nasnet \
  -g "$SUBMIT_FILE" \
  --auxiliary \
  -j 36 \
  --epochs=250 \
  --ip="${DIST_URL}" \
  --port=34567 \
  --dist-backend=gloo \
  --world-size="${WORLD_SIZE}" \
  --rank="${RANK}" \
  --multiprocessing-distributed \
  --resume "${SAVE_DIR}/checkpoint.pth.tar" \
  --save "$SAVE_DIR" \
  "$DATA_DIR"

