#!/bin/bash
#
# Launch one rank of a distributed ImageNet training run using
# sugon/imagenet/train.py, after first asking train.py to validate the
# submitted architecture.
#
# Must be run from the directory that contains sugon/ (all script paths are
# resolved against the current working directory).
#
# Usage:
#   <this script> DIST_URL WORLD_SIZE RANK SUBMIT_FILE SAVE_DIR
#
# Arguments:
#   DIST_URL     forwarded to train.py as --ip
#   WORLD_SIZE   forwarded to train.py as --world-size
#   RANK         forwarded to train.py as --rank
#   SUBMIT_FILE  forwarded to train.py as -g (presumably the architecture /
#                genotype description being submitted -- confirm in train.py)
#   SAVE_DIR     forwarded as --save; the run also resumes from
#                SAVE_DIR/checkpoint.pth.tar
#
# Exit status:
#   2  wrong number of arguments
#   1  architecture validation failed
#   otherwise the exit status of the training command

set -euo pipefail

if (( $# < 5 )); then
  printf 'Usage: %s DIST_URL WORLD_SIZE RANK SUBMIT_FILE SAVE_DIR\n' \
    "${0##*/}" >&2
  exit 2
fi

# The original variable names are kept on purpose: if any of them is already
# exported by the caller's environment, assigning to it also updates the value
# that python3 inherits.
DIST_URL=$1
WORLD_SIZE=$2
RANK=$3
SUBMIT_FILE=$4
SAVE_DIR=$5

# Resolve the working directory once instead of re-running pwd per use.
ROOT_DIR=$(pwd)
TRAIN_PY="${ROOT_DIR}/sugon/imagenet/train.py"

# Dataset location handed to the real training run. This is still the
# placeholder from the original script; edit it to point at the ImageNet data.
DATA_DIR='/path/to/{ImageNet-pytorch-data-dir}'

# Prepend the training package; only add the ':' separator when PYTHONPATH
# already has content, so no empty entry is introduced.
export PYTHONPATH="${ROOT_DIR}/sugon/imagenet/${PYTHONPATH:+:${PYTHONPATH}}"
# Network interfaces offered to the gloo backend.
export GLOO_SOCKET_IFNAME=ib0,ib1,ib2,ib3

printf '++%s++%s\n' "${SUBMIT_FILE}" "${SAVE_DIR}"

# Step 1: validation-only invocation. The working directory is passed as the
# positional argument here, exactly as in the original script.
validate_args=(
  -a nasnet
  -g "${SUBMIT_FILE}"
  --auxiliary
  --validate-architecture
  "${ROOT_DIR}"
)

# Test the command directly (safe under `set -e`) rather than inspecting $?.
# The message stays on stdout and the exit code stays 1 so that any wrapper
# relying on the original behaviour keeps working.
if ! python3 "${TRAIN_PY}" "${validate_args[@]}"; then
  echo 'Invalid architecture'
  exit 1
fi

# Step 2: the actual distributed training run.
train_args=(
  --batch-size 512
  --learning-rate=2.0
  -a nasnet
  -g "${SUBMIT_FILE}"
  --auxiliary
  -j 36
  --epochs=250
  --ip="${DIST_URL}"
  --port=34567
  --dist-backend=gloo
  --world-size="${WORLD_SIZE}"
  --rank="${RANK}"
  --multiprocessing-distributed
  --resume "${SAVE_DIR}/checkpoint.pth.tar"
  --save "${SAVE_DIR}"
  "${DATA_DIR}"
)

python3 "${TRAIN_PY}" "${train_args[@]}"