"web/vscode:/vscode.git/clone" did not exist on "6aae1f497f680355b0e51242c4195cf75803056d"
evaluate.sh 818 Bytes
Newer Older
huchen's avatar
huchen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/bin/bash
#
# evaluate.sh - validate a submitted architecture with train.py and, if it is
# accepted, launch the distributed ImageNet training run.
#
# Usage: evaluate.sh DIST_URL WORLD_SIZE RANK SUBMIT_FILE SAVE_DIR
#   DIST_URL     forwarded to train.py as --ip
#   WORLD_SIZE   forwarded to train.py as --world-size
#   RANK         forwarded to train.py as --rank
#   SUBMIT_FILE  forwarded to train.py as -g
#   SAVE_DIR     forwarded to train.py as --save; also the directory that
#                holds the checkpoint.pth.tar given to --resume
#
# Must be started from the directory that contains sugon/, because every
# path below is built from the current working directory.

# Put the training sources first on the module search path. The
# ${PYTHONPATH:+...} form appends the previous value only when there is one,
# so an unset/empty PYTHONPATH does not leave a trailing ':' (an empty entry,
# which would make Python also search the directory the script is run from).
PYTHONPATH="$(pwd)/sugon/imagenet/${PYTHONPATH:+:${PYTHONPATH}}"
export PYTHONPATH

# Network interfaces Gloo is allowed to use.
# NOTE(review): ib0..ib3 look like InfiniBand interface names - confirm they
# exist on every node this script runs on.
export GLOO_SOCKET_IFNAME=ib0,ib1,ib2,ib3

DIST_URL=$1
WORLD_SIZE=$2
RANK=$3
SUBMIT_FILE=$4
SAVE_DIR=$5

# Log the submission and output location; quoted so that whitespace or glob
# characters in either value are printed verbatim.
echo "++${SUBMIT_FILE}++${SAVE_DIR}"

# Step 1: have train.py check the submitted architecture (--validate-architecture)
# before any training is started. The current directory is passed as the
# positional argument.
# NOTE(review): presumably the cwd only fills the slot the training call uses
# for the dataset directory - confirm against train.py.
#
# Any non-zero exit status from train.py is reported as an invalid
# architecture and aborts the script with status 1. All expansions are quoted
# so paths containing whitespace or glob characters reach train.py intact.
if ! python3 "$(pwd)/sugon/imagenet/train.py" \
  -a nasnet \
  -g "${SUBMIT_FILE}" \
  --auxiliary \
  --validate-architecture \
  "$(pwd)"; then
  echo 'Invalid architecture'
  exit 1
fi

# Step 2: launch the distributed training run.
#
# The arguments are collected in an array so that every value stays a single
# word even if it contains whitespace or glob characters (the unquoted
# expansions used previously were subject to word splitting and globbing).
train_args=(
  --batch-size 512
  --learning-rate=2.0
  -a nasnet
  -g "${SUBMIT_FILE}"
  --auxiliary
  -j 36
  --epochs=250
  # Rendezvous address and port for the distributed job.
  --ip="${DIST_URL}"
  --port=34567
  --dist-backend=gloo
  --world-size="${WORLD_SIZE}"
  --rank="${RANK}"
  --multiprocessing-distributed
  # --resume is always passed, pointing at the checkpoint inside SAVE_DIR.
  # NOTE(review): on a first run this file will not exist yet - confirm that
  # train.py tolerates a missing checkpoint.
  --resume "${SAVE_DIR}/checkpoint.pth.tar"
  --save "${SAVE_DIR}"
  # Placeholder: replace with the real ImageNet dataset directory before use.
  '/path/to/{ImageNet-pytorch-data-dir}'
)

# Kept as the last command so the script exits with train.py's status.
python3 "$(pwd)/sugon/imagenet/train.py" "${train_args[@]}"