run_rocm_distributed.sh 712 Bytes
Newer Older
rohithkrn's avatar
rohithkrn committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/bin/bash
# Run the ROCm distributed amp_master_params test on 2 GPUs.
#
# For each opt_level, launches amp_master_params.py via
# torch.distributed.launch, verifies the saved checkpoints with compare.py,
# then deletes the per-rank model/master checkpoint files.
#
# Env: PYTHON — interpreter to use (default: python3.6).
set -euo pipefail

# To run the test on 2 gpus
export WORLD_SIZE=2

# Interpreter is overridable; default preserves the original behavior.
PYTHON=${PYTHON:-python3.6}

# Remove the checkpoint files written by amp_master_params.py.
# -f so cleanup completes even if a file is already gone.
cleanup_model_files() {
  rm -f -- rank0model.pth rank1model.pth rank0master.pth rank1master.pth
}

# Run the distributed test at the given opt_level and compare results.
# Arguments: $1 - amp opt_level string (e.g. "O2")
run_opt_level() {
  local level=$1
  echo "running opt_level ${level}"
  "$PYTHON" -m torch.distributed.launch --nproc_per_node=2 \
    amp_master_params.py --opt_level "$level"
  "$PYTHON" compare.py
}

# Test with opt_level="O2"
run_opt_level "O2"
printf 'O2 test completed. Deleting model files\n\n'
cleanup_model_files

# Test with opt_level="O5"
# NOTE(review): apex documents opt levels O0-O3 only — confirm "O5" is
# intentional (kept as-is to preserve behavior).
run_opt_level "O5"
echo "O5 test completed. Deleting model files"
cleanup_model_files