Commit 15498555 authored by Hubert Lu's avatar Hubert Lu
Browse files

Add unit tests for Apex extensions and distributed Apex

parent f3868524
import sys
import unittest

# Test directories discovered by unittest; "." picks up top-level files
# such as test_label_smoothing.py.
test_dirs = ["groupbn", "layer_norm", "multihead_attn", "."]

# Extensions skipped in this run (not supported / not built on ROCm).
ROCM_BLACKLIST = [
    "groupbn",
    "layer_norm",
]

runner = unittest.TextTestRunner(verbosity=2)
errcode = 0
for test_dir in test_dirs:
    if test_dir in ROCM_BLACKLIST:
        continue
    # Discover every test_*.py under the directory and run it.
    suite = unittest.TestLoader().discover(test_dir)
    print("\nExecuting tests from " + test_dir)
    result = runner.run(suite)
    # Remember the failure but keep running the remaining directories,
    # so one broken suite does not mask results from the others.
    if not result.wasSuccessful():
        errcode = 1
# Non-zero exit code tells CI that at least one suite failed.
sys.exit(errcode)
......@@ -6,8 +6,8 @@ export WORLD_SIZE=2
# Test with opt_level="O2"
echo "running opt_level O2"
# NOTE(review): the python3.6 and plain python invocations below are the
# old/new sides of a rendered diff; the actual script should contain only
# one pair — confirm against the repository.
python3.6 -m torch.distributed.launch --nproc_per_node=2 amp_master_params/amp_master_params.py --opt_level "O2"
python3.6 amp_master_params/compare.py
# Launch 2-process distributed training with AMP O2, then compare the
# saved master params against the model params.
python -m torch.distributed.launch --nproc_per_node=2 amp_master_params/amp_master_params.py --opt_level "O2"
python amp_master_params/compare.py
# delete the model files
echo -e "O2 test completed. Deleting model files\n"
......@@ -19,9 +19,9 @@ rm rank1master.pth
# Disabled: "O5" is not a valid AMP opt_level; this section is kept
# commented out as a template for additional opt_level runs.
# Test with opt_level="O5"
#echo "running opt_level O5"
#python3.6 -m torch.distributed.launch --nproc_per_node=2 amp_master_params/amp_master_params.py --opt_level "O5"
#python3.6 amp_master_params/compare.py
#
#python -m torch.distributed.launch --nproc_per_node=2 amp_master_params/amp_master_params.py --opt_level "O5"
#python amp_master_params/compare.py
## delete the model files
#echo "O5 test completed. Deleting model files"
#rm rank0model.pth
......@@ -31,7 +31,14 @@ rm rank1master.pth
## Run the Sync BN Tests.
echo "Running syncbn tests"
python3.6 -m torch.distributed.launch --nproc_per_node=2 synced_batchnorm/two_gpu_test_different_batch_size.py --apex
python -m torch.distributed.launch --nproc_per_node=2 synced_batchnorm/two_gpu_unit_test.py
python -m torch.distributed.launch --nproc_per_node=2 synced_batchnorm/two_gpu_unit_test.py --fp16
python -m torch.distributed.launch --nproc_per_node=2 synced_batchnorm/two_gpu_test_different_batch_size.py --apex
echo "Running syncbn python only tests"
python3.6 synced_batchnorm/python_single_gpu_unit_test.py
python synced_batchnorm/python_single_gpu_unit_test.py
echo "Running syncbn batchnorm1d tests"
python synced_batchnorm/test_batchnorm1d.py
## Run the DDP Tests
echo "running DDP tests"
HIP_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 DDP/ddp_race_condition_test.py
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment