OpenDAS / apex / Commits / 541da7a0

Commit 541da7a0 (unverified), authored Dec 02, 2021 by Hubert Lu, committed by GitHub on Dec 02, 2021

Merge pull request #58 from ROCmSoftwarePlatform/dev/hubertlu/unit_tests

Add more unit tests for both distributed and extensions

Parents: 08e88b1b 2228f1bf
Changes: 4 changed files with 45 additions and 9 deletions (+45 -9)
apex/contrib/multihead_attn/self_multihead_attn.py (+1 -1)
apex/contrib/test/run_rocm_extensions.py (+27 -0)
tests/L0/run_rocm.sh (+1 -1)
tests/distributed/run_rocm_distributed.sh (+16 -7)
apex/contrib/multihead_attn/self_multihead_attn.py

@@ -160,7 +160,7 @@ class SelfMultiheadAttn(nn.Module):
             outputs = self.attn_func(attn_mask is not None, is_training, self.num_heads, self.scaling, lyr_nrm_results,
                                      input_weights, self.out_proj_weight,
                                      input_bias, self.out_proj_bias,
-                                     mask, self.dropout)
+                                     mask, self.mask_additive, self.dropout)
             if is_training:
                 outputs = jit_dropout_add(outputs, query, self.dropout, is_training)
             else:
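Note on the change above: SelfMultiheadAttn carries a mask_additive flag, and this commit forwards it to the fused attention function so the kernel knows which masking convention the caller uses. The snippet below is an illustrative plain-PyTorch sketch of the two conventions (assumed semantics, not the fused kernel itself):

# Illustrative sketch only: a boolean mask removes key positions via
# masked_fill, while an additive mask is summed into the attention
# scores before softmax. Both yield the same probabilities here.
import torch
import torch.nn.functional as F

scores = torch.randn(2, 4, 4)               # (batch, query, key) attention logits
bool_mask = torch.zeros(2, 4, 4, dtype=torch.bool)
bool_mask[:, :, -1] = True                  # mask out the last key position

# Boolean convention: fill masked positions with -inf before softmax.
probs_bool = F.softmax(scores.masked_fill(bool_mask, float("-inf")), dim=-1)

# Additive convention (mask_additive-style): the mask already holds
# 0 for kept positions and a large negative value for masked ones.
add_mask = torch.zeros(2, 4, 4)
add_mask[:, :, -1] = float("-inf")
probs_add = F.softmax(scores + add_mask, dim=-1)

assert torch.allclose(probs_bool, probs_add)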
apex/contrib/test/run_rocm_extensions.py (new file, 0 → 100644)

import unittest
import sys

test_dirs = ["groupbn", "layer_norm", "multihead_attn", "."]  # "." for test_label_smoothing.py
ROCM_BLACKLIST = ["groupbn", "layer_norm"]

runner = unittest.TextTestRunner(verbosity=2)
errcode = 0

for test_dir in test_dirs:
    if test_dir in ROCM_BLACKLIST:
        continue
    suite = unittest.TestLoader().discover(test_dir)
    print("\nExecuting tests from " + test_dir)
    result = runner.run(suite)
    if not result.wasSuccessful():
        errcode = 1

sys.exit(errcode)
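Because unittest discovery resolves the entries of test_dirs relative to the working directory, the runner is presumably invoked from apex/contrib/test/. TestLoader().discover(test_dir) collects files matching test*.py; a minimal, hypothetical module it would pick up (not part of this commit) looks like this:

# test_example.py -- hypothetical illustration. unittest discovery
# matches files named test*.py and runs every unittest.TestCase
# subclass they define.
import unittest

class ExampleTest(unittest.TestCase):
    def test_smoke(self):
        self.assertEqual(1 + 1, 2)

if __name__ == "__main__":
    unittest.main()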
tests/L0/run_rocm.sh

 #!/bin/bash
-APEX_TEST_WITH_ROCM=1 python3.6 run_test.py
+APEX_TEST_WITH_ROCM=1 python run_test.py
tests/distributed/run_rocm_distributed.sh

@@ -6,8 +6,8 @@ export WORLD_SIZE=2
 # Test with opt_level="O2"
 echo "running opt_level O2"
-python3.6 -m torch.distributed.launch --nproc_per_node=2 amp_master_params/amp_master_params.py --opt_level "O2"
+python -m torch.distributed.launch --nproc_per_node=2 amp_master_params/amp_master_params.py --opt_level "O2"
-python3.6 amp_master_params/compare.py
+python amp_master_params/compare.py

 # delete the model files
 echo -e "O2 test completed. Deleting model files\n"
@@ -19,9 +19,9 @@ rm rank1master.pth
 # Test with opt_level="O5"
 #echo "running opt_level O5"
-#python3.6 -m torch.distributed.launch --nproc_per_node=2 amp_master_params/amp_master_params.py --opt_level "O5"
+#python -m torch.distributed.launch --nproc_per_node=2 amp_master_params/amp_master_params.py --opt_level "O5"
-#python3.6 amp_master_params/compare.py
+#python amp_master_params/compare.py
 #
 ## delete the model files
 #echo "O5 test completed. Deleting model files"
 #rm rank0model.pth
@@ -31,7 +31,16 @@ rm rank1master.pth
 ## Run the Sync BN Tests.
 echo "Running syncbn tests"
-python3.6 -m torch.distributed.launch --nproc_per_node=2 synced_batchnorm/two_gpu_test_different_batch_size.py --apex
+python -m torch.distributed.launch --nproc_per_node=2 synced_batchnorm/two_gpu_unit_test.py
+python -m torch.distributed.launch --nproc_per_node=2 synced_batchnorm/two_gpu_unit_test.py --fp16
+python -m torch.distributed.launch --nproc_per_node=2 synced_batchnorm/two_gpu_test_different_batch_size.py --apex
 echo "Running syncbn python only tests"
-python3.6 synced_batchnorm/python_single_gpu_unit_test.py
+python synced_batchnorm/python_single_gpu_unit_test.py
+echo "Running syncbn batchnorm1d tests"
+python synced_batchnorm/test_batchnorm1d.py
+#beware, you need a system with at least 4 gpus to test group_size<world_size (currently fail both on upstream and rocm fork)
+#python -m torch.distributed.launch --nproc_per_node=4 test_groups.py --group_size=2
+## Run the DDP Tests
+echo "running DDP tests"
+HIP_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 DDP/ddp_race_condition_test.py
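For reference, every distributed test above is started through torch.distributed.launch with two processes per node. In the PyTorch releases current at the time of this commit, that launcher passes a --local_rank argument to each worker and exports the rendezvous variables (MASTER_ADDR, MASTER_PORT, RANK, WORLD_SIZE). A minimal sketch of a worker script compatible with that launcher, under those assumptions:

# Minimal sketch of a torch.distributed.launch-compatible worker
# (assumes the pre-2.0 launcher that passes --local_rank).
import argparse
import torch
import torch.distributed as dist

parser = argparse.ArgumentParser()
parser.add_argument("--local_rank", type=int, default=0)
args = parser.parse_args()

# On ROCm builds of PyTorch, torch.cuda addresses HIP devices, so the
# same code runs on AMD GPUs (HIP_VISIBLE_DEVICES filters which ones).
torch.cuda.set_device(args.local_rank)

# The "nccl" backend is backed by RCCL on ROCm; init_method defaults
# to env://, reading the variables exported by the launcher.
dist.init_process_group(backend="nccl")

print(f"rank {dist.get_rank()} of {dist.get_world_size()} ready")
dist.destroy_process_group()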