Commit 2f0bf594, authored by Deyu Fu, committed via GitHub (unverified signature)

Merge branch 'master' into deyuf/update_norm

Parents: 99495376 40555b3a
#!/bin/bash

print_banner() {
    # Print the message black-on-green, padded with blank lines.
    # Pass $1 as a printf argument rather than embedding it in the
    # format string, so messages containing '%' print correctly.
    printf "\n\n\n\e[30m\e[42m%s\e[0m\n\n\n\n" "$1"
}
print_banner "Distributed status: $1"
# DATADIR="/home/mcarilli/Desktop/pt18data/apex/examples/imagenet/bare_metal_train_val/"
DATADIR="/opt/home/apex/examples/imagenet/"
if [ "$1" == "single_gpu" ]
then
BASE_CMD="python main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
fi
if [ "$1" == "distributed" ]
then
BASE_CMD="python -m torch.distributed.launch --nproc_per_node=2 main_amp.py -a resnet50 --b 128 --workers 4 --deterministic --prints-to-process 5"
fi
ADAM_ARGS="--opt-level O2 --keep-batchnorm-fp32 False --fused-adam"
keep_batchnorms=(
    ""
    "--keep-batchnorm-fp32 True"
    "--keep-batchnorm-fp32 False"
)
loss_scales=(
    ""
    "--loss-scale 1.0"
    "--loss-scale 128.0"
    "--loss-scale dynamic"
)
opt_levels=(
    "O0"
    "O1"
    "O2"
    "O3"
)
# Clear stale output files from previous runs; -f keeps a clean first run from aborting here.
rm -f True* False*
set -e
print_banner "Installing Apex with --cuda_ext and --cpp_ext"
pushd ../../..
python setup.py install --cuda_ext --cpp_ext
popd
for opt_level in "${opt_levels[@]}"
do
    for loss_scale in "${loss_scales[@]}"
    do
        for keep_batchnorm in "${keep_batchnorms[@]}"
        do
            # O1 does not accept an explicit keep-batchnorm-fp32 override, so skip those combinations.
            if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
            then
                print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
                continue
            fi
            print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --has-ext $DATADIR"
            set -x
            ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} --has-ext $DATADIR
            set +x
        done
    done
done
# Handle FusedAdam separately due to limited support.
# FusedAdam will not be tested for bitwise accuracy against the Python implementation.
# The L0 tests already do so. These tests are here to ensure that it actually runs
# and to give an idea of performance.
for loss_scale in "${loss_scales[@]}"
do
    print_banner "${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext $DATADIR"
    set -x
    ${BASE_CMD} ${ADAM_ARGS} ${loss_scale} --has-ext $DATADIR
    set +x
done
print_banner "Reinstalling apex without extensions"
pushd ../../..
python setup.py install
popd
for opt_level in "${opt_levels[@]}"
do
    for loss_scale in "${loss_scales[@]}"
    do
        for keep_batchnorm in "${keep_batchnorms[@]}"
        do
            if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
            then
                print_banner "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
                continue
            fi
            print_banner "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} $DATADIR"
            set -x
            ${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} $DATADIR
            set +x
        done
    done
done
print_banner "Checking for bitwise accuracy between Python-only and cpp/cuda extension installs"
for opt_level in "${opt_levels[@]}"
do
    for loss_scale in "${loss_scales[@]}"
    do
        for keep_batchnorm in "${keep_batchnorms[@]}"
        do
            echo ""
            if [ "$opt_level" == "O1" ] && [ -n "${keep_batchnorm}" ]
            then
                echo "Skipping ${opt_level} ${loss_scale} ${keep_batchnorm}"
                continue
            fi
            echo "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} [--has-ext] $DATADIR"
            set -x
            python compare.py --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm}
            set +x
        done
    done
done
print_banner "Reinstalling Apex with --cuda_ext and --cpp_ext"
pushd ../../..
python setup.py install --cuda_ext --cpp_ext
popd
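For reference, the flag cross-product swept above corresponds roughly to the following calls inside main_amp.py. This is a minimal sketch against the public apex.amp API; main_amp.py's actual wiring may differ, and the model and optimizer here are placeholders, not code from this diff.

# Sketch of how the swept flags map onto apex's amp API (assumption:
# main_amp.py forwards --opt-level/--loss-scale/--keep-batchnorm-fp32 unchanged).
import torch
from apex import amp
from apex.optimizers import FusedAdam

model = torch.nn.Linear(1024, 16).cuda()  # placeholder model
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
# For the ADAM_ARGS runs, --fused-adam would swap in apex's fused optimizer instead:
# optimizer = FusedAdam(model.parameters(), lr=1e-3)

# e.g. --opt-level O2 --loss-scale 128.0 --keep-batchnorm-fp32 False
model, optimizer = amp.initialize(
    model, optimizer,
    opt_level="O2",
    loss_scale=128.0,           # a float, "dynamic", or None for the opt-level default
    keep_batchnorm_fp32=False,  # O1 rejects this override, hence the skips above
)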
#!/bin/bash
cp ../common/* .
bash run_test.sh single_gpu

#!/bin/bash
cp ../common/* .
bash run_test.sh distributed
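The distributed variant relies on torch.distributed.launch spawning one process per GPU and passing each a --local_rank argument; every worker then binds to its device and joins the process group. A minimal sketch of that standard launcher contract follows (not code from main_amp.py):

# Per-process setup implied by `python -m torch.distributed.launch --nproc_per_node=2 ...`
# (standard launcher contract; the real main_amp.py may differ in details).
import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument("--local_rank", type=int, default=0)  # injected by the launcher
args = parser.parse_args()

torch.cuda.set_device(args.local_rank)  # one GPU per process
torch.distributed.init_process_group(backend="nccl", init_method="env://")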
import unittest
import functools as ft
import itertools as it

import torch

from apex.fp16_utils import FP16_Optimizer


# Currently no-ops (tested via examples).
# FP16_Optimizer to be deprecated and moved under unified Amp API.
class TestFP16Optimizer(unittest.TestCase):

    def setUp(self):
        N, D_in, D_out = 64, 1024, 16
        self.N = N
        self.D_in = D_in
        self.D_out = D_out
        self.x = torch.randn((N, D_in), dtype=torch.float16, device='cuda')
        self.y = torch.randn((N, D_out), dtype=torch.float16, device='cuda')
        self.model = torch.nn.Linear(D_in, D_out).cuda().half()

    # def tearDown(self):
    #     pass

    def test_minimal(self):
        pass

    def test_minimal_static(self):
        pass

    def test_minimal_dynamic(self):
        pass

    def test_closure(self):
        pass

    def test_closure_dynamic(self):
        pass

    def test_save_load(self):
        pass


if __name__ == '__main__':
    unittest.main()
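The test bodies above are deliberate placeholders. For orientation, the API surface they would eventually cover looks roughly like this; a sketch based on the documented apex.fp16_utils.FP16_Optimizer interface, not code from this diff:

# Typical FP16_Optimizer training step (documented apex.fp16_utils interface).
import torch
from apex.fp16_utils import FP16_Optimizer

model = torch.nn.Linear(1024, 16).cuda().half()
optimizer = FP16_Optimizer(torch.optim.SGD(model.parameters(), lr=1e-3),
                           static_loss_scale=128.0)  # or dynamic_loss_scale=True

x = torch.randn(64, 1024, dtype=torch.float16, device="cuda")
loss = model(x).float().sum()

optimizer.zero_grad()
optimizer.backward(loss)  # replaces loss.backward(); applies the loss scale
optimizer.step()          # unscales grads, updates the fp32 master params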