Commit 484292f0 authored by Michael Carilli

some test cleanup

parent 2445031d
import contextlib
import logging
import warnings
+import torch

from . import utils
from .opt import OptimWrapper
@@ -83,7 +84,6 @@ def scale_loss(loss,
                        "loss scale to {}".format(optimizer.loss_scaler.loss_scale()))
                optimizer.step = optimizer_step
            optimizer.step = skip_step

    # Probably ok to skip this if not delay_unscale
    if _amp_state.opt_properties.patch_torch_functions:
        _amp_state.handle._clear_cache()
...
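The hunk above shows the overflow-handling pattern in scale_loss: the real step method is stashed, and on overflow optimizer.step is swapped for a one-shot replacement. A minimal sketch of that pattern follows (hypothetical helper name; not the code from this commit):

import warnings

def install_skip_step(optimizer):
    optimizer_step = optimizer.step  # keep the real bound method

    def skip_step(*args, **kwargs):
        warnings.warn("Gradient overflow; skipping optimizer.step() once")
        optimizer.step = optimizer_step  # restore the real step afterwards

    optimizer.step = skip_step  # the next step() call is the skip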
@@ -81,9 +81,7 @@ class LossScaler(object):
        self._overflow_buf.zero_()

    def unscale(self, model_params, master_params, scale):
-        # torch.cuda.nvtx.range_push("unscale")
        if self._has_overflow:
-            # torch.cuda.nvtx.range_pop()
            return

        # Lots of defensive list processing going on here.  Way less efficient than
@@ -92,6 +90,12 @@ class LossScaler(object):
                               in zip(model_params, master_params)] # some of these may be None

        if LossScaler.has_fused_kernel:
+            # TODO:  Make these lists permanent attributes of self, so they don't need to be created
+            # or garbage collected.  Profiler shows that garbage collection overhead may be
+            # substantial (200-300 usec).
+            # This may be tricky because right now the lists need to be packed densely.
+            # Maybe this could be handled within the multi_tensor_apply wrapper
+            # (allow some Tensors to be None using at::optional).
            src_dst_pairs = {torch.float16 : {torch.float16 : [[],[]], torch.float32 : [[],[]]},
                             torch.float32 : {torch.float16 : [[],[]], torch.float32 : [[],[]]}}
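The TODO added above reasons about allocation and garbage-collection overhead. A hedged sketch of the caching it proposes (hypothetical class, not part of this commit) would keep the nested list pairs alive across calls and empty them in place:

import torch

class _PairCache(object):
    """Hypothetical cache for the src/dst list pairs built in unscale()."""
    def __init__(self):
        self.src_dst_pairs = {
            torch.float16: {torch.float16: [[], []], torch.float32: [[], []]},
            torch.float32: {torch.float16: [[], []], torch.float32: [[], []]}}

    def clear(self):
        # Empty the lists in place so the dicts and lists are reused rather
        # than recreated; the lists must still be repacked densely each call.
        for dst_map in self.src_dst_pairs.values():
            for pair in dst_map.values():
                del pair[0][:]
                del pair[1][:]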
@@ -142,6 +146,8 @@ class LossScaler(object):
        if scale == 1.0 and all_same and not self.dynamic:
            return

+        # TODO:  Make these lists permanent attributes of self, so they don't need to be created
+        # or garbage collected?
        model_grads = [mmp[0].grad.data for mmp in model_master_params if mmp[0].grad is not None]
        master_grads = [mmp[1].grad.data for mmp in model_master_params if mmp[1].grad is not None]
@@ -151,8 +157,6 @@ class LossScaler(object):
        if LossScaler.has_fused_kernel and self.dynamic and not self._has_overflow:
            self._has_overflow = self._overflow_buf.item()
-        # torch.cuda.nvtx.range_pop()

    # Separate so unscale() can be called more than once before updating.
    def update_scale(self):
        if self._has_overflow and self.dynamic:
...
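The comment kept above ("Separate so unscale() can be called more than once before updating") is the key design point: with several parameter groups or gradient accumulation, unscale() may run repeatedly, and the scale is adjusted only once per step. A toy sketch of that call pattern (hypothetical and much simplified; not the LossScaler API):

import math

class ToyScaler(object):
    def __init__(self, scale=2.0**16):
        self._scale = scale
        self._has_overflow = False

    def unscale(self, grads):
        # Divide gradients by the current scale, recording any overflow.
        for i, g in enumerate(grads):
            grads[i] = g / self._scale
            if math.isinf(grads[i]) or math.isnan(grads[i]):
                self._has_overflow = True

    def update_scale(self):
        # Called once, after every unscale() for this step has run.
        if self._has_overflow:
            self._scale /= 2.0
            self._has_overflow = False

scaler = ToyScaler()
grads_a, grads_b = [1024.0], [float("inf")]
scaler.unscale(grads_a)  # first group
scaler.unscale(grads_b)  # second group, same step
scaler.update_scale()    # single scale update; the scale is halved here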
@@ -10,7 +10,7 @@ class MultiTensorApply(object):
            MultiTensorApply.available = True
            self.chunk_size = chunk_size
        except ImportError as err:
-            MultiTensorApply.availble = False
+            MultiTensorApply.available = False
            MultiTensorApply.import_err = err

    def check_avail(self):
...
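This one-character fix matters because check_avail() reads MultiTensorApply.available; the misspelled attribute meant a failed extension import never flipped the flag that later code checks. A sketch of the guarded-import pattern (simplified; amp_C is assumed here to be the compiled extension module):

class MultiTensorApplySketch(object):
    available = False
    import_err = None

    def __init__(self, chunk_size):
        try:
            import amp_C  # compiled extension; present only after a --cuda_ext build
            MultiTensorApplySketch.available = True
            self.chunk_size = chunk_size
        except ImportError as err:
            MultiTensorApplySketch.available = False
            MultiTensorApplySketch.import_err = err

    def check_avail(self):
        if not MultiTensorApplySketch.available:
            raise RuntimeError("Fused kernels unavailable: {}".format(
                MultiTensorApplySketch.import_err))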
@@ -107,6 +107,8 @@ print("keep_batchnorm_fp32 = {}".format(args.keep_batchnorm_fp32), type(args.keep_batchnorm_fp32))
print("loss_scale = {}".format(args.loss_scale), type(args.loss_scale))

+print("\nCUDNN VERSION: {}\n".format(torch.backends.cudnn.version()))
+
if args.deterministic:
    cudnn.benchmark = False
    cudnn.deterministic = True
...
@@ -46,6 +46,8 @@ rm False*
set -e

+print_banner "Installing Apex with --cuda_ext and --cpp_ext"
+
pushd ../../..
python setup.py install --cuda_ext --cpp_ext
popd
@@ -76,6 +78,8 @@ do
  set +x
done

+print_banner "Reinstalling apex without extensions"
+
pushd ../../..
python setup.py install
popd
@@ -102,6 +106,8 @@ do
  do
    for keep_batchnorm in "${keep_batchnorms[@]}"
    do
+      echo ""
+      echo "${BASE_CMD} --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm} [--has-ext] $DATADIR"
      set -x
      python compare.py --opt-level ${opt_level} ${loss_scale} ${keep_batchnorm}
      set +x
@@ -109,6 +115,8 @@ do
  done
done

+print_banner "Reinstalling Apex with --cuda_ext and --cpp_ext"
+
pushd ../../..
python setup.py install --cuda_ext --cpp_ext
popd