Commit 48299b0d authored by Michael Carilli

Lazy imports to reduce error spam

parent cc85a2e5
@@ -3,23 +3,13 @@
from . import fp16_utils
from . import parallel
from . import amp
try:
from . import optimizers
except ImportError:
# An attempt to fix https://github.com/NVIDIA/apex/issues/97. I'm not sure why 97 is even
# happening because Python modules should only be imported once, even if import is called
# multiple times.
try:
_ = warned_optimizers
except NameError:
print("Warning: apex was installed without --cuda_ext. FusedAdam will be unavailable.")
warned_optimizers = True
try:
from . import normalization
except ImportError:
try:
_ = warned_normalization
except NameError:
print("Warning: apex was installed without --cuda_ext. FusedLayerNorm will be unavailable.")
warned_normalization = True
# For optimizers and normalization there is no Python fallback.
# Absence of the cuda backend is a hard error.
# I would like the errors from importing fused_adam_cuda or fused_layer_norm_cuda
# to be triggered lazily: if someone installed with --cpp_ext and --cuda_ext,
# and therefore expects those backends to be available, but for some reason they
# actually aren't (for example, because they built improperly in a way that isn't
# revealed until load time), the error message is timely and visible at the point of use.
from . import optimizers
from . import normalization
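
For illustration only, and not part of the diff, here is a minimal sketch of the lazy-import pattern the comment above describes; the extension name "some_cuda_ext" and the class NeedsExtension are hypothetical stand-ins for fused_adam_cuda/fused_layer_norm_cuda and the classes changed below:

import importlib

class NeedsExtension(object):
    def __init__(self):
        # The compiled extension is imported here, at first construction,
        # so importing the wrapping package never touches it.  If the
        # extension was not built, the ImportError surfaces at the point
        # of use instead of at package import time.
        global some_cuda_ext
        some_cuda_ext = importlib.import_module("some_cuda_ext")

Defining the class (and importing the module that contains it) always succeeds; only constructing NeedsExtension can raise.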
@@ -3,11 +3,13 @@ import torch
import numbers
from torch.nn.parameter import Parameter
from torch.nn import init
import fused_layer_norm_cuda
import importlib
class FusedLayerNormAffineFunction(torch.autograd.Function):
def __init__(self, normalized_shape, eps=1e-6):
global fused_layer_norm_cuda
fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
self.normalized_shape = normalized_shape
self.eps = eps
@@ -31,6 +33,8 @@ class FusedLayerNormAffineFunction(torch.autograd.Function):
class FusedLayerNormFunction(torch.autograd.Function):
def __init__(self, normalized_shape, eps=1e-6):
global fused_layer_norm_cuda
fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
self.normalized_shape = normalized_shape
self.eps = eps
@@ -117,6 +121,10 @@ class FusedLayerNorm(torch.nn.Module):
"""
def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
super(FusedLayerNorm, self).__init__()
global fused_layer_norm_cuda
fused_layer_norm_cuda = importlib.import_module("fused_layer_norm_cuda")
if isinstance(normalized_shape, numbers.Integral):
normalized_shape = (normalized_shape,)
self.normalized_shape = torch.Size(normalized_shape)
......
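
As a hedged usage sketch (assuming apex was built with --cuda_ext, a CUDA device is available, and the usual apex.normalization import path), the user-visible effect of the change above is that the extension is imported when a FusedLayerNorm is constructed rather than when apex is imported:

import torch
from apex.normalization import FusedLayerNorm

# fused_layer_norm_cuda is imported inside __init__, so a broken or missing
# --cuda_ext build fails loudly on this line, not on "import apex".
layer = FusedLayerNorm(512).cuda()
out = layer(torch.randn(8, 512, device="cuda"))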
import types
import torch
import fused_adam_cuda
import importlib
class FusedAdam(torch.optim.Optimizer):
@@ -36,6 +36,9 @@ class FusedAdam(torch.optim.Optimizer):
lr=1e-3, bias_correction = True,
betas=(0.9, 0.999), eps=1e-8, eps_inside_sqrt = False,
weight_decay=0., max_grad_norm=0., amsgrad=False):
global fused_adam_cuda
fused_adam_cuda = importlib.import_module("fused_adam_cuda")
if amsgrad:
raise RuntimeError('FusedAdam does not support the AMSGrad variant.')
defaults = dict(lr=lr, bias_correction=bias_correction,
......
@@ -8,16 +8,15 @@ else:
ReduceOp = torch.distributed.deprecated.reduce_op
from .distributed import DistributedDataParallel, Reducer
# This is tricky because I'd like SyncBatchNorm to be exposed the same way
# for both the cuda-enabled and python-fallback versions, and I don't want
# to suppress the error information.
try:
import syncbn
from .optimized_sync_batchnorm import SyncBatchNorm
except ImportError:
try:
_ = warned_syncbn
except NameError:
print("Warning: apex was installed without --cuda_ext. Fused syncbn kernels will be unavailable. Python fallbacks will be used instead.")
warned_syncbn = True
except ImportError as err:
from .sync_batchnorm import SyncBatchNorm
SyncBatchNorm.syncbn_import_error = err
def convert_syncbn_model(module, process_group=None, channel_last=False):
'''
......
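
As a hedged sketch of the "exposed the same way" intent above (assuming apex is installed; whichever backend was built is picked transparently):

import torch
# SyncBatchNorm is either the fused or the Python implementation, depending
# on how apex was built; callers import and use it the same way regardless.
from apex.parallel import SyncBatchNorm, convert_syncbn_model

sbn = SyncBatchNorm(8)   # without --cuda_ext, the stored import error is printed once here
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3),
                            torch.nn.BatchNorm2d(8))
model = convert_syncbn_model(model)   # swaps the BatchNorm layers for SyncBatchNorm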
import torch
# from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
try:
from apex_C import flatten
from apex_C import unflatten
except ImportError:
try:
_ = warned_flatten
except NameError:
print("Warning: apex was installed without --cpp_ext. Falling back to Python flatten and unflatten.")
warned_flatten = True
from torch._utils import _flatten_dense_tensors as flatten
from torch._utils import _unflatten_dense_tensors as unflatten
import torch.distributed as dist
from torch.nn.modules import Module
from torch.autograd import Variable
from collections import OrderedDict
from itertools import chain
import copy
import importlib
imported_flatten_impl = False
def import_flatten_impl():
global flatten_impl, unflatten_impl, imported_flatten_impl
try:
import apex_C
flatten_impl = apex_C.flatten
unflatten_impl = apex_C.unflatten
except ImportError:
print("Warning: apex was installed without --cpp_ext. Falling back to Python flatten and unflatten.")
flatten_impl = torch._utils._flatten_dense_tensors
unflatten_impl = torch._utils._unflatten_dense_tensors
imported_flatten_impl = True
def flatten(bucket):
if not imported_flatten_impl:
import_flatten_impl()
return flatten_impl(bucket)
def unflatten(coalesced, bucket):
if not imported_flatten_impl:
import_flatten_impl()
return unflatten_impl(coalesced, bucket)
# apply_dist_call requires that tensors in 'bucket' are all the same type.
def apply_flat_dist_call(bucket, call, extra_args=None):
......
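
To make the flatten/unflatten contract concrete, here is a small sketch using the Python fallback named in the code above; the apex_C functions are assigned to the same flatten_impl/unflatten_impl and called identically, so presumably they follow the same contract:

import torch
from torch._utils import _flatten_dense_tensors as flatten
from torch._utils import _unflatten_dense_tensors as unflatten

bucket = [torch.ones(2), torch.zeros(3)]   # tensors must all be the same type
coalesced = flatten(bucket)                # a single contiguous 1-D tensor with 5 elements
restored = unflatten(coalesced, bucket)    # tensors shaped like the originals, viewing coalesced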
@@ -2,6 +2,7 @@ import torch
from torch.nn.modules.batchnorm import _BatchNorm
from torch.nn import functional as F
import syncbn
from .optimized_sync_batchnorm_kernel import SyncBatchnormFunction
......
@@ -45,7 +45,14 @@ class SyncBatchNorm(_BatchNorm):
>>> out = sbn(inp)
"""
warned = False
def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True, process_group=None):
if not SyncBatchNorm.warned:
print("Warning: using Python fallback for SyncBatchNorm, possibly because apex was installed without --cuda_ext. The exception raised when attempting to import the cuda backend was: ", self.syncbn_import_error)
SyncBatchNorm.warned = True
super(SyncBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine, track_running_stats=track_running_stats)
self.process_group = process_group
......
@@ -34,8 +34,8 @@ class Model(Module):
return (input*self.a)*self.b
model = Model()
model = DDP(model, message_size=1, gradient_predivide_factor=8.0)
# model = DDP(model, delay_allreduce=True)
# model = DDP(model, message_size=1, gradient_predivide_factor=8.0)
model = DDP(model, delay_allreduce=True)
# model = DDP(model, message_size=1, allreduce_trigger_params=[model.b])
x = torch.cuda.FloatTensor(4096*4096)
......