[SyncBatchNorm update] (#285)

Resolves issue #254. Added input casting to the pure Python implementation so that it supports a mismatch between the input dtype and the layer dtype.

[SyncBatchNorm update] (#285)
Resolves issue #254. Added input casting to the pure Python implementation so that it supports a mismatch between the input dtype and the layer dtype.
ffbb52ba · jjsjann123 · mcarilli · 4d325d2f · ffbb52ba
Commit ffbb52ba authored May 17, 2019 by jjsjann123 Committed by mcarilli May 17, 2019
Hide whitespace changes
Inline Side-by-side

Showing with 16 additions and 2 deletions

apex/parallel/sync_batchnorm.py apex/parallel/sync_batchnorm.py +16 -2

No files found.
--- a/apex/parallel/sync_batchnorm.py
+++ b/apex/parallel/sync_batchnorm.py
@@ -66,10 +66,23 @@ class SyncBatchNorm(_BatchNorm):
        torch.cuda.nvtx.range_push("sync_bn_fw_with_mean_var")
        mean = None
        var = None
+        cast = None
+        out = None
+        # casting to handle mismatch input type to layer type
+        if self.running_mean is not None:
+            if self.running_mean.dtype != input.dtype:
+                input = input.to(self.running_mean.dtype)
+                cast = input.dtype
+        elif self.weight is not None:
+            if self.weight.dtype != input.dtype:
+                input = input.to(self.weight.dtype)
+                cast = input.dtype
        if not self.training and self.track_running_stats:
            # fall back to pytorch implementation for inference
            torch.cuda.nvtx.range_pop()
-            return F.batch_norm(input, self.running_mean, self.running_var, self.weight, self.bias, False, 0.0, self.eps)
+            out = F.batch_norm(input, self.running_mean, self.running_var, self.weight, self.bias, False, 0.0, self.eps)
        else:
            process_group = self.process_group
            world_size = 1
@@ -114,4 +127,5 @@ class SyncBatchNorm(_BatchNorm):
                        (m-1) * self.momentum * var + \
                        (1 - self.momentum) * self.running_var
            torch.cuda.nvtx.range_pop()
-            return SyncBatchnormFunction.apply(input, self.weight, self.bias, mean, var, self.eps, process_group, world_size)
+            out = SyncBatchnormFunction.apply(input, self.weight, self.bias, mean, var, self.eps, process_group, world_size)
+        out = out.to(cast)