Unverified commit 5cddaea4 authored by Min Xu, committed by GitHub

[fix] check before calling _specify_ddp_gpu_num (#626)



- this function is being removed in PyTorch
- we only need to call it when working with an older PyTorch (see the sketch below)
Co-authored-by: Min Xu <min.xu@acm.org>
parent d3b86d65
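
In effect, the fix wraps the legacy call in a hasattr() guard so it becomes a no-op on newer PyTorch. A minimal standalone sketch of the pattern, assuming a hypothetical helper name guard_legacy_sync_bn (the real changes are in the two hunks below):

import torch

def guard_legacy_sync_bn(module: torch.nn.Module) -> None:
    # _specify_ddp_gpu_num is a private DDP hook that PyTorch removed in 1.9,
    # so only call it when the attribute still exists (i.e. on older PyTorch).
    for layer in module.modules():
        if isinstance(layer, torch.nn.modules.SyncBatchNorm) and hasattr(layer, "_specify_ddp_gpu_num"):
            layer._specify_ddp_gpu_num(1)  # type: ignore  # "1" = GPUs per DDP worker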
@@ -539,10 +539,11 @@ class ShardedDataParallel(nn.Module):
         Adapted from ``torch.nn.distributed.DistributedDataParallel``.
         """
         for layer in module.modules():
-            if isinstance(layer, torch.nn.modules.SyncBatchNorm):
+            if isinstance(layer, torch.nn.modules.SyncBatchNorm) and hasattr(layer, "_specify_ddp_gpu_num"):
                 assert self.device_type != "cpu", "SyncBatchNorm layers only work with GPU modules"
                 # device_id logic has not been handled, assume single-process single-device
                 # SyncBatchNorm only supports DDP with single-process single-device anyway'
+                # This function is removed from pytorch since 1.9.
                 layer._specify_ddp_gpu_num(1)  # type: ignore

     def _setup_bucket_strategy(self) -> None:
@@ -50,8 +50,9 @@ def enable_pytorch_sync_bn(module: torch.nn.Module) -> None:
     is happily running even without DDP. E.g. this is used by FSDP.
     """
     for layer in module.modules():
-        if isinstance(layer, torch.nn.modules.SyncBatchNorm):
+        if isinstance(layer, torch.nn.modules.SyncBatchNorm) and hasattr(layer, "_specify_ddp_gpu_num"):
             # Number "1" below meant to be the number of GPUs for each DDP worker.
             # (i.e. "device_ids" in DDP. As far as I see, the value is not actually
             # used, but this call needs to be made to avoid an exception.
+            # This function is removed from pytorch since 1.9.
             layer._specify_ddp_gpu_num(1)  # type: ignore
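
As a quick sanity check of the guard across PyTorch versions, one can probe a SyncBatchNorm layer directly (a hedged example; num_features=8 is arbitrary):

import torch

bn = torch.nn.SyncBatchNorm(num_features=8)
if hasattr(bn, "_specify_ddp_gpu_num"):
    # PyTorch < 1.9: the legacy hook still exists and can be called.
    bn._specify_ddp_gpu_num(1)  # type: ignore
else:
    # PyTorch >= 1.9: the hook is gone, so the guarded code above skips it.
    print(f"torch {torch.__version__}: _specify_ddp_gpu_num no longer exists")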