Unverified Commit b0c3fe1e authored by Min Xu, committed by GitHub

[minor] add a check around local_state_dict (#1040)


Co-authored-by: Min Xu <min.xu.public@gmail.com>
parent 16fba4c0
@@ -962,6 +962,11 @@ class FullyShardedDataParallel(nn.Module):
         so the resulting state_dict can only be loaded after the Module has been
         wrapped with FSDP.
         """
+        # Check state, specifically, we shouldn't be in SUMMON_FULL_PARAMS since
+        # that will produce full state, not sharded state.
+        self.assert_state(
+            [TrainingState.IDLE, TrainingState.FORWARD, TrainingState.BACKWARD_PRE, TrainingState.BACKWARD_POST]
+        )
         with contextlib.ExitStack() as stack:
             # Tell any nested FSDP instances not to auto summon full params.
             for module in self.modules():  # includes self
@@ -1025,6 +1030,11 @@ class FullyShardedDataParallel(nn.Module):
         self, state_dict: Union[Dict[str, torch.Tensor], "OrderedDict[str, torch.Tensor]"], strict: bool = True
     ) -> NamedTuple:
         """Load a local (sharded) state_dict."""
+        # Check state, specifically, we shouldn't be in SUMMON_FULL_PARAMS since
+        # that will load full state, not sharded state.
+        self.assert_state(
+            [TrainingState.IDLE, TrainingState.FORWARD, TrainingState.BACKWARD_PRE, TrainingState.BACKWARD_POST]
+        )
         with contextlib.ExitStack() as stack:
             # Tell any nested FSDP instances not to auto summon full params.
             for module in self.modules():  # includes self
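For context, here is a minimal sketch of the misuse these two checks now catch. Everything below is illustrative rather than part of the patch: the toy `Linear` module, the single-process "gloo" process group, and the broad `except` are assumptions; only `local_state_dict()`, `load_local_state_dict()`, and `summon_full_params()` come from the library. Inside `summon_full_params()` the wrapper is in the `SUMMON_FULL_PARAMS` state, so both calls now fail loudly instead of silently saving or loading full (unsharded) state:

```python
# Illustrative sketch only; assumes a fairscale build with this patch applied.
import torch
import torch.distributed as dist
from fairscale.nn import FullyShardedDataParallel as FSDP

# FSDP needs an initialized process group; a single-process "gloo" group is
# enough for a sketch (the address/port here are arbitrary).
dist.init_process_group(
    backend="gloo", init_method="tcp://127.0.0.1:29500", rank=0, world_size=1
)

model = FSDP(torch.nn.Linear(8, 8))

# Normal use: the wrapper is IDLE, so sharded save/load is allowed.
sharded = model.local_state_dict()
model.load_local_state_dict(sharded)

# Misuse: with full params summoned, local_state_dict() would return full
# state and load_local_state_dict() would load into full state. The new
# assert_state calls reject both.
with model.summon_full_params():
    for call in (lambda: model.local_state_dict(),
                 lambda: model.load_local_state_dict(sharded)):
        try:
            call()
        except Exception as err:  # assert_state raises on the wrong TrainingState
            print(f"rejected as expected: {err}")
```

Note that both checks whitelist the states under which sharded save/load is legitimate and leave out only `SUMMON_FULL_PARAMS`.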