Commit 9e93852d authored by Yanghan Wang, committed by Facebook GitHub Bot

upgrade pytorch-lightning version to 1.8.6

Summary:
Pull Request resolved: https://github.com/facebookresearch/d2go/pull/453

Previous diffs updated the LRScheduler to the public version (e.g. https://github.com/facebookresearch/detectron2/pull/4709), which also requires a newer version of pytorch-lightning. This diff upgrades the lightning version to 1.8.6 and fixes call sites that relied on APIs from older lightning versions (an illustrative sketch of the new hook signatures follows this list):
- `deepcopy` of a LightningModule is now supported directly, so `_deepcopy` is removed (accessing the `trainer` attribute while it is `None` is no longer allowed).
- `dataloader_idx` has been removed from the `on_train_batch_start` signature.
- Stop using `_accelerator_connector` (the AcceleratorConnector no longer exposes those attributes); check `trainer.strategy` instead.
- Replace the deprecated `on_pretrain_routine_end` hook with `on_fit_start`.
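
For reference, a minimal sketch (illustrative only, not part of this diff; the class names are hypothetical) of the pytorch-lightning 1.8.x hook signatures these call sites are migrated to:

import pytorch_lightning as pl
from pytorch_lightning.strategies import DDPStrategy, SingleDeviceStrategy


class MyCallback(pl.Callback):  # hypothetical callback
    # `dataloader_idx` is no longer part of this hook's signature in 1.8.x.
    def on_train_batch_start(self, trainer, pl_module, batch, batch_idx) -> None:
        ...


class MyTask(pl.LightningModule):  # hypothetical LightningModule
    # Replaces the removed `on_pretrain_routine_end` hook.
    def on_fit_start(self) -> None:
        # Checking `trainer.strategy` replaces the private
        # `_accelerator_connector` attributes.
        assert isinstance(self.trainer.strategy, (SingleDeviceStrategy, DDPStrategy))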

Reviewed By: YanjunChen329

Differential Revision: D42319019

fbshipit-source-id: ba46abbd98da96783e15d187a361fda47dc7d4d6
parent 2246aba3
@@ -54,18 +54,6 @@ def rhasattr(obj: Any, attr: str, *args) -> bool:
     return True
 
 
-def _deepcopy(pl_module: LightningModule) -> LightningModule:
-    """Copy a LightningModule. Some properties need to be ignored."""
-    # Remove trainer reference.
-    trainer = pl_module.trainer
-    try:
-        pl_module.trainer = None
-        copy = deepcopy(pl_module)
-    finally:
-        pl_module.trainer = trainer
-    return copy
-
-
 def _quantized_forward(self, *args, **kwargs):
     """Forward method for a quantized module."""
     if not self.training and hasattr(self, "_quantized"):
@@ -100,7 +88,7 @@ def checkpoint_has_prepared(checkpoint: Dict[str, Any]) -> bool:
 def maybe_prepare_for_quantization(model: LightningModule, checkpoint: Dict[str, Any]):
     if checkpoint_has_prepared(checkpoint) and not hasattr(model, PREPARED):
         # model has been prepared for QAT before saving into checkpoint
-        copied = _deepcopy(model)
+        copied = deepcopy(model)
         prepared = prepare_fake_quant_model(copied.cfg, copied.model, is_qat=True)
         copied.model = prepared
         setattr(model, PREPARED, copied)
@@ -465,7 +453,7 @@ class QuantizationAwareTraining(Callback, QuantizationMixin):
         with mode(pl_module, training=True) as train:
             prepared = self.prepare(
-                _deepcopy(train),
+                deepcopy(train),
                 configs=self.qconfig_dicts,
                 attrs=self.preserved_attrs,
             )
@@ -483,7 +471,6 @@ class QuantizationAwareTraining(Callback, QuantizationMixin):
         pl_module: LightningModule,
         batch: Any,
         batch_idx: int,
-        dataloader_idx: int,
     ) -> None:
         """Applies model transforms at as specified during training."""
         apply_only_once = []
@@ -603,7 +590,7 @@ class PostTrainingQuantization(Callback, QuantizationMixin):
         """
         # Pass a copy to quantization APIs.
         self.prepared = self.prepare(
-            _deepcopy(pl_module).eval(),
+            deepcopy(pl_module).eval(),
            configs=self.qconfig_dicts,
             attrs=self.preserved_attrs,
         )
...
@@ -25,6 +25,7 @@ from d2go.runner.default_runner import (
 from d2go.utils.ema_state import EMAState
 from d2go.utils.misc import get_tensorboard_log_dir
 from detectron2.solver import build_lr_scheduler as d2_build_lr_scheduler
+from pytorch_lightning.strategies import DDPStrategy, SingleDeviceStrategy
 from pytorch_lightning.utilities import rank_zero_info, rank_zero_only
 from pytorch_lightning.utilities.logger import _flatten_dict
@@ -274,10 +275,10 @@ class DefaultTask(D2GoDataAPIMixIn, pl.LightningModule):
     def _reset_dataset_evaluators(self):
         """reset validation dataset evaluator to be run in EVAL_PERIOD steps"""
-        assert (
-            len(self.trainer._accelerator_connector.parallel_devices) == 1
-            or self.trainer._accelerator_connector.use_ddp
-        ), "Only DDP and DDP_CPU distributed backend are supported"
+        assert isinstance(self.trainer.strategy, (SingleDeviceStrategy, DDPStrategy)), (
+            "Only Single Device or DDP strategies are supported,"
+            f" instead found: {self.trainer.strategy}"
+        )
 
     def _get_inference_dir_name(
         base_dir, inference_type, dataset_name, model_tag: ModelTag
@@ -391,7 +392,7 @@ class DefaultTask(D2GoDataAPIMixIn, pl.LightningModule):
     # ---------------------------------------------------------------------------
     # Hooks
     # ---------------------------------------------------------------------------
-    def on_pretrain_routine_end(self) -> None:
+    def on_fit_start(self) -> None:
         if self.cfg.MODEL_EMA.ENABLED:
             if self.ema_state and self.ema_state.has_inited():
                 # ema_state could have been loaded from checkpoint
...
@@ -27,7 +27,7 @@ requirements = [
     "Pillow",
     "mock",
     "torch",
-    "pytorch-lightning @ git+https://github.com/PyTorchLightning/pytorch-lightning@9b011606f",
+    "pytorch-lightning==1.8.6",
     "opencv-python",
     "parameterized",
     # Downgrade the protobuf package to 3.20.x or lower, related:
...
@@ -131,9 +131,7 @@ class TestQuantizationAwareTraining(unittest.TestCase):
                 f"step={step}",
             )
             trainer.fit_loop.global_step = step
-            qat.on_train_batch_start(
-                trainer, module, batch=None, batch_idx=0, dataloader_idx=0
-            )
+            qat.on_train_batch_start(trainer, module, batch=None, batch_idx=0)
             self.assertEqual(
                 len(
...