Unverified Commit 4c2b05f9 authored by Jintao Lin, committed by GitHub

Load `hook_msgs` when resume checkpoint (#962)

parent 2623fbf2
@@ -339,6 +339,11 @@ class BaseRunner(metaclass=ABCMeta):
         self._epoch = checkpoint['meta']['epoch']
         self._iter = checkpoint['meta']['iter']
+        if self.meta is None:
+            self.meta = {}
+        self.meta.setdefault('hook_msgs', {})
+        # load `last_ckpt`, `best_score`, `best_ckpt`, etc. for hook messages
+        self.meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {}))
         # Re-calculate the number of iterations when resuming
         # models with different number of GPUs
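What the new lines buy you, as a minimal runnable sketch: a plain dict stands in for a loaded checkpoint, and a bare `meta` for `runner.meta` (the values mirror the test further down; none of the names here are mmcv API).

```python
# Sketch of the merge performed in `resume` above; a real checkpoint is a
# file deserialized by mmcv, here a plain dict stands in for it.
checkpoint = {
    'meta': {
        'epoch': 2,
        'iter': 200,
        # written by CheckpointHook/EvalHook during the previous run
        'hook_msgs': {
            'last_ckpt': 'work_dir/epoch_2.pth',
            'best_score': 4,
            'best_ckpt': 'work_dir/best_acc_epoch_2.pth',
        },
    }
}

meta = None  # runner.meta may still be None at resume time
if meta is None:
    meta = {}
meta.setdefault('hook_msgs', {})
# update() merges instead of overwriting, so messages set before the
# resume call survive alongside the restored ones
meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {}))

assert meta['hook_msgs']['best_ckpt'] == 'work_dir/best_acc_epoch_2.pth'
```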
@@ -29,13 +29,13 @@ class EvalHook(Hook):
             default: True.
         save_best (str, optional): If a metric is specified, it would measure
             the best checkpoint during evaluation. The information about best
-            checkpoint would be save in ``runner.meta['hook_msgs']``.
-            Options are the evaluation metrics to the test dataset. e.g.,
-            ``bbox_mAP``, ``segm_mAP`` for bbox detection and instance
-            segmentation. ``AR@100`` for proposal recall. If ``save_best`` is
-            ``auto``, the first key of the returned ``OrderedDict`` result
-            will be used. The interval of ``EvalHook`` should be
-            divisible of that in ``CheckpointHook``. Default: None.
+            checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep
+            best score value and best checkpoint path, which will be also
+            loaded when resume checkpoint. Options are the evaluation metrics
+            on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox
+            detection and instance segmentation. ``AR@100`` for proposal
+            recall. If ``save_best`` is ``auto``, the first key of the returned
+            ``OrderedDict`` result will be used. Default: None.
         rule (str | None, optional): Comparison rule for best score. If set to
             None, it will infer a reasonable rule. Keys such as 'acc', 'top'
             .etc will be inferred by 'greater' rule. Keys contain 'loss' will
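A hedged usage sketch of ``save_best``: the toy dataset and its ``evaluate`` return value are assumptions for illustration (mmcv expects the validation dataset to expose an ``evaluate`` method returning a dict of metrics), and the import path can differ across mmcv versions.

```python
from torch.utils.data import DataLoader, Dataset

from mmcv.runner import EvalHook  # assumed import path; varies by version


class ToyValSet(Dataset):
    """Stand-in validation set with the evaluate() method mmcv calls."""

    def __len__(self):
        return 4

    def __getitem__(self, idx):
        return idx

    def evaluate(self, results, logger=None):
        # the first key is what save_best='auto' would pick
        return {'acc': 4.0}


# Track the best 'acc'; the hook writes best_score/best_ckpt into
# runner.meta['hook_msgs'], which #962 now also restores on resume.
eval_hook = EvalHook(DataLoader(ToyValSet()), save_best='acc')
```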
@@ -144,6 +144,8 @@ class EvalHook(Hook):
             warnings.warn('runner.meta is None. Creating an empty one.')
             runner.meta = dict()
         runner.meta.setdefault('hook_msgs', dict())
+        self.best_ckpt_path = runner.meta['hook_msgs'].get(
+            'best_ckpt', None)

     def before_train_iter(self, runner):
         """Evaluate the model only at the start of training by iteration."""
@@ -241,10 +243,11 @@ class EvalHook(Hook):
             os.remove(self.best_ckpt_path)
         best_ckpt_name = f'best_{self.key_indicator}_{current}.pth'
-        runner.save_checkpoint(
-            runner.work_dir, best_ckpt_name, create_symlink=False)
         self.best_ckpt_path = osp.join(runner.work_dir, best_ckpt_name)
         runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path
+        runner.save_checkpoint(
+            runner.work_dir, best_ckpt_name, create_symlink=False)
         runner.logger.info(
             f'Now best checkpoint is saved as {best_ckpt_name}.')
         runner.logger.info(
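The reordering is the subtle part: ``hook_msgs`` must be updated *before* ``save_checkpoint`` so the freshly written best checkpoint already carries its own path in its meta, which is exactly what a later ``resume`` reads back (the test below resumes from the best checkpoint and checks this). A minimal sketch with a fake saver, not mmcv's implementation:

```python
import copy


def save_checkpoint(meta, filename):
    """Fake saver: snapshots meta the way a real serializer would."""
    return {'meta': copy.deepcopy(meta), 'filename': filename}


meta = {'hook_msgs': {}}
best_ckpt_name = 'best_acc_epoch_4.pth'

# New order (#962): record the path first...
meta['hook_msgs']['best_ckpt'] = 'work_dir/' + best_ckpt_name
ckpt = save_checkpoint(meta, best_ckpt_name)

# ...so the checkpoint itself carries the message a later resume loads;
# with the old order, the snapshot would still point at the previous best.
assert ckpt['meta']['hook_msgs']['best_ckpt'] == 'work_dir/' + best_ckpt_name
```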
@@ -292,13 +295,13 @@ class DistEvalHook(EvalHook):
             default: True.
         save_best (str, optional): If a metric is specified, it would measure
             the best checkpoint during evaluation. The information about best
-            checkpoint would be save in ``runner.meta['hook_msgs']``.
-            Options are the evaluation metrics to the test dataset. e.g.,
-            ``bbox_mAP``, ``segm_mAP`` for bbox detection and instance
-            segmentation. ``AR@100`` for proposal recall. If ``save_best`` is
-            ``auto``, the first key of the returned ``OrderedDict`` result
-            will be used. The interval of ``EvalHook`` should depend on
-            ``CheckpointHook``. Default: None.
+            checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep
+            best score value and best checkpoint path, which will be also
+            loaded when resume checkpoint. Options are the evaluation metrics
+            on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox
+            detection and instance segmentation. ``AR@100`` for proposal
+            recall. If ``save_best`` is ``auto``, the first key of the returned
+            ``OrderedDict`` result will be used. Default: None.
         rule (str | None, optional): Comparison rule for best score. If set to
             None, it will infer a reasonable rule. Keys such as 'acc', 'top'
             .etc will be inferred by 'greater' rule. Keys contain 'loss' will
@@ -246,19 +246,24 @@ def test_eval_hook():
         runner.register_hook(eval_hook)
         runner.run([loader], [('train', 1)], 2)
-        ckpt_path = osp.join(tmpdir, 'best_acc_epoch_2.pth')
-        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
-        assert osp.exists(ckpt_path)
+        old_ckpt_path = osp.join(tmpdir, 'best_acc_epoch_2.pth')
+        assert runner.meta['hook_msgs']['best_ckpt'] == old_ckpt_path
+        assert osp.exists(old_ckpt_path)
         assert runner.meta['hook_msgs']['best_score'] == 4
-        resume_from = osp.join(tmpdir, 'latest.pth')
+        resume_from = old_ckpt_path
         loader = DataLoader(ExampleDataset())
         eval_hook = EvalHook(data_loader, save_best='acc')
         runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
         runner.register_checkpoint_hook(dict(interval=1))
         runner.register_hook(eval_hook)
         runner.resume(resume_from)
+        assert runner.meta['hook_msgs']['best_ckpt'] == old_ckpt_path
+        assert osp.exists(old_ckpt_path)
+        assert runner.meta['hook_msgs']['best_score'] == 4
         runner.run([loader], [('train', 1)], 8)
         ckpt_path = osp.join(tmpdir, 'best_acc_epoch_4.pth')

@@ -266,6 +271,7 @@ def test_eval_hook():
         assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
         assert osp.exists(ckpt_path)
         assert runner.meta['hook_msgs']['best_score'] == 7
+        assert not osp.exists(old_ckpt_path)

 @patch('mmcv.engine.single_gpu_test', MagicMock)