Unverified Commit 4c2b05f9 authored by Jintao Lin, committed by GitHub

Load `hook_msgs` when resume checkpoint (#962)

parent 2623fbf2
@@ -339,6 +339,11 @@ class BaseRunner(metaclass=ABCMeta):
         self._epoch = checkpoint['meta']['epoch']
         self._iter = checkpoint['meta']['iter']
+        if self.meta is None:
+            self.meta = {}
+        self.meta.setdefault('hook_msgs', {})
+        # load `last_ckpt`, `best_score`, `best_ckpt`, etc. for hook messages
+        self.meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {}))
         # Re-calculate the number of iterations when resuming
         # models with different number of GPUs
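What the new lines buy you, as a minimal runnable sketch: a plain dict stands in for a loaded checkpoint, and a bare `meta` for `runner.meta` (the values mirror the test further down; none of the names here are mmcv API).

```python
# Sketch of the merge performed in `resume` above; a real checkpoint is a
# file deserialized by mmcv, here a plain dict stands in for it.
checkpoint = {
    'meta': {
        'epoch': 2,
        'iter': 200,
        # written by CheckpointHook/EvalHook during the previous run
        'hook_msgs': {
            'last_ckpt': 'work_dir/epoch_2.pth',
            'best_score': 4,
            'best_ckpt': 'work_dir/best_acc_epoch_2.pth',
        },
    }
}

meta = None  # runner.meta may still be None at resume time
if meta is None:
    meta = {}
meta.setdefault('hook_msgs', {})
# update() merges instead of overwriting, so messages set before the
# resume call survive alongside the restored ones
meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {}))

assert meta['hook_msgs']['best_ckpt'] == 'work_dir/best_acc_epoch_2.pth'
```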
@@ -29,13 +29,13 @@ class EvalHook(Hook):
             default: True.
         save_best (str, optional): If a metric is specified, it would measure
             the best checkpoint during evaluation. The information about best
-            checkpoint would be save in ``runner.meta['hook_msgs']``.
-            Options are the evaluation metrics to the test dataset. e.g.,
-            ``bbox_mAP``, ``segm_mAP`` for bbox detection and instance
-            segmentation. ``AR@100`` for proposal recall. If ``save_best`` is
-            ``auto``, the first key of the returned ``OrderedDict`` result
-            will be used. The interval of ``EvalHook`` should be
-            divisible of that in ``CheckpointHook``. Default: None.
+            checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep
+            best score value and best checkpoint path, which will be also
+            loaded when resume checkpoint. Options are the evaluation metrics
+            on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox
+            detection and instance segmentation. ``AR@100`` for proposal
+            recall. If ``save_best`` is ``auto``, the first key of the returned
+            ``OrderedDict`` result will be used. Default: None.
         rule (str | None, optional): Comparison rule for best score. If set to
             None, it will infer a reasonable rule. Keys such as 'acc', 'top'
             .etc will be inferred by 'greater' rule. Keys contain 'loss' will
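A hedged usage sketch of ``save_best``: the toy dataset and its ``evaluate`` return value are assumptions for illustration (mmcv expects the validation dataset to expose an ``evaluate`` method returning a dict of metrics), and the import path can differ across mmcv versions.

```python
from torch.utils.data import DataLoader, Dataset

from mmcv.runner import EvalHook  # assumed import path; varies by version


class ToyValSet(Dataset):
    """Stand-in validation set with the evaluate() method mmcv calls."""

    def __len__(self):
        return 4

    def __getitem__(self, idx):
        return idx

    def evaluate(self, results, logger=None):
        # the first key is what save_best='auto' would pick
        return {'acc': 4.0}


# Track the best 'acc'; the hook writes best_score/best_ckpt into
# runner.meta['hook_msgs'], which #962 now also restores on resume.
eval_hook = EvalHook(DataLoader(ToyValSet()), save_best='acc')
```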
@@ -144,6 +144,8 @@ class EvalHook(Hook):
             warnings.warn('runner.meta is None. Creating an empty one.')
             runner.meta = dict()
         runner.meta.setdefault('hook_msgs', dict())
+        self.best_ckpt_path = runner.meta['hook_msgs'].get(
+            'best_ckpt', None)

     def before_train_iter(self, runner):
         """Evaluate the model only at the start of training by iteration."""
@@ -241,10 +243,11 @@ class EvalHook(Hook):
             os.remove(self.best_ckpt_path)
         best_ckpt_name = f'best_{self.key_indicator}_{current}.pth'
-        runner.save_checkpoint(
-            runner.work_dir, best_ckpt_name, create_symlink=False)
         self.best_ckpt_path = osp.join(runner.work_dir, best_ckpt_name)
         runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path
+        runner.save_checkpoint(
+            runner.work_dir, best_ckpt_name, create_symlink=False)
         runner.logger.info(
             f'Now best checkpoint is saved as {best_ckpt_name}.')
         runner.logger.info(
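The reordering is the subtle part: ``hook_msgs`` must be updated *before* ``save_checkpoint`` so the freshly written best checkpoint already carries its own path in its meta, which is exactly what a later ``resume`` reads back (the test below resumes from the best checkpoint and checks this). A minimal sketch with a fake saver, not mmcv's implementation:

```python
import copy


def save_checkpoint(meta, filename):
    """Fake saver: snapshots meta the way a real serializer would."""
    return {'meta': copy.deepcopy(meta), 'filename': filename}


meta = {'hook_msgs': {}}
best_ckpt_name = 'best_acc_epoch_4.pth'

# New order (#962): record the path first...
meta['hook_msgs']['best_ckpt'] = 'work_dir/' + best_ckpt_name
ckpt = save_checkpoint(meta, best_ckpt_name)

# ...so the checkpoint itself carries the message a later resume loads;
# with the old order, the snapshot would still point at the previous best.
assert ckpt['meta']['hook_msgs']['best_ckpt'] == 'work_dir/' + best_ckpt_name
```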
@@ -292,13 +295,13 @@ class DistEvalHook(EvalHook):
             default: True.
         save_best (str, optional): If a metric is specified, it would measure
             the best checkpoint during evaluation. The information about best
-            checkpoint would be save in ``runner.meta['hook_msgs']``.
-            Options are the evaluation metrics to the test dataset. e.g.,
-            ``bbox_mAP``, ``segm_mAP`` for bbox detection and instance
-            segmentation. ``AR@100`` for proposal recall. If ``save_best`` is
-            ``auto``, the first key of the returned ``OrderedDict`` result
-            will be used. The interval of ``EvalHook`` should depend on
-            ``CheckpointHook``. Default: None.
+            checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep
+            best score value and best checkpoint path, which will be also
+            loaded when resume checkpoint. Options are the evaluation metrics
+            on the test dataset. e.g., ``bbox_mAP``, ``segm_mAP`` for bbox
+            detection and instance segmentation. ``AR@100`` for proposal
+            recall. If ``save_best`` is ``auto``, the first key of the returned
+            ``OrderedDict`` result will be used. Default: None.
         rule (str | None, optional): Comparison rule for best score. If set to
             None, it will infer a reasonable rule. Keys such as 'acc', 'top'
             .etc will be inferred by 'greater' rule. Keys contain 'loss' will
@@ -246,19 +246,24 @@ def test_eval_hook():
         runner.register_hook(eval_hook)
         runner.run([loader], [('train', 1)], 2)
-        ckpt_path = osp.join(tmpdir, 'best_acc_epoch_2.pth')
-        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
-        assert osp.exists(ckpt_path)
+        old_ckpt_path = osp.join(tmpdir, 'best_acc_epoch_2.pth')
+        assert runner.meta['hook_msgs']['best_ckpt'] == old_ckpt_path
+        assert osp.exists(old_ckpt_path)
         assert runner.meta['hook_msgs']['best_score'] == 4
-        resume_from = osp.join(tmpdir, 'latest.pth')
+        resume_from = old_ckpt_path
         loader = DataLoader(ExampleDataset())
         eval_hook = EvalHook(data_loader, save_best='acc')
         runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)
         runner.register_checkpoint_hook(dict(interval=1))
         runner.register_hook(eval_hook)
         runner.resume(resume_from)
+        assert runner.meta['hook_msgs']['best_ckpt'] == old_ckpt_path
+        assert osp.exists(old_ckpt_path)
+        assert runner.meta['hook_msgs']['best_score'] == 4
         runner.run([loader], [('train', 1)], 8)
         ckpt_path = osp.join(tmpdir, 'best_acc_epoch_4.pth')

@@ -266,6 +271,7 @@ def test_eval_hook():
         assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
         assert osp.exists(ckpt_path)
         assert runner.meta['hook_msgs']['best_score'] == 7
+        assert not osp.exists(old_ckpt_path)

 @patch('mmcv.engine.single_gpu_test', MagicMock)