"vscode:/vscode.git/clone" did not exist on "2287c8f2dc9dcad955318cc022cabe4d53051f65"
Unverified commit 4c2b05f9, authored by Jintao Lin, committed by GitHub

Load `hook_msgs` when resuming from a checkpoint (#962)

parent 2623fbf2
@@ -339,6 +339,11 @@ class BaseRunner(metaclass=ABCMeta):
         self._epoch = checkpoint['meta']['epoch']
         self._iter = checkpoint['meta']['iter']
+        if self.meta is None:
+            self.meta = {}
+        self.meta.setdefault('hook_msgs', {})
+        # load `last_ckpt`, `best_score`, `best_ckpt`, etc. for hook messages
+        self.meta['hook_msgs'].update(checkpoint['meta'].get('hook_msgs', {}))
         # Re-calculate the number of iterations when resuming
         # models with different number of GPUs
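In effect, a resumed runner now starts with the previous run's hook bookkeeping already merged into `runner.meta`. A minimal sketch of the resulting behavior, assuming a runner with an `EvalHook` registered (the path and values are illustrative):

# Sketch only: what resume() restores after this change.
runner.resume('work_dir/latest.pth')
msgs = runner.meta['hook_msgs']
# Keys persisted in the checkpoint's meta are merged back in,
# e.g. 'last_ckpt', 'best_score' and 'best_ckpt'.
print(msgs.get('best_ckpt'))   # e.g. 'work_dir/best_acc_epoch_2.pth'
print(msgs.get('best_score'))  # e.g. 4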
@@ -29,13 +29,13 @@ class EvalHook(Hook):
             default: True.
         save_best (str, optional): If a metric is specified, it would measure
             the best checkpoint during evaluation. The information about best
-            checkpoint would be save in ``runner.meta['hook_msgs']``.
-            Options are the evaluation metrics to the test dataset. e.g.,
-            ``bbox_mAP``, ``segm_mAP`` for bbox detection and instance
-            segmentation. ``AR@100`` for proposal recall. If ``save_best`` is
-            ``auto``, the first key of the returned ``OrderedDict`` result
-            will be used. The interval of ``EvalHook`` should be
-            divisible of that in ``CheckpointHook``. Default: None.
+            checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep
+            the best score and best checkpoint path, which will also be
+            loaded when resuming from a checkpoint. Options are the metrics
+            evaluated on the test dataset, e.g. ``bbox_mAP`` and ``segm_mAP``
+            for bbox detection and instance segmentation, or ``AR@100`` for
+            proposal recall. If ``save_best`` is ``auto``, the first key of
+            the returned ``OrderedDict`` result will be used. Default: None.
         rule (str | None, optional): Comparison rule for best score. If set to
             None, it will infer a reasonable rule. Keys such as 'acc', 'top'
             .etc will be inferred by 'greater' rule. Keys contain 'loss' will
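A hedged usage sketch of `save_best` as documented above (the validation loader and the 'acc' metric are illustrative assumptions):

# Track the best 'acc' seen during validation; EvalHook records the
# score and checkpoint path under runner.meta['hook_msgs'].
eval_hook = EvalHook(val_loader, save_best='acc')
runner.register_hook(eval_hook)
# After (or across) runs, the bookkeeping is available as:
#   runner.meta['hook_msgs']['best_score']
#   runner.meta['hook_msgs']['best_ckpt']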
@@ -144,6 +144,8 @@ class EvalHook(Hook):
             warnings.warn('runner.meta is None. Creating an empty one.')
             runner.meta = dict()
         runner.meta.setdefault('hook_msgs', dict())
+        self.best_ckpt_path = runner.meta['hook_msgs'].get(
+            'best_ckpt', None)

     def before_train_iter(self, runner):
         """Evaluate the model only at the start of training by iteration."""
@@ -241,10 +243,11 @@ class EvalHook(Hook):
                 os.remove(self.best_ckpt_path)

             best_ckpt_name = f'best_{self.key_indicator}_{current}.pth'
-            runner.save_checkpoint(
-                runner.work_dir, best_ckpt_name, create_symlink=False)
             self.best_ckpt_path = osp.join(runner.work_dir, best_ckpt_name)
+            runner.meta['hook_msgs']['best_ckpt'] = self.best_ckpt_path
+            runner.save_checkpoint(
+                runner.work_dir, best_ckpt_name, create_symlink=False)
             runner.logger.info(
                 f'Now best checkpoint is saved as {best_ckpt_name}.')
             runner.logger.info(
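The reordering is the point of this hunk: `runner.meta` is updated before `save_checkpoint`, so the best-checkpoint record is serialized into the checkpoint file itself and can be recovered by a later resume. A sketch of reading it back (assuming mmcv checkpoints load with `torch.load`; the path is illustrative):

import torch

# The saved file's meta now carries the hook bookkeeping, which is
# exactly what BaseRunner.resume() merges back (first hunk above).
ckpt = torch.load('work_dir/best_acc_epoch_2.pth', map_location='cpu')
print(ckpt['meta']['hook_msgs']['best_ckpt'])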
@@ -292,13 +295,13 @@ class DistEvalHook(EvalHook):
             default: True.
         save_best (str, optional): If a metric is specified, it would measure
             the best checkpoint during evaluation. The information about best
-            checkpoint would be save in ``runner.meta['hook_msgs']``.
-            Options are the evaluation metrics to the test dataset. e.g.,
-            ``bbox_mAP``, ``segm_mAP`` for bbox detection and instance
-            segmentation. ``AR@100`` for proposal recall. If ``save_best`` is
-            ``auto``, the first key of the returned ``OrderedDict`` result
-            will be used. The interval of ``EvalHook`` should depend on
-            ``CheckpointHook``. Default: None.
+            checkpoint would be saved in ``runner.meta['hook_msgs']`` to keep
+            the best score and best checkpoint path, which will also be
+            loaded when resuming from a checkpoint. Options are the metrics
+            evaluated on the test dataset, e.g. ``bbox_mAP`` and ``segm_mAP``
+            for bbox detection and instance segmentation, or ``AR@100`` for
+            proposal recall. If ``save_best`` is ``auto``, the first key of
+            the returned ``OrderedDict`` result will be used. Default: None.
         rule (str | None, optional): Comparison rule for best score. If set to
             None, it will infer a reasonable rule. Keys such as 'acc', 'top'
             .etc will be inferred by 'greater' rule. Keys contain 'loss' will
@@ -246,19 +246,24 @@ def test_eval_hook():
         runner.register_hook(eval_hook)
         runner.run([loader], [('train', 1)], 2)

-        ckpt_path = osp.join(tmpdir, 'best_acc_epoch_2.pth')
-        assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
-        assert osp.exists(ckpt_path)
+        old_ckpt_path = osp.join(tmpdir, 'best_acc_epoch_2.pth')
+        assert runner.meta['hook_msgs']['best_ckpt'] == old_ckpt_path
+        assert osp.exists(old_ckpt_path)
         assert runner.meta['hook_msgs']['best_score'] == 4

-        resume_from = osp.join(tmpdir, 'latest.pth')
+        resume_from = old_ckpt_path

         loader = DataLoader(ExampleDataset())
         eval_hook = EvalHook(data_loader, save_best='acc')
         runner = EpochBasedRunner(model=model, work_dir=tmpdir, logger=logger)

         runner.register_checkpoint_hook(dict(interval=1))
         runner.register_hook(eval_hook)
         runner.resume(resume_from)
+
+        assert runner.meta['hook_msgs']['best_ckpt'] == old_ckpt_path
+        assert osp.exists(old_ckpt_path)
+        assert runner.meta['hook_msgs']['best_score'] == 4
+
         runner.run([loader], [('train', 1)], 8)
         ckpt_path = osp.join(tmpdir, 'best_acc_epoch_4.pth')
@@ -266,6 +271,7 @@ def test_eval_hook():
         assert runner.meta['hook_msgs']['best_ckpt'] == ckpt_path
         assert osp.exists(ckpt_path)
         assert runner.meta['hook_msgs']['best_score'] == 7
+        assert not osp.exists(old_ckpt_path)


 @patch('mmcv.engine.single_gpu_test', MagicMock)
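Taken together, the tests exercise the full round trip this commit enables: train, record the best checkpoint, resume from that very file, and confirm the bookkeeping survives and stale files are cleaned up. A condensed sketch (names, epochs and the two-runner setup mirror the test and are illustrative):

# Train; EvalHook records the best checkpoint in runner.meta.
runner.run([loader], [('train', 1)], 2)
best = runner.meta['hook_msgs']['best_ckpt']

new_runner.resume(best)                       # hook_msgs restored here
assert new_runner.meta['hook_msgs']['best_ckpt'] == best
new_runner.run([loader], [('train', 1)], 8)   # a better score supersedes
assert not osp.exists(best)                   # stale best ckpt was removed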