Unverified commit 9185eee8, authored by Zaida Zhou, committed by GitHub

Remove runner, parallel, engine and device (#2216)

* Remove runner, parallel, engine and device

* fix format

* remove outdated docs
parent 19a02415
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.utils import Registry, is_method_overridden
HOOKS = Registry('hook')
class Hook:
stages = ('before_run', 'before_train_epoch', 'before_train_iter',
'after_train_iter', 'after_train_epoch', 'before_val_epoch',
'before_val_iter', 'after_val_iter', 'after_val_epoch',
'after_run')
def before_run(self, runner):
pass
def after_run(self, runner):
pass
def before_epoch(self, runner):
pass
def after_epoch(self, runner):
pass
def before_iter(self, runner):
pass
def after_iter(self, runner):
pass
def before_train_epoch(self, runner):
self.before_epoch(runner)
def before_val_epoch(self, runner):
self.before_epoch(runner)
def after_train_epoch(self, runner):
self.after_epoch(runner)
def after_val_epoch(self, runner):
self.after_epoch(runner)
def before_train_iter(self, runner):
self.before_iter(runner)
def before_val_iter(self, runner):
self.before_iter(runner)
def after_train_iter(self, runner):
self.after_iter(runner)
def after_val_iter(self, runner):
self.after_iter(runner)
def every_n_epochs(self, runner, n):
return (runner.epoch + 1) % n == 0 if n > 0 else False
def every_n_inner_iters(self, runner, n):
return (runner.inner_iter + 1) % n == 0 if n > 0 else False
def every_n_iters(self, runner, n):
return (runner.iter + 1) % n == 0 if n > 0 else False
def end_of_epoch(self, runner):
return runner.inner_iter + 1 == len(runner.data_loader)
def is_last_epoch(self, runner):
return runner.epoch + 1 == runner._max_epochs
def is_last_iter(self, runner):
return runner.iter + 1 == runner._max_iters
def get_triggered_stages(self):
trigger_stages = set()
for stage in Hook.stages:
if is_method_overridden(stage, Hook, self):
trigger_stages.add(stage)
# some methods will be triggered in multi stages
# use this dict to map method to stages.
method_stages_map = {
'before_epoch': ['before_train_epoch', 'before_val_epoch'],
'after_epoch': ['after_train_epoch', 'after_val_epoch'],
'before_iter': ['before_train_iter', 'before_val_iter'],
'after_iter': ['after_train_iter', 'after_val_iter'],
}
for method, map_stages in method_stages_map.items():
if is_method_overridden(method, Hook, self):
trigger_stages.update(map_stages)
return [stage for stage in Hook.stages if stage in trigger_stages]
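# A minimal sketch of a user-defined hook registered through the registry
# above. The class name, the `interval` argument and the NaN check are
# illustrative (modelled on downstream hooks), and it assumes the runner
# exposes the latest model outputs as `runner.outputs['loss']`.
import torch


@HOOKS.register_module()
class CheckInvalidLossHook(Hook):
    """Abort training when the loss becomes NaN or infinite."""

    def __init__(self, interval=50):
        self.interval = interval

    def after_train_iter(self, runner):
        # only check every `interval` training iterations
        if self.every_n_iters(runner, self.interval):
            assert torch.isfinite(runner.outputs['loss']), \
                'loss became infinite or NaN!'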
# Copyright (c) OpenMMLab. All rights reserved.
import time
from .hook import HOOKS, Hook
@HOOKS.register_module()
class IterTimerHook(Hook):
def before_epoch(self, runner):
self.t = time.time()
def before_iter(self, runner):
runner.log_buffer.update({'data_time': time.time() - self.t})
def after_iter(self, runner):
runner.log_buffer.update({'time': time.time() - self.t})
self.t = time.time()
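# Usage sketch: IterTimerHook is normally registered together with the other
# default training hooks; registering it by hand would look like the line
# below (assuming `runner` is an already-built runner instance). The
# 'data_time' and 'time' values it writes to `runner.log_buffer` are averaged
# and printed by the logger hooks that follow.
#
#     runner.register_hook(IterTimerHook())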
# Copyright (c) OpenMMLab. All rights reserved.
from .base import LoggerHook
from .clearml import ClearMLLoggerHook
from .dvclive import DvcliveLoggerHook
from .mlflow import MlflowLoggerHook
from .neptune import NeptuneLoggerHook
from .pavi import PaviLoggerHook
from .segmind import SegmindLoggerHook
from .tensorboard import TensorboardLoggerHook
from .text import TextLoggerHook
from .wandb import WandbLoggerHook
__all__ = [
'LoggerHook', 'MlflowLoggerHook', 'PaviLoggerHook',
'TensorboardLoggerHook', 'TextLoggerHook', 'WandbLoggerHook',
'NeptuneLoggerHook', 'DvcliveLoggerHook', 'SegmindLoggerHook',
'ClearMLLoggerHook'
]
# Copyright (c) OpenMMLab. All rights reserved.
import numbers
from abc import ABCMeta, abstractmethod
from typing import Dict
import numpy as np
import torch
from ..hook import Hook
class LoggerHook(Hook):
"""Base class for logger hooks.
Args:
interval (int): Logging interval (every k iterations). Default 10.
ignore_last (bool): Ignore the log of last iterations in each epoch
if less than `interval`. Default True.
reset_flag (bool): Whether to clear the output buffer after logging.
Default False.
by_epoch (bool): Whether EpochBasedRunner is used. Default True.
"""
__metaclass__ = ABCMeta
def __init__(self,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = False,
by_epoch: bool = True):
self.interval = interval
self.ignore_last = ignore_last
self.reset_flag = reset_flag
self.by_epoch = by_epoch
@abstractmethod
def log(self, runner):
pass
@staticmethod
def is_scalar(val,
include_np: bool = True,
include_torch: bool = True) -> bool:
"""Tell the input variable is a scalar or not.
Args:
val: Input variable.
include_np (bool): Whether include 0-d np.ndarray as a scalar.
include_torch (bool): Whether include 0-d torch.Tensor as a scalar.
Returns:
bool: True or False.
"""
if isinstance(val, numbers.Number):
return True
elif include_np and isinstance(val, np.ndarray) and val.ndim == 0:
return True
elif include_torch and isinstance(val, torch.Tensor) and len(val) == 1:
return True
else:
return False
def get_mode(self, runner) -> str:
if runner.mode == 'train':
if 'time' in runner.log_buffer.output:
mode = 'train'
else:
mode = 'val'
elif runner.mode == 'val':
mode = 'val'
else:
raise ValueError(f"runner mode should be 'train' or 'val', "
f'but got {runner.mode}')
return mode
def get_epoch(self, runner) -> int:
if runner.mode == 'train':
epoch = runner.epoch + 1
elif runner.mode == 'val':
# normal val mode
# runner.epoch += 1 has been done before val workflow
epoch = runner.epoch
else:
raise ValueError(f"runner mode should be 'train' or 'val', "
f'but got {runner.mode}')
return epoch
def get_iter(self, runner, inner_iter: bool = False) -> int:
"""Get the current training iteration step."""
if self.by_epoch and inner_iter:
current_iter = runner.inner_iter + 1
else:
current_iter = runner.iter + 1
return current_iter
def get_lr_tags(self, runner) -> Dict[str, float]:
tags = {}
lrs = runner.current_lr()
if isinstance(lrs, dict):
for name, value in lrs.items():
tags[f'learning_rate/{name}'] = value[0]
else:
tags['learning_rate'] = lrs[0]
return tags
def get_momentum_tags(self, runner) -> Dict[str, float]:
tags = {}
momentums = runner.current_momentum()
if isinstance(momentums, dict):
for name, value in momentums.items():
tags[f'momentum/{name}'] = value[0]
else:
tags['momentum'] = momentums[0]
return tags
def get_loggable_tags(
self,
runner,
allow_scalar: bool = True,
allow_text: bool = False,
add_mode: bool = True,
tags_to_skip: tuple = ('time', 'data_time')
) -> Dict:
tags = {}
for var, val in runner.log_buffer.output.items():
if var in tags_to_skip:
continue
if self.is_scalar(val) and not allow_scalar:
continue
if isinstance(val, str) and not allow_text:
continue
if add_mode:
var = f'{self.get_mode(runner)}/{var}'
tags[var] = val
tags.update(self.get_lr_tags(runner))
tags.update(self.get_momentum_tags(runner))
return tags
def before_run(self, runner) -> None:
for hook in runner.hooks[::-1]:
if isinstance(hook, LoggerHook):
hook.reset_flag = True
break
def before_epoch(self, runner) -> None:
runner.log_buffer.clear() # clear logs of last epoch
def after_train_iter(self, runner) -> None:
if self.by_epoch and self.every_n_inner_iters(runner, self.interval):
runner.log_buffer.average(self.interval)
elif not self.by_epoch and self.every_n_iters(runner, self.interval):
runner.log_buffer.average(self.interval)
elif self.end_of_epoch(runner) and not self.ignore_last:
# not precise but more stable
runner.log_buffer.average(self.interval)
if runner.log_buffer.ready:
self.log(runner)
if self.reset_flag:
runner.log_buffer.clear_output()
def after_train_epoch(self, runner) -> None:
if runner.log_buffer.ready:
self.log(runner)
if self.reset_flag:
runner.log_buffer.clear_output()
def after_val_epoch(self, runner) -> None:
runner.log_buffer.average()
self.log(runner)
if self.reset_flag:
runner.log_buffer.clear_output()
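# A minimal sketch of a concrete logger built on LoggerHook. The class name
# and the stdout output are assumptions for illustration, not part of mmcv;
# the real loggers in the modules below forward the same tags to an external
# backend instead of printing them.
from ..hook import HOOKS  # same relative import used by the sibling loggers


@HOOKS.register_module()
class StdoutLoggerHook(LoggerHook):
    """Print scalar tags every ``interval`` iterations."""

    def log(self, runner):
        tags = self.get_loggable_tags(runner)
        if tags:
            print(f'[{self.get_mode(runner)}] '
                  f'iter {self.get_iter(runner)}: {tags}')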
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Optional
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook
@HOOKS.register_module()
class ClearMLLoggerHook(LoggerHook):
"""Class to log metrics with clearml.
It requires `clearml`_ to be installed.
Args:
init_kwargs (dict): A dict containing the `clearml.Task.init`
initialization keys. See `taskinit`_ for more details.
interval (int): Logging interval (every k iterations). Default 10.
ignore_last (bool): Ignore the log of last iterations in each epoch
if less than `interval`. Default: True.
reset_flag (bool): Whether to clear the output buffer after logging.
Default: False.
by_epoch (bool): Whether EpochBasedRunner is used. Default: True.
.. _clearml:
https://clear.ml/docs/latest/docs/
.. _taskinit:
https://clear.ml/docs/latest/docs/references/sdk/task/#taskinit
"""
def __init__(self,
init_kwargs: Optional[Dict] = None,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = False,
by_epoch: bool = True):
super().__init__(interval, ignore_last, reset_flag, by_epoch)
self.import_clearml()
self.init_kwargs = init_kwargs
def import_clearml(self):
try:
import clearml
except ImportError:
raise ImportError(
'Please run "pip install clearml" to install clearml')
self.clearml = clearml
@master_only
def before_run(self, runner) -> None:
super().before_run(runner)
task_kwargs = self.init_kwargs if self.init_kwargs else {}
self.task = self.clearml.Task.init(**task_kwargs)
self.task_logger = self.task.get_logger()
@master_only
def log(self, runner) -> None:
tags = self.get_loggable_tags(runner)
for tag, val in tags.items():
self.task_logger.report_scalar(tag, tag, val,
self.get_iter(runner))
# Copyright (c) OpenMMLab. All rights reserved.
from pathlib import Path
from typing import Optional
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook
@HOOKS.register_module()
class DvcliveLoggerHook(LoggerHook):
"""Class to log metrics with dvclive.
It requires `dvclive`_ to be installed.
Args:
model_file (str): Default None. If not None, after each epoch the
model will be saved to {model_file}.
interval (int): Logging interval (every k iterations). Default 10.
ignore_last (bool): Ignore the log of last iterations in each epoch
if less than `interval`. Default: True.
reset_flag (bool): Whether to clear the output buffer after logging.
Default: False.
by_epoch (bool): Whether EpochBasedRunner is used. Default: True.
kwargs: Arguments for instantiating `Live`_.
.. _dvclive:
https://dvc.org/doc/dvclive
.. _Live:
https://dvc.org/doc/dvclive/api-reference/live#parameters
"""
def __init__(self,
model_file: Optional[str] = None,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = False,
by_epoch: bool = True,
**kwargs):
super().__init__(interval, ignore_last, reset_flag, by_epoch)
self.model_file = model_file
self.import_dvclive(**kwargs)
def import_dvclive(self, **kwargs) -> None:
try:
from dvclive import Live
except ImportError:
raise ImportError(
'Please run "pip install dvclive" to install dvclive')
self.dvclive = Live(**kwargs)
@master_only
def log(self, runner) -> None:
tags = self.get_loggable_tags(runner)
if tags:
self.dvclive.set_step(self.get_iter(runner))
for k, v in tags.items():
self.dvclive.log(k, v)
@master_only
def after_train_epoch(self, runner) -> None:
super().after_train_epoch(runner)
if self.model_file is not None:
runner.save_checkpoint(
Path(self.model_file).parent,
filename_tmpl=Path(self.model_file).name,
create_symlink=False,
)
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Optional
from mmcv.utils import TORCH_VERSION
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook
@HOOKS.register_module()
class MlflowLoggerHook(LoggerHook):
"""Class to log metrics and (optionally) a trained model to MLflow.
It requires `MLflow`_ to be installed.
Args:
exp_name (str, optional): Name of the experiment to be used.
Default None. If not None, set the active experiment.
If experiment does not exist, an experiment with provided name
will be created.
tags (Dict[str], optional): Tags for the current run.
Default None. If not None, set tags for the current run.
log_model (bool, optional): Whether to log an MLflow artifact.
Default True. If True, log runner.model as an MLflow artifact
for the current run.
interval (int): Logging interval (every k iterations). Default: 10.
ignore_last (bool): Ignore the log of last iterations in each epoch
if less than `interval`. Default: True.
reset_flag (bool): Whether to clear the output buffer after logging.
Default: False.
by_epoch (bool): Whether EpochBasedRunner is used. Default: True.
.. _MLflow:
https://www.mlflow.org/docs/latest/index.html
"""
def __init__(self,
exp_name: Optional[str] = None,
tags: Optional[Dict] = None,
log_model: bool = True,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = False,
by_epoch: bool = True):
super().__init__(interval, ignore_last, reset_flag, by_epoch)
self.import_mlflow()
self.exp_name = exp_name
self.tags = tags
self.log_model = log_model
def import_mlflow(self) -> None:
try:
import mlflow
import mlflow.pytorch as mlflow_pytorch
except ImportError:
raise ImportError(
'Please run "pip install mlflow" to install mlflow')
self.mlflow = mlflow
self.mlflow_pytorch = mlflow_pytorch
@master_only
def before_run(self, runner) -> None:
super().before_run(runner)
if self.exp_name is not None:
self.mlflow.set_experiment(self.exp_name)
if self.tags is not None:
self.mlflow.set_tags(self.tags)
@master_only
def log(self, runner) -> None:
tags = self.get_loggable_tags(runner)
if tags:
self.mlflow.log_metrics(tags, step=self.get_iter(runner))
@master_only
def after_run(self, runner) -> None:
if self.log_model:
self.mlflow_pytorch.log_model(
runner.model,
'models',
pip_requirements=[f'torch=={TORCH_VERSION}'])
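# Configuration sketch: the hook above is usually enabled through the
# standard `log_config` dict that the runner turns into logger hooks. The
# experiment name and tags below are illustrative values.
log_config = dict(
    interval=10,
    hooks=[
        dict(
            type='MlflowLoggerHook',
            exp_name='demo_experiment',
            tags=dict(backbone='resnet50'),
            log_model=True),
    ])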
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Optional
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook
@HOOKS.register_module()
class NeptuneLoggerHook(LoggerHook):
"""Class to log metrics to NeptuneAI.
It requires `Neptune`_ to be installed.
Args:
init_kwargs (dict): A dict containing the initialization keys as below:
- project (str): Name of a project in a form of
namespace/project_name. If None, the value of NEPTUNE_PROJECT
environment variable will be taken.
- api_token (str): User’s API token. If None, the value of
NEPTUNE_API_TOKEN environment variable will be taken. Note: It is
strongly recommended to use NEPTUNE_API_TOKEN environment
variable rather than placing your API token in plain text in your
source code.
- name (str, optional, default is 'Untitled'): Editable name of the
run. Name is displayed in the run's Details and in Runs table as
a column.
Check https://docs.neptune.ai/api-reference/neptune#init for more
init arguments.
interval (int): Logging interval (every k iterations). Default: 10.
ignore_last (bool): Ignore the log of last iterations in each epoch
if less than ``interval``. Default: True.
reset_flag (bool): Whether to clear the output buffer after logging.
Default: True.
with_step (bool): If True, the step will be logged from
``self.get_iter``. Otherwise, step will not be logged.
Default: True.
by_epoch (bool): Whether EpochBasedRunner is used. Default: True.
.. _Neptune:
https://docs.neptune.ai
"""
def __init__(self,
init_kwargs: Optional[Dict] = None,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = True,
with_step: bool = True,
by_epoch: bool = True):
super().__init__(interval, ignore_last, reset_flag, by_epoch)
self.import_neptune()
self.init_kwargs = init_kwargs
self.with_step = with_step
def import_neptune(self) -> None:
try:
import neptune.new as neptune
except ImportError:
raise ImportError(
'Please run "pip install neptune-client" to install neptune')
self.neptune = neptune
self.run = None
@master_only
def before_run(self, runner) -> None:
if self.init_kwargs:
self.run = self.neptune.init(**self.init_kwargs)
else:
self.run = self.neptune.init()
@master_only
def log(self, runner) -> None:
tags = self.get_loggable_tags(runner)
if tags:
for tag_name, tag_value in tags.items():
if self.with_step:
self.run[tag_name].log( # type: ignore
tag_value, step=self.get_iter(runner))
else:
tags['global_step'] = self.get_iter(runner)
self.run[tag_name].log(tags) # type: ignore
@master_only
def after_run(self, runner) -> None:
self.run.stop() # type: ignore
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os
import os.path as osp
from typing import Dict, Optional
import mmengine
import torch
import yaml
import mmcv
from ....parallel.utils import is_module_wrapper
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook
@HOOKS.register_module()
class PaviLoggerHook(LoggerHook):
"""Class to visual model, log metrics (for internal use).
Args:
init_kwargs (dict): A dict containing the initialization keys as below:
- name (str, optional): Custom training name. Defaults to None,
which means current work_dir.
- project (str, optional): Project name. Defaults to "default".
- model (str, optional): Training model name. Defaults to current
model.
- session_text (str, optional): Session string in YAML format.
Defaults to current config.
- training_id (int, optional): Training ID in PAVI, if you want to
use an existing training. Defaults to None.
- compare_id (int, optional): Compare ID in PAVI, if you want to
add the task to an existing compare. Defaults to None.
- overwrite_last_training (bool, optional): Whether to upload data
to the training with the same name in the same project, rather
than creating a new one. Defaults to False.
add_graph (bool): Whether to visualize the model graph. Default: False.
add_last_ckpt (bool): Whether to save checkpoint after run.
Default: False.
interval (int): Logging interval (every k iterations). Default: 10.
ignore_last (bool): Ignore the log of last iterations in each epoch
if less than `interval`. Default: True.
reset_flag (bool): Whether to clear the output buffer after logging.
Default: False.
by_epoch (bool): Whether EpochBasedRunner is used. Default: True.
img_key (string): Get image data from Dataset. Default: 'img_info'.
"""
def __init__(self,
init_kwargs: Optional[Dict] = None,
add_graph: bool = False,
add_last_ckpt: bool = False,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = False,
by_epoch: bool = True,
img_key: str = 'img_info'):
super().__init__(interval, ignore_last, reset_flag, by_epoch)
self.init_kwargs = init_kwargs
self.add_graph = add_graph
self.add_last_ckpt = add_last_ckpt
self.img_key = img_key
@master_only
def before_run(self, runner) -> None:
super().before_run(runner)
try:
from pavi import SummaryWriter
except ImportError:
raise ImportError(
'No module named pavi, please contact pavi team or visit '
'the documentation for pavi installation instructions.')
self.run_name = runner.work_dir.split('/')[-1]
if not self.init_kwargs:
self.init_kwargs = dict()
self.init_kwargs.setdefault('name', self.run_name)
self.init_kwargs.setdefault('model', runner._model_name)
if runner.meta is not None:
if 'config_dict' in runner.meta:
config_dict = runner.meta['config_dict']
assert isinstance(
config_dict,
dict), ('meta["config_dict"] has to be a dict, '
f'but got {type(config_dict)}')
elif 'config_file' in runner.meta:
config_file = runner.meta['config_file']
config_dict = dict(mmcv.Config.fromfile(config_file))
else:
config_dict = None
if config_dict is not None:
# 'max_.*iter' is parsed in pavi sdk as the maximum iterations
# to properly set up the progress bar.
config_dict = config_dict.copy()
config_dict.setdefault('max_iter', runner.max_iters)
# non-serializable values are first converted in
# mmengine.dump to json
config_dict = json.loads(
mmengine.dump(config_dict, file_format='json'))
session_text = yaml.dump(config_dict)
self.init_kwargs.setdefault('session_text', session_text)
self.writer = SummaryWriter(**self.init_kwargs)
def get_step(self, runner) -> int:
"""Get the total training step/epoch."""
if self.get_mode(runner) == 'val' and self.by_epoch:
return self.get_epoch(runner)
else:
return self.get_iter(runner)
@master_only
def log(self, runner) -> None:
tags = self.get_loggable_tags(runner, add_mode=False)
if tags:
self.writer.add_scalars(
self.get_mode(runner), tags, self.get_step(runner))
@master_only
def after_run(self, runner) -> None:
if self.add_last_ckpt:
ckpt_path = osp.join(runner.work_dir, 'latest.pth')
if osp.islink(ckpt_path):
ckpt_path = osp.join(runner.work_dir, os.readlink(ckpt_path))
if osp.isfile(ckpt_path):
# runner.epoch += 1 has been done before `after_run`.
iteration = runner.epoch if self.by_epoch else runner.iter
return self.writer.add_snapshot_file(
tag=self.run_name,
snapshot_file_path=ckpt_path,
iteration=iteration)
# flush the buffer and send a task ending signal to Pavi
self.writer.close()
@master_only
def before_epoch(self, runner) -> None:
if runner.epoch == 0 and self.add_graph:
if is_module_wrapper(runner.model):
_model = runner.model.module
else:
_model = runner.model
device = next(_model.parameters()).device
data = next(iter(runner.data_loader))
image = data[self.img_key][0:1].to(device)
with torch.no_grad():
self.writer.add_graph(_model, image)
# Copyright (c) OpenMMLab. All rights reserved.
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook
@HOOKS.register_module()
class SegmindLoggerHook(LoggerHook):
"""Class to log metrics to Segmind.
It requires `Segmind`_ to be installed.
Args:
interval (int): Logging interval (every k iterations). Default: 10.
ignore_last (bool): Ignore the log of last iterations in each epoch
if less than `interval`. Default True.
reset_flag (bool): Whether to clear the output buffer after logging.
Default False.
by_epoch (bool): Whether EpochBasedRunner is used. Default True.
.. _Segmind:
https://docs.segmind.com/python-library
"""
def __init__(self,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = False,
by_epoch=True):
super().__init__(interval, ignore_last, reset_flag, by_epoch)
self.import_segmind()
def import_segmind(self) -> None:
try:
import segmind
except ImportError:
raise ImportError(
"Please run 'pip install segmind' to install segmind")
self.log_metrics = segmind.tracking.fluent.log_metrics
self.mlflow_log = segmind.utils.logging_utils.try_mlflow_log
@master_only
def log(self, runner) -> None:
tags = self.get_loggable_tags(runner)
if tags:
# logging metrics to segmind
self.mlflow_log(
self.log_metrics, tags, step=runner.epoch, epoch=runner.epoch)
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Optional
from mmcv.utils import TORCH_VERSION, digit_version
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook
@HOOKS.register_module()
class TensorboardLoggerHook(LoggerHook):
"""Class to log metrics to Tensorboard.
Args:
log_dir (string): Save directory location. Default: None. If default
values are used, directory location is ``runner.work_dir``/tf_logs.
interval (int): Logging interval (every k iterations). Default: 10.
ignore_last (bool): Ignore the log of last iterations in each epoch
if less than `interval`. Default: True.
reset_flag (bool): Whether to clear the output buffer after logging.
Default: False.
by_epoch (bool): Whether EpochBasedRunner is used. Default: True.
"""
def __init__(self,
log_dir: Optional[str] = None,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = False,
by_epoch: bool = True):
super().__init__(interval, ignore_last, reset_flag, by_epoch)
self.log_dir = log_dir
@master_only
def before_run(self, runner) -> None:
super().before_run(runner)
if (TORCH_VERSION == 'parrots'
or digit_version(TORCH_VERSION) < digit_version('1.1')):
try:
from tensorboardX import SummaryWriter
except ImportError:
raise ImportError('Please install tensorboardX to use '
'TensorboardLoggerHook.')
else:
try:
from torch.utils.tensorboard import SummaryWriter
except ImportError:
raise ImportError(
'Please run "pip install future tensorboard" to install '
'the dependencies to use torch.utils.tensorboard '
'(applicable to PyTorch 1.1 or higher)')
if self.log_dir is None:
self.log_dir = osp.join(runner.work_dir, 'tf_logs')
self.writer = SummaryWriter(self.log_dir)
@master_only
def log(self, runner) -> None:
tags = self.get_loggable_tags(runner, allow_text=True)
for tag, val in tags.items():
if isinstance(val, str):
self.writer.add_text(tag, val, self.get_iter(runner))
else:
self.writer.add_scalar(tag, val, self.get_iter(runner))
@master_only
def after_run(self, runner) -> None:
self.writer.close()
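# Configuration sketch: pair the plain-text logger with Tensorboard. With
# `log_dir` left unset, the event files are written to
# ``runner.work_dir``/tf_logs as described in the docstring above.
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ])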
# Copyright (c) OpenMMLab. All rights reserved.
import datetime
import os
import os.path as osp
from collections import OrderedDict
from typing import Dict, Optional, Union
import mmengine
import torch
import torch.distributed as dist
from mmengine.fileio.file_client import FileClient
from mmcv.utils import is_tuple_of, scandir
from ..hook import HOOKS
from .base import LoggerHook
@HOOKS.register_module()
class TextLoggerHook(LoggerHook):
"""Logger hook in text.
In this logger hook, the information will be printed on terminal and
saved in json file.
Args:
by_epoch (bool, optional): Whether EpochBasedRunner is used.
Default: True.
interval (int, optional): Logging interval (every k iterations).
Default: 10.
ignore_last (bool, optional): Ignore the log of last iterations in each
epoch if less than :attr:`interval`. Default: True.
reset_flag (bool, optional): Whether to clear the output buffer after
logging. Default: False.
interval_exp_name (int, optional): Logging interval for experiment
name. This feature is to help users conveniently get the experiment
information from screen or log file. Default: 1000.
out_dir (str, optional): Logs are saved in ``runner.work_dir`` default.
If ``out_dir`` is specified, logs will be copied to a new directory
which is the concatenation of ``out_dir`` and the last level
directory of ``runner.work_dir``. Default: None.
`New in version 1.3.16.`
out_suffix (str or tuple[str], optional): Those filenames ending with
``out_suffix`` will be copied to ``out_dir``.
Default: ('.log.json', '.log', '.py').
`New in version 1.3.16.`
keep_local (bool, optional): Whether to keep local log when
:attr:`out_dir` is specified. If False, the local log will be
removed. Default: True.
`New in version 1.3.16.`
file_client_args (dict, optional): Arguments to instantiate a
FileClient. See :class:`mmengine.fileio.FileClient` for details.
Default: None.
`New in version 1.3.16.`
"""
def __init__(self,
by_epoch: bool = True,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = False,
interval_exp_name: int = 1000,
out_dir: Optional[str] = None,
out_suffix: Union[str, tuple] = ('.log.json', '.log', '.py'),
keep_local: bool = True,
file_client_args: Optional[Dict] = None):
super().__init__(interval, ignore_last, reset_flag, by_epoch)
self.by_epoch = by_epoch
self.time_sec_tot = 0
self.interval_exp_name = interval_exp_name
if out_dir is None and file_client_args is not None:
raise ValueError(
'file_client_args should be "None" when `out_dir` is not '
'specified.')
self.out_dir = out_dir
if not (out_dir is None or isinstance(out_dir, str)
or is_tuple_of(out_dir, str)):
raise TypeError('out_dir should be "None" or string or tuple of '
f'string, but got {out_dir}')
self.out_suffix = out_suffix
self.keep_local = keep_local
self.file_client_args = file_client_args
if self.out_dir is not None:
self.file_client = FileClient.infer_client(file_client_args,
self.out_dir)
def before_run(self, runner) -> None:
super().before_run(runner)
if self.out_dir is not None:
self.file_client = FileClient.infer_client(self.file_client_args,
self.out_dir)
# The final `self.out_dir` is the concatenation of `self.out_dir`
# and the last level directory of `runner.work_dir`
basename = osp.basename(runner.work_dir.rstrip(osp.sep))
self.out_dir = self.file_client.join_path(self.out_dir, basename)
runner.logger.info(
f'Text logs will be saved to {self.out_dir} by '
f'{self.file_client.name} after the training process.')
self.start_iter = runner.iter
self.json_log_path = osp.join(runner.work_dir,
f'{runner.timestamp}.log.json')
if runner.meta is not None:
self._dump_log(runner.meta, runner)
def _get_max_memory(self, runner) -> int:
device = getattr(runner.model, 'output_device', None)
mem = torch.cuda.max_memory_allocated(device=device)
mem_mb = torch.tensor([int(mem) // (1024 * 1024)],
dtype=torch.int,
device=device)
if runner.world_size > 1:
dist.reduce(mem_mb, 0, op=dist.ReduceOp.MAX)
return mem_mb.item()
def _log_info(self, log_dict: Dict, runner) -> None:
# print exp name for users to distinguish experiments
# at every ``interval_exp_name`` iterations and the end of each epoch
if runner.meta is not None and 'exp_name' in runner.meta:
if (self.every_n_iters(runner, self.interval_exp_name)) or (
self.by_epoch and self.end_of_epoch(runner)):
exp_info = f'Exp name: {runner.meta["exp_name"]}'
runner.logger.info(exp_info)
if log_dict['mode'] == 'train':
if isinstance(log_dict['lr'], dict):
lr_str = []
for k, val in log_dict['lr'].items():
lr_str.append(f'lr_{k}: {val:.3e}')
lr_str = ' '.join(lr_str) # type: ignore
else:
lr_str = f'lr: {log_dict["lr"]:.3e}' # type: ignore
# by epoch: Epoch [4][100/1000]
# by iter: Iter [100/100000]
if self.by_epoch:
log_str = f'Epoch [{log_dict["epoch"]}]' \
f'[{log_dict["iter"]}/{len(runner.data_loader)}]\t'
else:
log_str = f'Iter [{log_dict["iter"]}/{runner.max_iters}]\t'
log_str += f'{lr_str}, '
if 'time' in log_dict.keys():
self.time_sec_tot += (log_dict['time'] * self.interval)
time_sec_avg = self.time_sec_tot / (
runner.iter - self.start_iter + 1)
eta_sec = time_sec_avg * (runner.max_iters - runner.iter - 1)
eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
log_str += f'eta: {eta_str}, '
log_str += f'time: {log_dict["time"]:.3f}, ' \
f'data_time: {log_dict["data_time"]:.3f}, '
# statistic memory
if torch.cuda.is_available():
log_str += f'memory: {log_dict["memory"]}, '
else:
# val/test time
# here 1000 is the length of the val dataloader
# by epoch: Epoch[val] [4][1000]
# by iter: Iter[val] [1000]
if self.by_epoch:
log_str = f'Epoch({log_dict["mode"]}) ' \
f'[{log_dict["epoch"]}][{log_dict["iter"]}]\t'
else:
log_str = f'Iter({log_dict["mode"]}) [{log_dict["iter"]}]\t'
log_items = []
for name, val in log_dict.items():
# TODO: resolve this hack
# these items have been in log_str
if name in [
'mode', 'Epoch', 'iter', 'lr', 'time', 'data_time',
'memory', 'epoch'
]:
continue
if isinstance(val, float):
val = f'{val:.4f}'
log_items.append(f'{name}: {val}')
log_str += ', '.join(log_items)
runner.logger.info(log_str)
def _dump_log(self, log_dict: Dict, runner) -> None:
# dump log in json format
json_log = OrderedDict()
for k, v in log_dict.items():
json_log[k] = self._round_float(v)
# only append log at last line
if runner.rank == 0:
with open(self.json_log_path, 'a+') as f:
mmengine.dump(json_log, f, file_format='json')
f.write('\n')
def _round_float(self, items):
if isinstance(items, list):
return [self._round_float(item) for item in items]
elif isinstance(items, float):
return round(items, 5)
else:
return items
def log(self, runner) -> OrderedDict:
if 'eval_iter_num' in runner.log_buffer.output:
# this doesn't modify runner.iter and is regardless of by_epoch
cur_iter = runner.log_buffer.output.pop('eval_iter_num')
else:
cur_iter = self.get_iter(runner, inner_iter=True)
log_dict = OrderedDict(
mode=self.get_mode(runner),
epoch=self.get_epoch(runner),
iter=cur_iter)
# only record lr of the first param group
cur_lr = runner.current_lr()
if isinstance(cur_lr, list):
log_dict['lr'] = cur_lr[0]
else:
assert isinstance(cur_lr, dict)
log_dict['lr'] = {}
for k, lr_ in cur_lr.items():
assert isinstance(lr_, list)
log_dict['lr'].update({k: lr_[0]})
if 'time' in runner.log_buffer.output:
# statistic memory
if torch.cuda.is_available():
log_dict['memory'] = self._get_max_memory(runner)
log_dict = dict(log_dict, **runner.log_buffer.output) # type: ignore
self._log_info(log_dict, runner)
self._dump_log(log_dict, runner)
return log_dict
def after_run(self, runner) -> None:
# copy or upload logs to self.out_dir
if self.out_dir is not None:
for filename in scandir(runner.work_dir, self.out_suffix, True):
local_filepath = osp.join(runner.work_dir, filename)
out_filepath = self.file_client.join_path(
self.out_dir, filename)
with open(local_filepath) as f:
self.file_client.put_text(f.read(), out_filepath)
runner.logger.info(
f'The file {local_filepath} has been uploaded to '
f'{out_filepath}.')
if not self.keep_local:
os.remove(local_filepath)
runner.logger.info(
f'{local_filepath} was removed due to the '
'`self.keep_local=False`')
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Dict, Optional, Union
from mmcv.utils import scandir
from ...dist_utils import master_only
from ..hook import HOOKS
from .base import LoggerHook
@HOOKS.register_module()
class WandbLoggerHook(LoggerHook):
"""Class to log metrics with wandb.
It requires `wandb`_ to be installed.
Args:
init_kwargs (dict): A dict containing the initialization keys. Check
https://docs.wandb.ai/ref/python/init for more init arguments.
interval (int): Logging interval (every k iterations).
Default 10.
ignore_last (bool): Ignore the log of last iterations in each epoch
if less than `interval`.
Default: True.
reset_flag (bool): Whether to clear the output buffer after logging.
Default: False.
commit (bool): Save the metrics dict to the wandb server and increment
the step. If false ``wandb.log`` just updates the current metrics
dict with the row argument and metrics won't be saved until
``wandb.log`` is called with ``commit=True``.
Default: True.
by_epoch (bool): Whether EpochBasedRunner is used.
Default: True.
with_step (bool): If True, the step will be logged from
``self.get_iter``. Otherwise, step will not be logged.
Default: True.
log_artifact (bool): If True, artifacts in {work_dir} will be uploaded
to wandb after training ends.
Default: True
`New in version 1.4.3.`
out_suffix (str or tuple[str], optional): Those filenames ending with
``out_suffix`` will be uploaded to wandb.
Default: ('.log.json', '.log', '.py').
`New in version 1.4.3.`
.. _wandb:
https://docs.wandb.ai
"""
def __init__(self,
init_kwargs: Optional[Dict] = None,
interval: int = 10,
ignore_last: bool = True,
reset_flag: bool = False,
commit: bool = True,
by_epoch: bool = True,
with_step: bool = True,
log_artifact: bool = True,
out_suffix: Union[str, tuple] = ('.log.json', '.log', '.py')):
super().__init__(interval, ignore_last, reset_flag, by_epoch)
self.import_wandb()
self.init_kwargs = init_kwargs
self.commit = commit
self.with_step = with_step
self.log_artifact = log_artifact
self.out_suffix = out_suffix
def import_wandb(self) -> None:
try:
import wandb
except ImportError:
raise ImportError(
'Please run "pip install wandb" to install wandb')
self.wandb = wandb
@master_only
def before_run(self, runner) -> None:
super().before_run(runner)
if self.wandb is None:
self.import_wandb()
if self.init_kwargs:
self.wandb.init(**self.init_kwargs) # type: ignore
else:
self.wandb.init() # type: ignore
@master_only
def log(self, runner) -> None:
tags = self.get_loggable_tags(runner)
if tags:
if self.with_step:
self.wandb.log(
tags, step=self.get_iter(runner), commit=self.commit)
else:
tags['global_step'] = self.get_iter(runner)
self.wandb.log(tags, commit=self.commit)
@master_only
def after_run(self, runner) -> None:
if self.log_artifact:
wandb_artifact = self.wandb.Artifact(
name='artifacts', type='model')
for filename in scandir(runner.work_dir, self.out_suffix, True):
local_filepath = osp.join(runner.work_dir, filename)
wandb_artifact.add_file(local_filepath)
self.wandb.log_artifact(wandb_artifact)
self.wandb.join()
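# Configuration sketch: `init_kwargs` is forwarded verbatim to `wandb.init`;
# the project and run names below are illustrative.
log_config = dict(
    interval=10,
    hooks=[
        dict(
            type='WandbLoggerHook',
            init_kwargs=dict(project='demo-project', name='baseline-run'),
            commit=True,
            with_step=True,
            log_artifact=True),
    ])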
# Copyright (c) OpenMMLab. All rights reserved.
import numbers
from math import cos, pi
from typing import Callable, List, Optional, Union
import mmcv
from mmcv import runner
from .hook import HOOKS, Hook
class LrUpdaterHook(Hook):
"""LR Scheduler in MMCV.
Args:
by_epoch (bool): LR changes epoch by epoch.
warmup (string): Type of warmup used. It can be None (use no warmup),
'constant', 'linear' or 'exp'.
warmup_iters (int): The number of iterations or epochs that warmup
lasts.
warmup_ratio (float): LR used at the beginning of warmup equals to
warmup_ratio * initial_lr.
warmup_by_epoch (bool): When warmup_by_epoch == True, warmup_iters
means the number of epochs that warmup lasts, otherwise means the
number of iterations that warmup lasts.
"""
def __init__(self,
by_epoch: bool = True,
warmup: Optional[str] = None,
warmup_iters: int = 0,
warmup_ratio: float = 0.1,
warmup_by_epoch: bool = False) -> None:
# validate the "warmup" argument
if warmup is not None:
if warmup not in ['constant', 'linear', 'exp']:
raise ValueError(
f'"{warmup}" is not a supported type for warming up, valid'
' types are "constant", "linear" and "exp"')
if warmup is not None:
assert warmup_iters > 0, \
'"warmup_iters" must be a positive integer'
assert 0 < warmup_ratio <= 1.0, \
'"warmup_ratio" must be in range (0,1]'
self.by_epoch = by_epoch
self.warmup = warmup
self.warmup_iters: Optional[int] = warmup_iters
self.warmup_ratio = warmup_ratio
self.warmup_by_epoch = warmup_by_epoch
if self.warmup_by_epoch:
self.warmup_epochs: Optional[int] = self.warmup_iters
self.warmup_iters = None
else:
self.warmup_epochs = None
self.base_lr: Union[list, dict] = [] # initial lr for all param groups
self.regular_lr: list = [] # expected lr if no warming up is performed
def _set_lr(self, runner, lr_groups):
if isinstance(runner.optimizer, dict):
for k, optim in runner.optimizer.items():
for param_group, lr in zip(optim.param_groups, lr_groups[k]):
param_group['lr'] = lr
else:
for param_group, lr in zip(runner.optimizer.param_groups,
lr_groups):
param_group['lr'] = lr
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
raise NotImplementedError
def get_regular_lr(self, runner: 'runner.BaseRunner'):
if isinstance(runner.optimizer, dict):
lr_groups = {}
for k in runner.optimizer.keys():
_lr_group = [
self.get_lr(runner, _base_lr)
for _base_lr in self.base_lr[k]
]
lr_groups.update({k: _lr_group})
return lr_groups
else:
return [self.get_lr(runner, _base_lr) for _base_lr in self.base_lr]
def get_warmup_lr(self, cur_iters: int):
def _get_warmup_lr(cur_iters, regular_lr):
if self.warmup == 'constant':
warmup_lr = [_lr * self.warmup_ratio for _lr in regular_lr]
elif self.warmup == 'linear':
k = (1 - cur_iters / self.warmup_iters) * (1 -
self.warmup_ratio)
warmup_lr = [_lr * (1 - k) for _lr in regular_lr]
elif self.warmup == 'exp':
k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters)
warmup_lr = [_lr * k for _lr in regular_lr]
return warmup_lr
if isinstance(self.regular_lr, dict):
lr_groups = {}
for key, regular_lr in self.regular_lr.items():
lr_groups[key] = _get_warmup_lr(cur_iters, regular_lr)
return lr_groups
else:
return _get_warmup_lr(cur_iters, self.regular_lr)
def before_run(self, runner: 'runner.BaseRunner'):
# NOTE: when resuming from a checkpoint, if 'initial_lr' is not saved,
# it will be set according to the optimizer params
if isinstance(runner.optimizer, dict):
self.base_lr = {}
for k, optim in runner.optimizer.items():
for group in optim.param_groups:
group.setdefault('initial_lr', group['lr'])
_base_lr = [
group['initial_lr'] for group in optim.param_groups
]
self.base_lr.update({k: _base_lr})
else:
for group in runner.optimizer.param_groups: # type: ignore
group.setdefault('initial_lr', group['lr'])
self.base_lr = [
group['initial_lr']
for group in runner.optimizer.param_groups # type: ignore
]
def before_train_epoch(self, runner: 'runner.BaseRunner'):
if self.warmup_iters is None:
epoch_len = len(runner.data_loader) # type: ignore
self.warmup_iters = self.warmup_epochs * epoch_len # type: ignore
if not self.by_epoch:
return
self.regular_lr = self.get_regular_lr(runner)
self._set_lr(runner, self.regular_lr)
def before_train_iter(self, runner: 'runner.BaseRunner'):
cur_iter = runner.iter
assert isinstance(self.warmup_iters, int)
if not self.by_epoch:
self.regular_lr = self.get_regular_lr(runner)
if self.warmup is None or cur_iter >= self.warmup_iters:
self._set_lr(runner, self.regular_lr)
else:
warmup_lr = self.get_warmup_lr(cur_iter)
self._set_lr(runner, warmup_lr)
elif self.by_epoch:
if self.warmup is None or cur_iter > self.warmup_iters:
return
elif cur_iter == self.warmup_iters:
self._set_lr(runner, self.regular_lr)
else:
warmup_lr = self.get_warmup_lr(cur_iter)
self._set_lr(runner, warmup_lr)
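# Configuration sketch: an `lr_config` dict such as the one below is expanded
# by the runner into one of the updater hooks defined in this file (the
# 'step' policy maps to StepLrUpdaterHook); the warmup values are
# illustrative.
lr_config = dict(
    policy='step',       # -> StepLrUpdaterHook (defined below)
    warmup='linear',     # linear warmup during the first 500 iterations
    warmup_iters=500,
    warmup_ratio=0.001,  # start warmup at 0.001 * initial_lr
    step=[8, 11])        # decay the LR at epoch 8 and epoch 11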
@HOOKS.register_module()
class FixedLrUpdaterHook(LrUpdaterHook):
def __init__(self, **kwargs):
super().__init__(**kwargs)
def get_lr(self, runner, base_lr):
return base_lr
@HOOKS.register_module()
class StepLrUpdaterHook(LrUpdaterHook):
"""Step LR scheduler with min_lr clipping.
Args:
step (int | list[int]): Step to decay the LR. If an int value is given,
regard it as the decay interval. If a list is given, decay LR at
these steps.
gamma (float): Decay LR ratio. Defaults to 0.1.
min_lr (float, optional): Minimum LR value to keep. If LR after decay
is lower than `min_lr`, it will be clipped to this value. If None
is given, we don't perform lr clipping. Default: None.
"""
def __init__(self,
step: Union[int, List[int]],
gamma: float = 0.1,
min_lr: Optional[float] = None,
**kwargs) -> None:
if isinstance(step, list):
assert mmcv.is_list_of(step, int)
assert all([s > 0 for s in step])
elif isinstance(step, int):
assert step > 0
else:
raise TypeError('"step" must be a list or integer')
self.step = step
self.gamma = gamma
self.min_lr = min_lr
super().__init__(**kwargs)
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
progress = runner.epoch if self.by_epoch else runner.iter
# calculate exponential term
if isinstance(self.step, int):
exp = progress // self.step
else:
exp = len(self.step)
for i, s in enumerate(self.step):
if progress < s:
exp = i
break
lr = base_lr * (self.gamma**exp)
if self.min_lr is not None:
# clip to a minimum value
lr = max(lr, self.min_lr)
return lr
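# Worked example for the hook above with by_epoch=True, base_lr=0.1,
# step=[8, 11] and gamma=0.1:
#   epochs 0-7  -> lr = 0.1    (exp = 0, progress < 8)
#   epochs 8-10 -> lr = 0.01   (exp = 1, 8 <= progress < 11)
#   epochs 11+  -> lr = 0.001  (exp = 2, progress >= 11)
# With `min_lr` set, the decayed value would additionally be clipped from
# below.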
@HOOKS.register_module()
class ExpLrUpdaterHook(LrUpdaterHook):
def __init__(self, gamma: float, **kwargs) -> None:
self.gamma = gamma
super().__init__(**kwargs)
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
progress = runner.epoch if self.by_epoch else runner.iter
return base_lr * self.gamma**progress
@HOOKS.register_module()
class PolyLrUpdaterHook(LrUpdaterHook):
def __init__(self,
power: float = 1.,
min_lr: float = 0.,
**kwargs) -> None:
self.power = power
self.min_lr = min_lr
super().__init__(**kwargs)
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
if self.by_epoch:
progress = runner.epoch
max_progress = runner.max_epochs
else:
progress = runner.iter
max_progress = runner.max_iters
coeff = (1 - progress / max_progress)**self.power
return (base_lr - self.min_lr) * coeff + self.min_lr
@HOOKS.register_module()
class InvLrUpdaterHook(LrUpdaterHook):
def __init__(self, gamma: float, power: float = 1., **kwargs) -> None:
self.gamma = gamma
self.power = power
super().__init__(**kwargs)
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
progress = runner.epoch if self.by_epoch else runner.iter
return base_lr * (1 + self.gamma * progress)**(-self.power)
@HOOKS.register_module()
class CosineAnnealingLrUpdaterHook(LrUpdaterHook):
"""CosineAnnealing LR scheduler.
Args:
min_lr (float, optional): The minimum lr. Default: None.
min_lr_ratio (float, optional): The ratio of minimum lr to the base lr.
Either `min_lr` or `min_lr_ratio` should be specified.
Default: None.
"""
def __init__(self,
min_lr: Optional[float] = None,
min_lr_ratio: Optional[float] = None,
**kwargs) -> None:
assert (min_lr is None) ^ (min_lr_ratio is None)
self.min_lr = min_lr
self.min_lr_ratio = min_lr_ratio
super().__init__(**kwargs)
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
if self.by_epoch:
progress = runner.epoch
max_progress = runner.max_epochs
else:
progress = runner.iter
max_progress = runner.max_iters
if self.min_lr_ratio is not None:
target_lr = base_lr * self.min_lr_ratio
else:
target_lr = self.min_lr # type:ignore
return annealing_cos(base_lr, target_lr, progress / max_progress)
@HOOKS.register_module()
class FlatCosineAnnealingLrUpdaterHook(LrUpdaterHook):
"""Flat + Cosine lr schedule.
Modified from https://github.com/fastai/fastai/blob/master/fastai/callback/schedule.py#L128 # noqa: E501
Args:
start_percent (float): When to start annealing the learning rate
after the percentage of the total training steps.
The value should be in range [0, 1).
Default: 0.75
min_lr (float, optional): The minimum lr. Default: None.
min_lr_ratio (float, optional): The ratio of minimum lr to the base lr.
Either `min_lr` or `min_lr_ratio` should be specified.
Default: None.
"""
def __init__(self,
start_percent: float = 0.75,
min_lr: Optional[float] = None,
min_lr_ratio: Optional[float] = None,
**kwargs) -> None:
assert (min_lr is None) ^ (min_lr_ratio is None)
if start_percent < 0 or start_percent > 1 or not isinstance(
start_percent, float):
raise ValueError(
'expected float between 0 and 1 start_percent, but '
f'got {start_percent}')
self.start_percent = start_percent
self.min_lr = min_lr
self.min_lr_ratio = min_lr_ratio
super().__init__(**kwargs)
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
if self.by_epoch:
start = round(runner.max_epochs * self.start_percent)
progress = runner.epoch - start
max_progress = runner.max_epochs - start
else:
start = round(runner.max_iters * self.start_percent)
progress = runner.iter - start
max_progress = runner.max_iters - start
if self.min_lr_ratio is not None:
target_lr = base_lr * self.min_lr_ratio
else:
target_lr = self.min_lr # type:ignore
if progress < 0:
return base_lr
else:
return annealing_cos(base_lr, target_lr, progress / max_progress)
@HOOKS.register_module()
class CosineRestartLrUpdaterHook(LrUpdaterHook):
"""Cosine annealing with restarts learning rate scheme.
Args:
periods (list[int]): Periods for each cosine annealing cycle.
restart_weights (list[float]): Restart weights at each
restart iteration. Defaults to [1].
min_lr (float, optional): The minimum lr. Default: None.
min_lr_ratio (float, optional): The ratio of minimum lr to the base lr.
Either `min_lr` or `min_lr_ratio` should be specified.
Default: None.
"""
def __init__(self,
periods: List[int],
restart_weights: List[float] = [1],
min_lr: Optional[float] = None,
min_lr_ratio: Optional[float] = None,
**kwargs) -> None:
assert (min_lr is None) ^ (min_lr_ratio is None)
self.periods = periods
self.min_lr = min_lr
self.min_lr_ratio = min_lr_ratio
self.restart_weights = restart_weights
assert (len(self.periods) == len(self.restart_weights)
), 'periods and restart_weights should have the same length.'
super().__init__(**kwargs)
self.cumulative_periods = [
sum(self.periods[0:i + 1]) for i in range(0, len(self.periods))
]
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
if self.by_epoch:
progress = runner.epoch
else:
progress = runner.iter
if self.min_lr_ratio is not None:
target_lr = base_lr * self.min_lr_ratio
else:
target_lr = self.min_lr # type:ignore
idx = get_position_from_periods(progress, self.cumulative_periods)
current_weight = self.restart_weights[idx]
nearest_restart = 0 if idx == 0 else self.cumulative_periods[idx - 1]
current_periods = self.periods[idx]
alpha = min((progress - nearest_restart) / current_periods, 1)
return annealing_cos(base_lr, target_lr, alpha, current_weight)
def get_position_from_periods(iteration: int, cumulative_periods: List[int]):
"""Get the position from a period list.
It will return the index of the right-closest number in the period list.
For example, the cumulative_periods = [100, 200, 300, 400],
if iteration == 50, return 0;
if iteration == 210, return 2;
if iteration == 300, return 3.
Args:
iteration (int): Current iteration.
cumulative_periods (list[int]): Cumulative period list.
Returns:
int: The position of the right-closest number in the period list.
"""
for i, period in enumerate(cumulative_periods):
if iteration < period:
return i
raise ValueError(f'Current iteration {iteration} exceeds '
f'cumulative_periods {cumulative_periods}')
@HOOKS.register_module()
class CyclicLrUpdaterHook(LrUpdaterHook):
"""Cyclic LR Scheduler.
Implement the cyclical learning rate policy (CLR) described in
https://arxiv.org/pdf/1506.01186.pdf
Different from the original paper, we use cosine annealing rather than
triangular policy inside a cycle. This improves the performance in the
3D detection area.
Args:
by_epoch (bool, optional): Whether to update LR by epoch.
target_ratio (tuple[float], optional): Relative ratio of the highest LR
and the lowest LR to the initial LR.
cyclic_times (int, optional): Number of cycles during training
step_ratio_up (float, optional): The ratio of the increasing process of
LR in the total cycle.
anneal_strategy (str, optional): {'cos', 'linear'}
Specifies the annealing strategy: 'cos' for cosine annealing,
'linear' for linear annealing. Default: 'cos'.
gamma (float, optional): Cycle decay ratio. Default: 1.
It takes values in the range (0, 1]. The difference between the
maximum learning rate and the minimum learning rate decreases
periodically when it is less than 1. `New in version 1.4.4.`
"""
def __init__(self,
by_epoch: bool = False,
target_ratio: Union[float, tuple] = (10, 1e-4),
cyclic_times: int = 1,
step_ratio_up: float = 0.4,
anneal_strategy: str = 'cos',
gamma: float = 1,
**kwargs) -> None:
if isinstance(target_ratio, float):
target_ratio = (target_ratio, target_ratio / 1e5)
elif isinstance(target_ratio, tuple):
target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \
if len(target_ratio) == 1 else target_ratio
else:
raise ValueError('target_ratio should be either float '
f'or tuple, got {type(target_ratio)}')
assert len(target_ratio) == 2, \
'"target_ratio" must be list or tuple of two floats'
assert 0 <= step_ratio_up < 1.0, \
'"step_ratio_up" must be in range [0,1)'
assert 0 < gamma <= 1, \
'"gamma" must be in range (0, 1]'
self.target_ratio = target_ratio
self.cyclic_times = cyclic_times
self.step_ratio_up = step_ratio_up
self.gamma = gamma
self.max_iter_per_phase = None
self.lr_phases: list = [] # init lr_phases
# validate anneal_strategy
if anneal_strategy not in ['cos', 'linear']:
raise ValueError('anneal_strategy must be one of "cos" or '
f'"linear", instead got {anneal_strategy}')
elif anneal_strategy == 'cos':
self.anneal_func: Callable[[float, float, float],
float] = annealing_cos
elif anneal_strategy == 'linear':
self.anneal_func = annealing_linear
assert not by_epoch, \
'currently only support "by_epoch" = False'
super().__init__(by_epoch, **kwargs)
def before_run(self, runner: 'runner.BaseRunner'):
super().before_run(runner)
# initiate lr_phases
# total lr_phases are separated as up and down
self.max_iter_per_phase = runner.max_iters // self.cyclic_times
iter_up_phase = int(self.step_ratio_up *
self.max_iter_per_phase) # type: ignore
self.lr_phases.append([0, iter_up_phase, 1, self.target_ratio[0]])
self.lr_phases.append([
iter_up_phase, self.max_iter_per_phase, self.target_ratio[0],
self.target_ratio[1]
])
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
curr_iter = runner.iter % self.max_iter_per_phase # type: ignore
curr_cycle = runner.iter // self.max_iter_per_phase # type: ignore
# Update weight decay
scale = self.gamma**curr_cycle
for (start_iter, end_iter, start_ratio, end_ratio) in self.lr_phases:
if start_iter <= curr_iter < end_iter:
# Apply cycle scaling to gradually reduce the difference
# between max_lr and base lr. The target end_ratio can be
# expressed as:
# end_ratio = (base_lr + scale * (max_lr - base_lr)) / base_lr
# iteration: 0-iter_up_phase:
if start_iter == 0:
end_ratio = 1 - scale + end_ratio * scale
# iteration: iter_up_phase-self.max_iter_per_phase
else:
start_ratio = 1 - scale + start_ratio * scale
progress = curr_iter - start_iter
return self.anneal_func(base_lr * start_ratio,
base_lr * end_ratio,
progress / (end_iter - start_iter))
@HOOKS.register_module()
class OneCycleLrUpdaterHook(LrUpdaterHook):
"""One Cycle LR Scheduler.
The 1cycle learning rate policy changes the learning rate after every
batch. The one cycle learning rate policy is described in
https://arxiv.org/pdf/1708.07120.pdf
Args:
max_lr (float or list): Upper learning rate boundaries in the cycle
for each parameter group.
total_steps (int, optional): The total number of steps in the cycle.
Note that if a value is not provided here, it will be the max_iter
of runner. Default: None.
pct_start (float): The percentage of the cycle (in number of steps)
spent increasing the learning rate.
Default: 0.3
anneal_strategy (str): {'cos', 'linear'}
Specifies the annealing strategy: 'cos' for cosine annealing,
'linear' for linear annealing.
Default: 'cos'
div_factor (float): Determines the initial learning rate via
initial_lr = max_lr/div_factor
Default: 25
final_div_factor (float): Determines the minimum learning rate via
min_lr = initial_lr/final_div_factor
Default: 1e4
three_phase (bool): If three_phase is True, use a third phase of the
schedule to annihilate the learning rate according to
final_div_factor instead of modifying the second phase (the first
two phases will be symmetrical about the step indicated by
pct_start).
Default: False
"""
def __init__(self,
max_lr: Union[float, List],
total_steps: Optional[int] = None,
pct_start: float = 0.3,
anneal_strategy: str = 'cos',
div_factor: float = 25,
final_div_factor: float = 1e4,
three_phase: bool = False,
**kwargs) -> None:
# validate by_epoch, currently only support by_epoch = False
if 'by_epoch' not in kwargs:
kwargs['by_epoch'] = False
else:
assert not kwargs['by_epoch'], \
'currently only support "by_epoch" = False'
if not isinstance(max_lr, (numbers.Number, list, dict)):
raise ValueError('the type of max_lr must be one of number, '
f'list or dict, but got {type(max_lr)}')
self._max_lr = max_lr
if total_steps is not None:
if not isinstance(total_steps, int):
raise ValueError('the type of total_steps must be int, but '
f'got {type(total_steps)}')
self.total_steps = total_steps
# validate pct_start
if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float):
raise ValueError('expected float between 0 and 1 pct_start, but '
f'got {pct_start}')
self.pct_start = pct_start
# validate anneal_strategy
if anneal_strategy not in ['cos', 'linear']:
raise ValueError('anneal_strategy must be one of "cos" or '
f'"linear", instead got {anneal_strategy}')
elif anneal_strategy == 'cos':
self.anneal_func: Callable[[float, float, float],
float] = annealing_cos
elif anneal_strategy == 'linear':
self.anneal_func = annealing_linear
self.div_factor = div_factor
self.final_div_factor = final_div_factor
self.three_phase = three_phase
self.lr_phases: list = [] # init lr_phases
super().__init__(**kwargs)
def before_run(self, runner: 'runner.BaseRunner'):
if hasattr(self, 'total_steps'):
total_steps = self.total_steps
else:
total_steps = runner.max_iters
if total_steps < runner.max_iters:
raise ValueError(
'The total steps must be greater than or equal to max '
f'iterations {runner.max_iters} of runner, but total steps '
f'is {total_steps}.')
if isinstance(runner.optimizer, dict):
self.base_lr = {}
for k, optim in runner.optimizer.items():
_max_lr = format_param(k, optim, self._max_lr)
self.base_lr[k] = [lr / self.div_factor for lr in _max_lr]
for group, lr in zip(optim.param_groups, self.base_lr[k]):
group.setdefault('initial_lr', lr)
else:
k = type(runner.optimizer).__name__
_max_lr = format_param(k, runner.optimizer, self._max_lr)
self.base_lr = [lr / self.div_factor for lr in _max_lr]
optim_param_groups = runner.optimizer.param_groups # type: ignore
for group, lr in zip(optim_param_groups, self.base_lr):
group.setdefault('initial_lr', lr)
if self.three_phase:
self.lr_phases.append(
[float(self.pct_start * total_steps) - 1, 1, self.div_factor])
self.lr_phases.append([
float(2 * self.pct_start * total_steps) - 2, self.div_factor, 1
])
self.lr_phases.append(
[total_steps - 1, 1, 1 / self.final_div_factor])
else:
self.lr_phases.append(
[float(self.pct_start * total_steps) - 1, 1, self.div_factor])
self.lr_phases.append(
[total_steps - 1, self.div_factor, 1 / self.final_div_factor])
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
curr_iter = runner.iter
start_iter = 0
for i, (end_iter, start_lr, end_lr) in enumerate(self.lr_phases):
if curr_iter <= end_iter:
pct = (curr_iter - start_iter) / (end_iter - start_iter)
lr = self.anneal_func(base_lr * start_lr, base_lr * end_lr,
pct)
break
start_iter = end_iter
return lr
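# Configuration sketch for the one-cycle policy above (iteration-based, so
# by_epoch is forced to False); the max_lr and pct_start values are
# illustrative.
lr_config = dict(
    policy='OneCycle',   # -> OneCycleLrUpdaterHook
    max_lr=0.01,
    pct_start=0.3,
    anneal_strategy='cos',
    div_factor=25,
    final_div_factor=1e4)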
@HOOKS.register_module()
class LinearAnnealingLrUpdaterHook(LrUpdaterHook):
"""Linear annealing LR Scheduler decays the learning rate of each parameter
group linearly.
Args:
min_lr (float, optional): The minimum lr. Default: None.
min_lr_ratio (float, optional): The ratio of minimum lr to the base lr.
Either `min_lr` or `min_lr_ratio` should be specified.
Default: None.
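Example:
>>> # A minimal, illustrative sketch; the ratio below is a placeholder,
>>> # not a recommended setting.
>>> lr_hook = LinearAnnealingLrUpdaterHook(
...     min_lr_ratio=0.1, by_epoch=False)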
"""
def __init__(self,
min_lr: Optional[float] = None,
min_lr_ratio: Optional[float] = None,
**kwargs):
assert (min_lr is None) ^ (min_lr_ratio is None)
self.min_lr = min_lr
self.min_lr_ratio = min_lr_ratio
super().__init__(**kwargs)
def get_lr(self, runner: 'runner.BaseRunner', base_lr: float):
if self.by_epoch:
progress = runner.epoch
max_progress = runner.max_epochs
else:
progress = runner.iter
max_progress = runner.max_iters
if self.min_lr_ratio is not None:
target_lr = base_lr * self.min_lr_ratio
else:
target_lr = self.min_lr # type:ignore
return annealing_linear(base_lr, target_lr, progress / max_progress)
def annealing_cos(start: float,
end: float,
factor: float,
weight: float = 1.) -> float:
"""Calculate annealing cos learning rate.
Cosine anneal from `weight * start + (1 - weight) * end` to `end` as
percentage goes from 0.0 to 1.0.
Args:
start (float): The starting learning rate of the cosine annealing.
end (float): The ending learning rate of the cosine annealing.
factor (float): The coefficient of `pi` when calculating the current
percentage. Range from 0.0 to 1.0.
weight (float, optional): The combination factor of `start` and `end`
when calculating the actual starting learning rate. Defaults to 1.
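Example:
>>> # Halfway through annealing (factor=0.5) with the default weight, the
>>> # result is the midpoint between ``start`` and ``end``.
>>> round(annealing_cos(0.1, 0.0, 0.5), 8)
0.05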
"""
cos_out = cos(pi * factor) + 1
return end + 0.5 * weight * (start - end) * cos_out
def annealing_linear(start: float, end: float, factor: float) -> float:
"""Calculate annealing linear learning rate.
Linear anneal from `start` to `end` as percentage goes from 0.0 to 1.0.
Args:
start (float): The starting learning rate of the linear annealing.
end (float): The ending learning rate of the linear annealing.
factor (float): The current annealing percentage. Range from 0.0 to 1.0.
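Example:
>>> # Halfway through annealing (factor=0.5) the result is the midpoint
>>> # between ``start`` and ``end``.
>>> round(annealing_linear(0.1, 0.0, 0.5), 8)
0.05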
"""
return start + (end - start) * factor
def format_param(name, optim, param):
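"""Broadcast a hyper-parameter setting to every param group of an optimizer.
A number is repeated for each param group, a list or tuple must match the
number of param groups, and a dict is indexed by the optimizer name.
"""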
if isinstance(param, numbers.Number):
return [param] * len(optim.param_groups)
elif isinstance(param, (list, tuple)): # multi param groups
if len(param) != len(optim.param_groups):
raise ValueError(f'expected {len(optim.param_groups)} '
f'values for {name}, got {len(param)}')
return param
else: # multi optimizers
if name not in param:
raise KeyError(f'{name} is not found in {param.keys()}')
return param[name]
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from .hook import HOOKS, Hook
@HOOKS.register_module()
class EmptyCacheHook(Hook):
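"""Release unoccupied cached GPU memory via ``torch.cuda.empty_cache()``
at the configured stages.
Args:
before_epoch (bool): Whether to empty the cache before each epoch.
Default: False.
after_epoch (bool): Whether to empty the cache after each epoch.
Default: True.
after_iter (bool): Whether to empty the cache after each iteration.
Default: False.
"""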
def __init__(self,
before_epoch: bool = False,
after_epoch: bool = True,
after_iter: bool = False):
self._before_epoch = before_epoch
self._after_epoch = after_epoch
self._after_iter = after_iter
def after_iter(self, runner):
if self._after_iter:
torch.cuda.empty_cache()
def before_epoch(self, runner):
if self._before_epoch:
torch.cuda.empty_cache()
def after_epoch(self, runner):
if self._after_epoch:
torch.cuda.empty_cache()
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Callable, Dict, List, Optional, Tuple, Union
import mmcv
from .hook import HOOKS, Hook
from .lr_updater import annealing_cos, annealing_linear, format_param
class MomentumUpdaterHook(Hook):
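"""Base hook for updating the momentum of an optimizer during training.
Subclasses should implement ``get_momentum``. A warmup strategy
('constant', 'linear' or 'exp') can be applied for the first
``warmup_iters`` iterations.
"""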
def __init__(self,
by_epoch: bool = True,
warmup: Optional[str] = None,
warmup_iters: int = 0,
warmup_ratio: float = 0.9):
# validate the "warmup" argument
if warmup is not None:
if warmup not in ['constant', 'linear', 'exp']:
raise ValueError(
f'"{warmup}" is not a supported type for warming up, valid'
' types are "constant" and "linear"')
if warmup is not None:
assert warmup_iters > 0, \
'"warmup_iters" must be a positive integer'
assert 0 < warmup_ratio <= 1.0, \
'"warmup_momentum" must be in range (0,1]'
self.by_epoch = by_epoch
self.warmup = warmup
self.warmup_iters = warmup_iters
self.warmup_ratio = warmup_ratio
# initial momentum for all param groups
self.base_momentum: Union[list, dict] = []
# expected momentum if no warming up is performed
self.regular_momentum: Union[list, dict] = []
def _set_momentum(self, runner, momentum_groups):
if isinstance(runner.optimizer, dict):
for k, optim in runner.optimizer.items():
for param_group, mom in zip(optim.param_groups,
momentum_groups[k]):
if 'momentum' in param_group.keys():
param_group['momentum'] = mom
elif 'betas' in param_group.keys():
param_group['betas'] = (mom, param_group['betas'][1])
else:
for param_group, mom in zip(runner.optimizer.param_groups,
momentum_groups):
if 'momentum' in param_group.keys():
param_group['momentum'] = mom
elif 'betas' in param_group.keys():
param_group['betas'] = (mom, param_group['betas'][1])
def get_momentum(self, runner, base_momentum) -> float:
raise NotImplementedError
def get_regular_momentum(self, runner) -> Union[list, Dict[str, list]]:
if isinstance(runner.optimizer, dict):
assert isinstance(self.base_momentum, dict)
momentum_groups: Dict[str, List[float]] = {}
for k in runner.optimizer.keys():
_momentum_group: List[float] = [
self.get_momentum(runner, _base_momentum)
for _base_momentum in self.base_momentum[k]
]
momentum_groups.update({k: _momentum_group})
return momentum_groups
else:
assert isinstance(self.base_momentum, list)
return [
self.get_momentum(runner, _base_momentum)
for _base_momentum in self.base_momentum
]
def get_warmup_momentum(
self,
cur_iters: int) -> Union[List[float], Dict[str, List[float]]]:
def _get_warmup_momentum(cur_iters, regular_momentum):
if self.warmup == 'constant':
warmup_momentum = [
_momentum / self.warmup_ratio
for _momentum in regular_momentum
]
elif self.warmup == 'linear':
k = (1 - cur_iters / self.warmup_iters) * (1 -
self.warmup_ratio)
warmup_momentum = [
_momentum / (1 - k) for _momentum in regular_momentum
]
elif self.warmup == 'exp':
k = self.warmup_ratio**(1 - cur_iters / self.warmup_iters)
warmup_momentum = [
_momentum / k for _momentum in regular_momentum
]
else:
raise ValueError(
'Expected values of `self.warmup` to be "constant", '
f'"linear", or "exp", got {self.warmup}')
return warmup_momentum
if isinstance(self.regular_momentum, dict):
momentum_groups = {}
for key, regular_momentum in self.regular_momentum.items():
momentum_groups[key] = _get_warmup_momentum(
cur_iters, regular_momentum)
return momentum_groups
else:
return _get_warmup_momentum(cur_iters, self.regular_momentum)
def before_run(self, runner):
# NOTE: when resuming from a checkpoint,
# if 'initial_momentum' is not saved,
# it will be set according to the optimizer params
if isinstance(runner.optimizer, dict):
self.base_momentum = {}
for k, optim in runner.optimizer.items():
for group in optim.param_groups:
if 'momentum' in group.keys():
group.setdefault('initial_momentum', group['momentum'])
else:
group.setdefault('initial_momentum', group['betas'][0])
_base_momentum = [
group['initial_momentum'] for group in optim.param_groups
]
self.base_momentum.update({k: _base_momentum})
else:
for group in runner.optimizer.param_groups:
if 'momentum' in group.keys():
group.setdefault('initial_momentum', group['momentum'])
else:
group.setdefault('initial_momentum', group['betas'][0])
self.base_momentum = [
group['initial_momentum']
for group in runner.optimizer.param_groups
]
def before_train_epoch(self, runner):
if not self.by_epoch:
return
self.regular_momentum = self.get_regular_momentum(runner)
self._set_momentum(runner, self.regular_momentum)
def before_train_iter(self, runner):
cur_iter = runner.iter
if not self.by_epoch:
self.regular_momentum = self.get_regular_momentum(runner)
if self.warmup is None or cur_iter >= self.warmup_iters:
self._set_momentum(runner, self.regular_momentum)
else:
warmup_momentum = self.get_warmup_momentum(cur_iter)
self._set_momentum(runner, warmup_momentum)
elif self.by_epoch:
if self.warmup is None or cur_iter > self.warmup_iters:
return
elif cur_iter == self.warmup_iters:
self._set_momentum(runner, self.regular_momentum)
else:
warmup_momentum = self.get_warmup_momentum(cur_iter)
self._set_momentum(runner, warmup_momentum)
@HOOKS.register_module()
class StepMomentumUpdaterHook(MomentumUpdaterHook):
"""Step momentum scheduler with min value clipping.
Args:
step (int | list[int]): Step to decay the momentum. If an int value is
given, regard it as the decay interval. If a list is given, decay
momentum at these steps.
gamma (float, optional): Decay momentum ratio. Default: 0.5.
min_momentum (float, optional): Minimum momentum value to keep. If
momentum after decay is lower than this value, it will be clipped
accordingly. If None is given, momentum will not be clipped.
Default: None.
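Example:
>>> # A minimal, illustrative sketch: halve the momentum at epochs 8 and
>>> # 11, never dropping below 0.5 (values are placeholders).
>>> momentum_hook = StepMomentumUpdaterHook(
...     step=[8, 11], gamma=0.5, min_momentum=0.5)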
"""
def __init__(self,
step: Union[int, List[int]],
gamma: float = 0.5,
min_momentum: Optional[float] = None,
**kwargs):
if isinstance(step, list):
assert mmcv.is_list_of(step, int)
assert all([s > 0 for s in step])
elif isinstance(step, int):
assert step > 0
else:
raise TypeError('"step" must be a list or integer')
self.step = step
self.gamma = gamma
self.min_momentum = min_momentum
super().__init__(**kwargs)
def get_momentum(self, runner, base_momentum: float) -> float:
progress = runner.epoch if self.by_epoch else runner.iter
# calculate exponential term
if isinstance(self.step, int):
exp = progress // self.step
else:
exp = len(self.step)
for i, s in enumerate(self.step):
if progress < s:
exp = i
break
momentum = base_momentum * (self.gamma**exp)
if self.min_momentum is not None:
# clip to a minimum value
momentum = max(momentum, self.min_momentum)
return momentum
@HOOKS.register_module()
class CosineAnnealingMomentumUpdaterHook(MomentumUpdaterHook):
"""Cosine annealing LR Momentum decays the Momentum of each parameter group
linearly.
Args:
min_momentum (float, optional): The minimum momentum. Default: None.
min_momentum_ratio (float, optional): The ratio of minimum momentum to
the base momentum. Either `min_momentum` or `min_momentum_ratio`
should be specified. Default: None.
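Example:
>>> # A minimal, illustrative sketch: anneal the momentum to 90% of its
>>> # base value over the schedule (the ratio is a placeholder).
>>> momentum_hook = CosineAnnealingMomentumUpdaterHook(
...     min_momentum_ratio=0.9, by_epoch=False)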
"""
def __init__(self,
min_momentum: Optional[float] = None,
min_momentum_ratio: Optional[float] = None,
**kwargs):
assert (min_momentum is None) ^ (min_momentum_ratio is None)
self.min_momentum = min_momentum
self.min_momentum_ratio = min_momentum_ratio
super().__init__(**kwargs)
def get_momentum(self, runner, base_momentum: float) -> float:
if self.by_epoch:
progress = runner.epoch
max_progress = runner.max_epochs
else:
progress = runner.iter
max_progress = runner.max_iters
if self.min_momentum_ratio is not None:
target_momentum = base_momentum * self.min_momentum_ratio
else:
assert self.min_momentum is not None
target_momentum = self.min_momentum
return annealing_cos(base_momentum, target_momentum,
progress / max_progress)
@HOOKS.register_module()
class LinearAnnealingMomentumUpdaterHook(MomentumUpdaterHook):
"""Linear annealing LR Momentum decays the Momentum of each parameter group
linearly.
Args:
min_momentum (float, optional): The minimum momentum. Default: None.
min_momentum_ratio (float, optional): The ratio of minimum momentum to
the base momentum. Either `min_momentum` or `min_momentum_ratio`
should be specified. Default: None.
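Example:
>>> # A minimal, illustrative sketch mirroring the cosine variant, using
>>> # a fixed minimum momentum instead of a ratio (the value is a
>>> # placeholder).
>>> momentum_hook = LinearAnnealingMomentumUpdaterHook(
...     min_momentum=0.85, by_epoch=False)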
"""
def __init__(self,
min_momentum: Optional[float] = None,
min_momentum_ratio: Optional[float] = None,
**kwargs):
assert (min_momentum is None) ^ (min_momentum_ratio is None)
self.min_momentum = min_momentum
self.min_momentum_ratio = min_momentum_ratio
super().__init__(**kwargs)
def get_momentum(self, runner, base_momentum: float) -> float:
if self.by_epoch:
progress = runner.epoch
max_progress = runner.max_epochs
else:
progress = runner.iter
max_progress = runner.max_iters
if self.min_momentum_ratio is not None:
target_momentum = base_momentum * self.min_momentum_ratio
else:
assert self.min_momentum is not None
target_momentum = self.min_momentum
return annealing_linear(base_momentum, target_momentum,
progress / max_progress)
@HOOKS.register_module()
class CyclicMomentumUpdaterHook(MomentumUpdaterHook):
"""Cyclic momentum Scheduler.
Implement the cyclical momentum scheduler policy described in
https://arxiv.org/pdf/1708.07120.pdf
This momentum scheduler usually used together with the CyclicLRUpdater
to improve the performance in the 3D detection area.
Args:
target_ratio (tuple[float]): Relative ratio of the lowest momentum and
the highest momentum to the initial momentum.
cyclic_times (int): Number of cycles during training
step_ratio_up (float): The ratio of the increasing process of momentum
in the total cycle.
by_epoch (bool): Whether to update momentum by epoch.
anneal_strategy (str, optional): {'cos', 'linear'}
Specifies the annealing strategy: 'cos' for cosine annealing,
'linear' for linear annealing. Default: 'cos'.
gamma (float, optional): Cycle decay ratio. Default: 1.
It takes values in the range (0, 1]. The difference between the
maximum momentum and the minimum momentum decreases
periodically when it is less than 1. `New in version 1.4.4.`
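Example:
>>> # A minimal, illustrative sketch: a single cycle whose lowest momentum
>>> # is 0.85/0.95 of the initial value (ratios are placeholders).
>>> momentum_hook = CyclicMomentumUpdaterHook(
...     target_ratio=(0.85 / 0.95, 1.), cyclic_times=1, step_ratio_up=0.4)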
"""
def __init__(self,
by_epoch: bool = False,
target_ratio: Tuple[float, float] = (0.85 / 0.95, 1.),
cyclic_times: int = 1,
step_ratio_up: float = 0.4,
anneal_strategy: str = 'cos',
gamma: float = 1.,
**kwargs):
if isinstance(target_ratio, float):
target_ratio = (target_ratio, target_ratio / 1e5)
elif isinstance(target_ratio, tuple):
target_ratio = (target_ratio[0], target_ratio[0] / 1e5) \
if len(target_ratio) == 1 else target_ratio
else:
raise ValueError('target_ratio should be either float '
f'or tuple, got {type(target_ratio)}')
assert len(target_ratio) == 2, \
'"target_ratio" must be list or tuple of two floats'
assert 0 <= step_ratio_up < 1.0, \
'"step_ratio_up" must be in range [0,1)'
self.target_ratio = target_ratio
self.cyclic_times = cyclic_times
self.step_ratio_up = step_ratio_up
self.gamma = gamma
self.momentum_phases: List[list] = [] # init momentum_phases
self.anneal_func: Callable[[float, float, float], float]
if anneal_strategy not in ['cos', 'linear']:
raise ValueError('anneal_strategy must be one of "cos" or '
f'"linear", instead got {anneal_strategy}')
elif anneal_strategy == 'cos':
self.anneal_func = annealing_cos
elif anneal_strategy == 'linear':
self.anneal_func = annealing_linear
# currently only support by_epoch=False
assert not by_epoch, \
'currently only support "by_epoch" = False'
super().__init__(by_epoch, **kwargs)
def before_run(self, runner):
super().before_run(runner)
# initiate momentum_phases
# total momentum_phases are separated as up and down
max_iter_per_phase = runner.max_iters // self.cyclic_times
iter_up_phase = int(self.step_ratio_up * max_iter_per_phase)
self.max_iter_per_phase = max_iter_per_phase
self.momentum_phases.append(
[0, iter_up_phase, 1, self.target_ratio[0]])
self.momentum_phases.append([
iter_up_phase, max_iter_per_phase, self.target_ratio[0],
self.target_ratio[1]
])
def get_momentum(self, runner, base_momentum: float) -> float:
curr_iter = runner.iter % self.max_iter_per_phase
curr_cycle = runner.iter // self.max_iter_per_phase
scale = self.gamma**curr_cycle
for (start_iter, end_iter, start_ratio, end_ratio) \
in self.momentum_phases:
if start_iter <= curr_iter < end_iter:
# Apply cycle scaling to gradually reduce the difference
# between max_momentum and base momentum. The target end_ratio
# can be expressed as:
# end_ratio = (base_momentum + scale * \
# (max_momentum - base_momentum)) / base_momentum
# iteration: 0-iter_up_phase:
if start_iter == 0:
end_ratio = 1 - scale + end_ratio * scale
# iteration: iter_up_phase-self.max_iter_per_phase
else:
start_ratio = 1 - scale + start_ratio * scale
progress = curr_iter - start_iter
return self.anneal_func(base_momentum * start_ratio,
base_momentum * end_ratio,
progress / (end_iter - start_iter))
raise RuntimeError('The method should return within the for-loop '
'and should never reach this line')
@HOOKS.register_module()
class OneCycleMomentumUpdaterHook(MomentumUpdaterHook):
"""OneCycle momentum Scheduler.
This momentum scheduler is usually used together with the OneCycleLrUpdater
to improve performance.
Args:
base_momentum (float or list): Lower momentum boundaries in the cycle
for each parameter group. Note that momentum is cycled inversely
to learning rate; at the peak of a cycle, momentum is
'base_momentum' and learning rate is 'max_lr'.
Default: 0.85
max_momentum (float or list): Upper momentum boundaries in the cycle
for each parameter group. Functionally,
it defines the cycle amplitude (max_momentum - base_momentum).
Note that momentum is cycled inversely
to learning rate; at the start of a cycle, momentum is
'max_momentum' and learning rate is 'base_lr'
Default: 0.95
pct_start (float): The percentage of the cycle (in number of steps)
spent increasing the learning rate.
Default: 0.3
anneal_strategy (str): {'cos', 'linear'}
Specifies the annealing strategy: 'cos' for cosine annealing,
'linear' for linear annealing.
Default: 'cos'
three_phase (bool): If three_phase is True, use a third phase of the
schedule to annihilate the learning rate according to
final_div_factor instead of modifying the second phase (the first
two phases will be symmetrical about the step indicated by
pct_start).
Default: False
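Example:
>>> # A minimal, illustrative sketch: cycle the momentum between 0.85 and
>>> # 0.95, inversely to a one-cycle lr schedule (values are the defaults).
>>> momentum_hook = OneCycleMomentumUpdaterHook(
...     base_momentum=0.85, max_momentum=0.95, pct_start=0.3)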
"""
def __init__(self,
base_momentum: Union[float, list, dict] = 0.85,
max_momentum: Union[float, list, dict] = 0.95,
pct_start: float = 0.3,
anneal_strategy: str = 'cos',
three_phase: bool = False,
**kwargs):
# validate by_epoch, currently only support by_epoch=False
if 'by_epoch' not in kwargs:
kwargs['by_epoch'] = False
else:
assert not kwargs['by_epoch'], \
'currently only support "by_epoch" = False'
if not isinstance(base_momentum, (float, list, dict)):
raise ValueError('base_momentum must be of type float, '
f'list or dict, but got {type(base_momentum)}')
self._base_momentum = base_momentum
if not isinstance(max_momentum, (float, list, dict)):
raise ValueError('max_momentum must be of type float, '
f'list or dict, but got {type(max_momentum)}')
self._max_momentum = max_momentum
# validate pct_start
if pct_start < 0 or pct_start > 1 or not isinstance(pct_start, float):
raise ValueError('expected float between 0 and 1 for pct_start, but '
f'got {pct_start}')
self.pct_start = pct_start
# validate anneal_strategy
self.anneal_func: Callable[[float, float, float], float]
if anneal_strategy not in ['cos', 'linear']:
raise ValueError('anneal_strategy must be one of "cos" or '
f'"linear", instead got {anneal_strategy}')
elif anneal_strategy == 'cos':
self.anneal_func = annealing_cos
elif anneal_strategy == 'linear':
self.anneal_func = annealing_linear
self.three_phase = three_phase
self.momentum_phases: List[dict] = [] # init momentum_phases
super().__init__(**kwargs)
def before_run(self, runner):
if isinstance(runner.optimizer, dict):
for k, optim in runner.optimizer.items():
if ('momentum' not in optim.defaults
and 'betas' not in optim.defaults):
raise ValueError('optimizer must support momentum with '
'option enabled')
self.use_beta1 = 'betas' in optim.defaults
_base_momentum = format_param(k, optim, self._base_momentum)
_max_momentum = format_param(k, optim, self._max_momentum)
for group, b_momentum, m_momentum in zip(
optim.param_groups, _base_momentum, _max_momentum):
if self.use_beta1:
_, beta2 = group['betas']
group['betas'] = (m_momentum, beta2)
else:
group['momentum'] = m_momentum
group['base_momentum'] = b_momentum
group['max_momentum'] = m_momentum
else:
optim = runner.optimizer
if ('momentum' not in optim.defaults
and 'betas' not in optim.defaults):
raise ValueError('optimizer must support momentum with '
'option enabled')
self.use_beta1 = 'betas' in optim.defaults
k = type(optim).__name__
_base_momentum = format_param(k, optim, self._base_momentum)
_max_momentum = format_param(k, optim, self._max_momentum)
for group, b_momentum, m_momentum in zip(optim.param_groups,
_base_momentum,
_max_momentum):
if self.use_beta1:
_, beta2 = group['betas']
group['betas'] = (m_momentum, beta2)
else:
group['momentum'] = m_momentum
group['base_momentum'] = b_momentum
group['max_momentum'] = m_momentum
if self.three_phase:
self.momentum_phases.append({
'end_iter':
float(self.pct_start * runner.max_iters) - 1,
'start_momentum':
'max_momentum',
'end_momentum':
'base_momentum'
})
self.momentum_phases.append({
'end_iter':
float(2 * self.pct_start * runner.max_iters) - 2,
'start_momentum':
'base_momentum',
'end_momentum':
'max_momentum'
})
self.momentum_phases.append({
'end_iter': runner.max_iters - 1,
'start_momentum': 'max_momentum',
'end_momentum': 'max_momentum'
})
else:
self.momentum_phases.append({
'end_iter':
float(self.pct_start * runner.max_iters) - 1,
'start_momentum':
'max_momentum',
'end_momentum':
'base_momentum'
})
self.momentum_phases.append({
'end_iter': runner.max_iters - 1,
'start_momentum': 'base_momentum',
'end_momentum': 'max_momentum'
})
def _set_momentum(self, runner, momentum_groups):
if isinstance(runner.optimizer, dict):
for k, optim in runner.optimizer.items():
for param_group, mom in zip(optim.param_groups,
momentum_groups[k]):
if 'momentum' in param_group.keys():
param_group['momentum'] = mom
elif 'betas' in param_group.keys():
param_group['betas'] = (mom, param_group['betas'][1])
else:
for param_group, mom in zip(runner.optimizer.param_groups,
momentum_groups):
if 'momentum' in param_group.keys():
param_group['momentum'] = mom
elif 'betas' in param_group.keys():
param_group['betas'] = (mom, param_group['betas'][1])
def get_momentum(self, runner, param_group: Dict[str, float]) -> float:
curr_iter = runner.iter
start_iter = 0
momentum = 0.
for i, phase in enumerate(self.momentum_phases):
end_iter = phase['end_iter']
if curr_iter <= end_iter or i == len(self.momentum_phases) - 1:
pct = (curr_iter - start_iter) / (end_iter - start_iter)
momentum = self.anneal_func(
param_group[phase['start_momentum']],
param_group[phase['end_momentum']], pct)
break
start_iter = end_iter
return momentum
def get_regular_momentum(self, runner):
if isinstance(runner.optimizer, dict):
momentum_groups = {}
for k, optim in runner.optimizer.items():
_momentum_group = [
self.get_momentum(runner, param_group)
for param_group in optim.param_groups
]
momentum_groups.update({k: _momentum_group})
return momentum_groups
else:
momentum_groups = []
for param_group in runner.optimizer.param_groups:
momentum_groups.append(self.get_momentum(runner, param_group))
return momentum_groups
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import logging
from collections import defaultdict
from itertools import chain
from typing import Optional, Union
import torch.nn as nn
from torch import Tensor
from torch.nn.utils import clip_grad
from mmcv.utils import TORCH_VERSION, _BatchNorm, digit_version
from ..dist_utils import allreduce_grads
from ..fp16_utils import LossScaler, wrap_fp16_model
from .hook import HOOKS, Hook
try:
# If PyTorch version >= 1.6.0, torch.cuda.amp.GradScaler would be imported
# and used; otherwise, auto fp16 will adopt mmcv's implementation.
from torch.cuda.amp import GradScaler
except ImportError:
pass
@HOOKS.register_module()
class OptimizerHook(Hook):
"""A hook contains custom operations for the optimizer.
Args:
grad_clip (dict, optional): A config dict to control the clip_grad.
Default: None.
detect_anomalous_params (bool): This option is only used for
debugging and will slow down the training speed.
Detect anomalous parameters that are not included in
the computational graph with `loss` as the root.
There are two cases:
- Parameters were not used during the forward pass.
- Parameters were not used to produce the loss.
Default: False.
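Example:
>>> # A minimal, illustrative sketch: clip gradients to a maximum L2 norm
>>> # of 35 before every optimizer step. The dict is passed as keyword
>>> # arguments to ``torch.nn.utils.clip_grad_norm_``.
>>> optimizer_hook = OptimizerHook(
...     grad_clip=dict(max_norm=35, norm_type=2))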
"""
def __init__(self,
grad_clip: Optional[dict] = None,
detect_anomalous_params: bool = False):
self.grad_clip = grad_clip
self.detect_anomalous_params = detect_anomalous_params
def clip_grads(self, params):
params = list(
filter(lambda p: p.requires_grad and p.grad is not None, params))
if len(params) > 0:
return clip_grad.clip_grad_norm_(params, **self.grad_clip)
def after_train_iter(self, runner):
runner.optimizer.zero_grad()
if self.detect_anomalous_params:
self.detect_anomalous_parameters(runner.outputs['loss'], runner)
runner.outputs['loss'].backward()
if self.grad_clip is not None:
grad_norm = self.clip_grads(runner.model.parameters())
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update({'grad_norm': float(grad_norm)},
runner.outputs['num_samples'])
runner.optimizer.step()
def detect_anomalous_parameters(self, loss: Tensor, runner) -> None:
logger = runner.logger
parameters_in_graph = set()
visited = set()
def traverse(grad_fn):
if grad_fn is None:
return
if grad_fn not in visited:
visited.add(grad_fn)
if hasattr(grad_fn, 'variable'):
parameters_in_graph.add(grad_fn.variable)
parents = grad_fn.next_functions
if parents is not None:
for parent in parents:
grad_fn = parent[0]
traverse(grad_fn)
traverse(loss.grad_fn)
for n, p in runner.model.named_parameters():
if p not in parameters_in_graph and p.requires_grad:
logger.log(
level=logging.ERROR,
msg=f'{n} with shape {p.size()} is not '
f'in the computational graph \n')
@HOOKS.register_module()
class GradientCumulativeOptimizerHook(OptimizerHook):
"""Optimizer Hook implements multi-iters gradient cumulating.
Args:
cumulative_iters (int, optional): Num of gradient cumulative iters.
The optimizer will step every `cumulative_iters` iters.
Defaults to 1.
Examples:
>>> # Use cumulative_iters to simulate a large batch size
>>> # It is helpful when the hardware cannot handle a large batch size.
>>> loader = DataLoader(data, batch_size=64)
>>> optim_hook = GradientCumulativeOptimizerHook(cumulative_iters=4)
>>> # almost equals to
>>> loader = DataLoader(data, batch_size=256)
>>> optim_hook = OptimizerHook()
"""
def __init__(self, cumulative_iters: int = 1, **kwargs):
super().__init__(**kwargs)
assert isinstance(cumulative_iters, int) and cumulative_iters > 0, \
f'cumulative_iters only accepts positive int, but got ' \
f'{type(cumulative_iters)} instead.'
self.cumulative_iters = cumulative_iters
self.divisible_iters = 0
self.remainder_iters = 0
self.initialized = False
def has_batch_norm(self, module: nn.Module) -> bool:
if isinstance(module, _BatchNorm):
return True
for m in module.children():
if self.has_batch_norm(m):
return True
return False
def _init(self, runner):
if runner.iter % self.cumulative_iters != 0:
runner.logger.warning(
'Resume iter number is not divisible by cumulative_iters in '
'GradientCumulativeOptimizerHook, which means the gradient of '
'some iters is lost and the result may be influenced slightly.'
)
if self.has_batch_norm(runner.model) and self.cumulative_iters > 1:
runner.logger.warning(
'GradientCumulativeOptimizerHook may slightly decrease '
'performance if the model has BatchNorm layers.')
residual_iters = runner.max_iters - runner.iter
self.divisible_iters = (
residual_iters // self.cumulative_iters * self.cumulative_iters)
self.remainder_iters = residual_iters - self.divisible_iters
self.initialized = True
def after_train_iter(self, runner):
if not self.initialized:
self._init(runner)
if runner.iter < self.divisible_iters:
loss_factor = self.cumulative_iters
else:
loss_factor = self.remainder_iters
loss = runner.outputs['loss']
loss = loss / loss_factor
loss.backward()
if (self.every_n_iters(runner, self.cumulative_iters)
or self.is_last_iter(runner)):
if self.grad_clip is not None:
grad_norm = self.clip_grads(runner.model.parameters())
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update({'grad_norm': float(grad_norm)},
runner.outputs['num_samples'])
runner.optimizer.step()
runner.optimizer.zero_grad()
if (TORCH_VERSION != 'parrots'
and digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
@HOOKS.register_module()
class Fp16OptimizerHook(OptimizerHook):
"""FP16 optimizer hook (using PyTorch's implementation).
If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend,
to take care of the optimization procedure.
Args:
loss_scale (float | str | dict): Scale factor configuration.
If loss_scale is a float, static loss scaling will be used with
the specified scale. If loss_scale is a string, it must be
'dynamic', then dynamic loss scaling will be used.
It can also be a dict containing arguments of GradScaler.
Defaults to 512. For PyTorch >= 1.6, mmcv uses official
implementation of GradScaler. If you use a dict version of
loss_scale to create GradScaler, please refer to:
https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler
for the parameters.
Examples:
>>> loss_scale = dict(
... init_scale=65536.0,
... growth_factor=2.0,
... backoff_factor=0.5,
... growth_interval=2000
... )
>>> optimizer_hook = Fp16OptimizerHook(loss_scale=loss_scale)
"""
def __init__(self,
grad_clip: Optional[dict] = None,
coalesce: bool = True,
bucket_size_mb: int = -1,
loss_scale: Union[float, str, dict] = 512.,
distributed: bool = True):
self.grad_clip = grad_clip
self.coalesce = coalesce
self.bucket_size_mb = bucket_size_mb
self.distributed = distributed
self._scale_update_param = None
if loss_scale == 'dynamic':
self.loss_scaler = GradScaler()
elif isinstance(loss_scale, float):
self._scale_update_param = loss_scale
self.loss_scaler = GradScaler(init_scale=loss_scale)
elif isinstance(loss_scale, dict):
self.loss_scaler = GradScaler(**loss_scale)
else:
raise ValueError('loss_scale must be of type float, dict, or '
f'"dynamic", got {loss_scale}')
def before_run(self, runner) -> None:
"""Preparing steps before Mixed Precision Training."""
# wrap model mode to fp16
wrap_fp16_model(runner.model)
# resume from state dict
if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']:
scaler_state_dict = runner.meta['fp16']['loss_scaler']
self.loss_scaler.load_state_dict(scaler_state_dict)
def copy_grads_to_fp32(self, fp16_net: nn.Module,
fp32_weights: Tensor) -> None:
"""Copy gradients from fp16 model to fp32 weight copy."""
for fp32_param, fp16_param in zip(fp32_weights,
fp16_net.parameters()):
if fp16_param.grad is not None:
if fp32_param.grad is None:
fp32_param.grad = fp32_param.data.new(
fp32_param.size())
fp32_param.grad.copy_(fp16_param.grad)
def copy_params_to_fp16(self, fp16_net: nn.Module,
fp32_weights: Tensor) -> None:
"""Copy updated params from fp32 weight copy to fp16 model."""
for fp16_param, fp32_param in zip(fp16_net.parameters(),
fp32_weights):
fp16_param.data.copy_(fp32_param.data)
def after_train_iter(self, runner) -> None:
"""Backward optimization steps for Mixed Precision Training. For
dynamic loss scaling, please refer to
https://pytorch.org/docs/stable/amp.html#torch.cuda.amp.GradScaler.
1. Scale the loss by a scale factor.
2. Backward the loss to obtain the gradients.
3. Unscale the optimizer’s gradient tensors.
4. Call optimizer.step() and update scale factor.
5. Save loss_scaler state_dict for resume purpose.
"""
# clear grads of last iteration
runner.model.zero_grad()
runner.optimizer.zero_grad()
self.loss_scaler.scale(runner.outputs['loss']).backward()
self.loss_scaler.unscale_(runner.optimizer)
# grad clip
if self.grad_clip is not None:
grad_norm = self.clip_grads(runner.model.parameters())
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update({'grad_norm': float(grad_norm)},
runner.outputs['num_samples'])
# backward and update scaler
self.loss_scaler.step(runner.optimizer)
self.loss_scaler.update(self._scale_update_param)
# save state_dict of loss_scaler
runner.meta.setdefault(
'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()
@HOOKS.register_module()
class GradientCumulativeFp16OptimizerHook(GradientCumulativeOptimizerHook,
Fp16OptimizerHook):
"""Fp16 optimizer Hook (using PyTorch's implementation) implements
multi-iters gradient cumulating.
If you are using PyTorch >= 1.6, torch.cuda.amp is used as the backend,
to take care of the optimization procedure.
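Example:
>>> # A minimal, illustrative sketch: accumulate gradients over 4
>>> # iterations while using dynamic loss scaling (values are
>>> # placeholders).
>>> optimizer_hook = GradientCumulativeFp16OptimizerHook(
...     cumulative_iters=4, loss_scale='dynamic')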
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def after_train_iter(self, runner) -> None:
if not self.initialized:
self._init(runner)
if runner.iter < self.divisible_iters:
loss_factor = self.cumulative_iters
else:
loss_factor = self.remainder_iters
loss = runner.outputs['loss']
loss = loss / loss_factor
self.loss_scaler.scale(loss).backward()
if (self.every_n_iters(runner, self.cumulative_iters)
or self.is_last_iter(runner)):
# copy fp16 grads in the model to fp32 params in the optimizer
self.loss_scaler.unscale_(runner.optimizer)
if self.grad_clip is not None:
grad_norm = self.clip_grads(runner.model.parameters())
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update(
{'grad_norm': float(grad_norm)},
runner.outputs['num_samples'])
# backward and update scaler
self.loss_scaler.step(runner.optimizer)
self.loss_scaler.update(self._scale_update_param)
# save state_dict of loss_scaler
runner.meta.setdefault(
'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()
# clear grads
runner.model.zero_grad()
runner.optimizer.zero_grad()
else:
@HOOKS.register_module()
class Fp16OptimizerHook(OptimizerHook): # type: ignore
"""FP16 optimizer hook (mmcv's implementation).
The steps of the fp16 optimizer are as follows.
1. Scale the loss value.
2. Backpropagate in the fp16 model.
3. Copy gradients from the fp16 model to the fp32 weights.
4. Update the fp32 weights.
5. Copy the updated parameters from the fp32 weights back to the fp16 model.
Refer to https://arxiv.org/abs/1710.03740 for more details.
Args:
loss_scale (float | str | dict): Scale factor configuration.
If loss_scale is a float, static loss scaling will be used with
the specified scale. If loss_scale is a string, it must be
'dynamic', then dynamic loss scaling will be used.
It can also be a dict containing arguments of LossScaler.
Defaults to 512.
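Example:
>>> # A minimal, illustrative sketch: dynamic loss scaling combined with
>>> # gradient clipping (the grad_clip dict is a placeholder).
>>> optimizer_hook = Fp16OptimizerHook(
...     loss_scale='dynamic', grad_clip=dict(max_norm=35, norm_type=2))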
"""
def __init__(self,
grad_clip: Optional[dict] = None,
coalesce: bool = True,
bucket_size_mb: int = -1,
loss_scale: Union[float, str, dict] = 512.,
distributed: bool = True):
self.grad_clip = grad_clip
self.coalesce = coalesce
self.bucket_size_mb = bucket_size_mb
self.distributed = distributed
if loss_scale == 'dynamic':
self.loss_scaler = LossScaler(mode='dynamic')
elif isinstance(loss_scale, float):
self.loss_scaler = LossScaler(
init_scale=loss_scale, mode='static')
elif isinstance(loss_scale, dict):
self.loss_scaler = LossScaler(**loss_scale)
else:
raise ValueError('loss_scale must be of type float, dict, or '
f'"dynamic", got {loss_scale}')
def before_run(self, runner) -> None:
"""Preparing steps before Mixed Precision Training.
1. Make a master copy of fp32 weights for optimization.
2. Convert the main model from fp32 to fp16.
"""
# keep a copy of fp32 weights
old_groups = runner.optimizer.param_groups
runner.optimizer.param_groups = copy.deepcopy(
runner.optimizer.param_groups)
state: defaultdict = defaultdict(dict)
p_map = {
old_p: p
for old_p, p in zip(
chain(*(g['params'] for g in old_groups)),
chain(*(g['params']
for g in runner.optimizer.param_groups)))
}
for k, v in runner.optimizer.state.items():
state[p_map[k]] = v
runner.optimizer.state = state
# convert model to fp16
wrap_fp16_model(runner.model)
# resume from state dict
if 'fp16' in runner.meta and 'loss_scaler' in runner.meta['fp16']:
scaler_state_dict = runner.meta['fp16']['loss_scaler']
self.loss_scaler.load_state_dict(scaler_state_dict)
def copy_grads_to_fp32(self, fp16_net: nn.Module,
fp32_weights: Tensor) -> None:
"""Copy gradients from fp16 model to fp32 weight copy."""
for fp32_param, fp16_param in zip(fp32_weights,
fp16_net.parameters()):
if fp16_param.grad is not None:
if fp32_param.grad is None:
fp32_param.grad = fp32_param.data.new(
fp32_param.size())
fp32_param.grad.copy_(fp16_param.grad)
def copy_params_to_fp16(self, fp16_net: nn.Module,
fp32_weights: Tensor) -> None:
"""Copy updated params from fp32 weight copy to fp16 model."""
for fp16_param, fp32_param in zip(fp16_net.parameters(),
fp32_weights):
fp16_param.data.copy_(fp32_param.data)
def after_train_iter(self, runner) -> None:
"""Backward optimization steps for Mixed Precision Training. For
dynamic loss scaling, please refer to the ``LossScaler`` class.
1. Scale the loss by a scale factor.
2. Backward the loss to obtain the gradients (fp16).
3. Copy gradients from the model to the fp32 weight copy.
4. Scale the gradients back and update the fp32 weight copy.
5. Copy back the params from fp32 weight copy to the fp16 model.
6. Save loss_scaler state_dict for resume purpose.
"""
# clear grads of last iteration
runner.model.zero_grad()
runner.optimizer.zero_grad()
# scale the loss value
scaled_loss = runner.outputs['loss'] * self.loss_scaler.loss_scale
scaled_loss.backward()
# copy fp16 grads in the model to fp32 params in the optimizer
fp32_weights = []
for param_group in runner.optimizer.param_groups:
fp32_weights += param_group['params']
self.copy_grads_to_fp32(runner.model, fp32_weights)
# allreduce grads
if self.distributed:
allreduce_grads(fp32_weights, self.coalesce,
self.bucket_size_mb)
has_overflow = self.loss_scaler.has_overflow(fp32_weights)
# if has overflow, skip this iteration
if not has_overflow:
# scale the gradients back
for param in fp32_weights:
if param.grad is not None:
param.grad.div_(self.loss_scaler.loss_scale)
if self.grad_clip is not None:
grad_norm = self.clip_grads(fp32_weights)
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update(
{'grad_norm': float(grad_norm)},
runner.outputs['num_samples'])
# update fp32 params
runner.optimizer.step()
# copy fp32 params to the fp16 model
self.copy_params_to_fp16(runner.model, fp32_weights)
self.loss_scaler.update_scale(has_overflow)
if has_overflow:
runner.logger.warning('Check overflow, downscale loss scale '
f'to {self.loss_scaler.cur_scale}')
# save state_dict of loss_scaler
runner.meta.setdefault(
'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()
@HOOKS.register_module()
class GradientCumulativeFp16OptimizerHook( # type: ignore
GradientCumulativeOptimizerHook, Fp16OptimizerHook):
"""Fp16 optimizer Hook (using mmcv implementation) implements multi-
iters gradient cumulating."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def after_train_iter(self, runner) -> None:
if not self.initialized:
self._init(runner)
if runner.iter < self.divisible_iters:
loss_factor = self.cumulative_iters
else:
loss_factor = self.remainder_iters
loss = runner.outputs['loss']
loss = loss / loss_factor
# scale the loss value
scaled_loss = loss * self.loss_scaler.loss_scale
scaled_loss.backward()
if (self.every_n_iters(runner, self.cumulative_iters)
or self.is_last_iter(runner)):
# copy fp16 grads in the model to fp32 params in the optimizer
fp32_weights = []
for param_group in runner.optimizer.param_groups:
fp32_weights += param_group['params']
self.copy_grads_to_fp32(runner.model, fp32_weights)
# allreduce grads
if self.distributed:
allreduce_grads(fp32_weights, self.coalesce,
self.bucket_size_mb)
has_overflow = self.loss_scaler.has_overflow(fp32_weights)
# if has overflow, skip this iteration
if not has_overflow:
# scale the gradients back
for param in fp32_weights:
if param.grad is not None:
param.grad.div_(self.loss_scaler.loss_scale)
if self.grad_clip is not None:
grad_norm = self.clip_grads(fp32_weights)
if grad_norm is not None:
# Add grad norm to the logger
runner.log_buffer.update(
{'grad_norm': float(grad_norm)},
runner.outputs['num_samples'])
# update fp32 params
runner.optimizer.step()
# copy fp32 params to the fp16 model
self.copy_params_to_fp16(runner.model, fp32_weights)
else:
runner.logger.warning(
'Check overflow, downscale loss scale '
f'to {self.loss_scaler.cur_scale}')
self.loss_scaler.update_scale(has_overflow)
# save state_dict of loss_scaler
runner.meta.setdefault(
'fp16', {})['loss_scaler'] = self.loss_scaler.state_dict()
# clear grads
runner.model.zero_grad()
runner.optimizer.zero_grad()
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import warnings
from typing import Callable, List, Optional, Union
import torch
from ..dist_utils import master_only
from .hook import HOOKS, Hook
@HOOKS.register_module()
class ProfilerHook(Hook):
"""Profiler to analyze performance during training.
PyTorch Profiler is a tool that allows the collection of performance
metrics during training. More details on Profiler can be found at
https://pytorch.org/docs/1.8.1/profiler.html#torch.profiler.profile
Args:
by_epoch (bool): Profile performance by epoch or by iteration.
Default: True.
profile_iters (int): Number of iterations for profiling.
If ``by_epoch=True``, profile_iters indicates the number of epochs
to profile at the beginning of the training; otherwise, it
indicates the first profile_iters iterations. Default: 1.
activities (list[str]): List of activity groups (CPU, CUDA) to use in
profiling. Default: ['cpu', 'cuda'].
schedule (dict, optional): Config of generating the callable schedule.
if schedule is None, profiler will not add step markers into the
trace and table view. Default: None.
on_trace_ready (callable, dict): Either a handler or a dict used to
generate a handler. Default: None.
record_shapes (bool): Save information about operator's input shapes.
Default: False.
profile_memory (bool): Track tensor memory allocation/deallocation.
Default: False.
with_stack (bool): Record source information (file and line number)
for the ops. Default: False.
with_flops (bool): Use formula to estimate the FLOPS of specific
operators (matrix multiplication and 2D convolution).
Default: False.
json_trace_path (str, optional): Exports the collected trace in Chrome
JSON format. Default: None.
Example:
>>> runner = ... # instantiate a Runner
>>> # tensorboard trace
>>> trace_config = dict(type='tb_trace', dir_name='work_dir')
>>> profiler_config = dict(on_trace_ready=trace_config)
>>> runner.register_profiler_hook(profiler_config)
>>> runner.run(data_loaders=[trainloader], workflow=[('train', 1)])
"""
def __init__(self,
by_epoch: bool = True,
profile_iters: int = 1,
activities: List[str] = ['cpu', 'cuda'],
schedule: Optional[dict] = None,
on_trace_ready: Optional[Union[Callable, dict]] = None,
record_shapes: bool = False,
profile_memory: bool = False,
with_stack: bool = False,
with_flops: bool = False,
json_trace_path: Optional[str] = None) -> None:
try:
from torch import profiler # torch version >= 1.8.1
except ImportError:
raise ImportError('profiler is a new feature of torch 1.8.1, '
f'but your version is {torch.__version__}')
assert isinstance(by_epoch, bool), '``by_epoch`` should be a boolean.'
self.by_epoch = by_epoch
if profile_iters < 1:
raise ValueError('profile_iters should be greater than 0, but got '
f'{profile_iters}')
self.profile_iters = profile_iters
if not isinstance(activities, list):
raise ValueError(
f'activities should be list, but got {type(activities)}')
self.activities = []
for activity in activities:
activity = activity.lower()
if activity == 'cpu':
self.activities.append(profiler.ProfilerActivity.CPU)
elif activity == 'cuda':
self.activities.append(profiler.ProfilerActivity.CUDA)
else:
raise ValueError(
f'activity should be "cpu" or "cuda", but got {activity}')
if schedule is not None:
self.schedule = profiler.schedule(**schedule)
else:
self.schedule = None
self.on_trace_ready = on_trace_ready
self.record_shapes = record_shapes
self.profile_memory = profile_memory
self.with_stack = with_stack
self.with_flops = with_flops
self.json_trace_path = json_trace_path
@master_only
def before_run(self, runner):
if self.by_epoch and runner.max_epochs < self.profile_iters:
raise ValueError('self.profile_iters should not be greater than '
f'{runner.max_epochs}')
if not self.by_epoch and runner.max_iters < self.profile_iters:
raise ValueError('self.profile_iters should not be greater than '
f'{runner.max_iters}')
if callable(self.on_trace_ready): # handler
_on_trace_ready = self.on_trace_ready
elif isinstance(self.on_trace_ready, dict): # config of handler
trace_cfg = self.on_trace_ready.copy()
trace_type = trace_cfg.pop('type') # log_trace handler
if trace_type == 'log_trace':
def _log_handler(prof):
print(prof.key_averages().table(**trace_cfg))
_on_trace_ready = _log_handler
elif trace_type == 'tb_trace': # tensorboard_trace handler
try:
import torch_tb_profiler # noqa: F401
except ImportError:
raise ImportError('please run "pip install '
'torch-tb-profiler" to install '
'torch_tb_profiler')
if 'dir_name' not in trace_cfg:
trace_cfg['dir_name'] = osp.join(runner.work_dir,
'tf_tracing_logs')
elif not osp.isabs(trace_cfg['dir_name']):
trace_cfg['dir_name'] = osp.join(runner.work_dir,
trace_cfg['dir_name'])
runner.logger.info(
'tracing files of ProfilerHook will be saved to '
f"{trace_cfg['dir_name']}.")
_on_trace_ready = torch.profiler.tensorboard_trace_handler(
**trace_cfg)
else:
raise ValueError('trace_type should be "log_trace" or '
f'"tb_trace", but got {trace_type}')
elif self.on_trace_ready is None:
_on_trace_ready = None # type: ignore
else:
raise ValueError('on_trace_ready should be handler, dict or None, '
f'but got {type(self.on_trace_ready)}')
if self.by_epoch and runner.max_epochs > 1:
warnings.warn(f'profiler will profile {runner.max_epochs} epochs '
'instead of 1 epoch. Since profiler will slow down '
'the training, it is recommended to train 1 epoch '
'with ProfilerHook and adjust your setting according'
' to the profiler summary. During normal training '
'(epoch > 1), you may disable the ProfilerHook.')
self.profiler = torch.profiler.profile(
activities=self.activities,
schedule=self.schedule,
on_trace_ready=_on_trace_ready,
record_shapes=self.record_shapes,
profile_memory=self.profile_memory,
with_stack=self.with_stack,
with_flops=self.with_flops)
self.profiler.__enter__()
runner.logger.info('profiler is profiling...')
@master_only
def after_train_epoch(self, runner):
if self.by_epoch and runner.epoch == self.profile_iters - 1:
runner.logger.info('profiler may take a few minutes...')
self.profiler.__exit__(None, None, None)
if self.json_trace_path is not None:
self.profiler.export_chrome_trace(self.json_trace_path)
@master_only
def after_train_iter(self, runner):
self.profiler.step()
if not self.by_epoch and runner.iter == self.profile_iters - 1:
runner.logger.info('profiler may take a few minutes...')
self.profiler.__exit__(None, None, None)
if self.json_trace_path is not None:
self.profiler.export_chrome_trace(self.json_trace_path)
# Copyright (c) OpenMMLab. All rights reserved.
from .hook import HOOKS, Hook
@HOOKS.register_module()
class DistSamplerSeedHook(Hook):
"""Data-loading sampler for distributed training.
In distributed training, it is only useful in conjunction with
:obj:`EpochBasedRunner`, while :obj:`IterBasedRunner` achieves the same
purpose with :obj:`IterLoader`.
"""
def before_epoch(self, runner):
if hasattr(runner.data_loader.sampler, 'set_epoch'):
# in case the data loader uses `SequentialSampler` in PyTorch
runner.data_loader.sampler.set_epoch(runner.epoch)
elif hasattr(runner.data_loader.batch_sampler.sampler, 'set_epoch'):
# the batch sampler in PyTorch wraps the sampler as one of its attributes.
runner.data_loader.batch_sampler.sampler.set_epoch(runner.epoch)
# Copyright (c) OpenMMLab. All rights reserved.
from ..dist_utils import allreduce_params
from .hook import HOOKS, Hook
@HOOKS.register_module()
class SyncBuffersHook(Hook):
"""Synchronize model buffers such as running_mean and running_var in BN at
the end of each epoch.
Args:
distributed (bool): Whether distributed training is used. It is
effective only for distributed training. Defaults to True.
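Example:
>>> # A minimal, illustrative sketch; assumes ``runner`` is an already
>>> # constructed runner instance.
>>> runner.register_hook(SyncBuffersHook(distributed=True))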
"""
def __init__(self, distributed: bool = True):
self.distributed = distributed
def after_epoch(self, runner):
"""All-reduce model buffers at the end of each epoch."""
if self.distributed:
allreduce_params(runner.model.buffers())