Unverified Commit 9185eee8 authored by Zaida Zhou, committed by GitHub

Remove runner, parallel, engine and device (#2216)

* Remove runner, parallel, engine and device

* fix format

* remove outdated docs
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import platform
import shutil
import time
import warnings
from typing import Callable, Dict, List, Optional, Tuple, Union, no_type_check
import torch
from torch.optim import Optimizer
from torch.utils.data import DataLoader
import mmcv
from .base_runner import BaseRunner
from .builder import RUNNERS
from .checkpoint import save_checkpoint
from .hooks import IterTimerHook
from .utils import get_host_info
class IterLoader:
def __init__(self, dataloader: DataLoader):
self._dataloader = dataloader
self.iter_loader = iter(self._dataloader)
self._epoch = 0
@property
def epoch(self) -> int:
return self._epoch
def __next__(self):
try:
data = next(self.iter_loader)
except StopIteration:
self._epoch += 1
if hasattr(self._dataloader.sampler, 'set_epoch'):
self._dataloader.sampler.set_epoch(self._epoch)
time.sleep(2) # Prevent possible deadlock during epoch transition
self.iter_loader = iter(self._dataloader)
data = next(self.iter_loader)
return data
def __len__(self):
return len(self._dataloader)
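# Illustrative usage sketch (not part of the original file): IterLoader
# wraps a finite DataLoader into an endless batch stream, bumping `epoch`
# and restarting the iterator whenever the underlying loader is exhausted.
#
#     >>> from torch.utils.data import DataLoader, TensorDataset
#     >>> dataset = TensorDataset(torch.arange(4))
#     >>> loader = IterLoader(DataLoader(dataset, batch_size=2))
#     >>> _ = [next(loader) for _ in range(4)]  # wraps around once
#     >>> loader.epoch
#     1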
@RUNNERS.register_module()
class IterBasedRunner(BaseRunner):
"""Iteration-based Runner.
This runner trains models iteration by iteration.
"""
def train(self, data_loader, **kwargs):
self.model.train()
self.mode = 'train'
self.data_loader = data_loader
self._epoch = data_loader.epoch
data_batch = next(data_loader)
self.data_batch = data_batch
self.call_hook('before_train_iter')
outputs = self.model.train_step(data_batch, self.optimizer, **kwargs)
if not isinstance(outputs, dict):
raise TypeError('model.train_step() must return a dict')
if 'log_vars' in outputs:
self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
self.outputs = outputs
self.call_hook('after_train_iter')
del self.data_batch
self._inner_iter += 1
self._iter += 1
@torch.no_grad()
def val(self, data_loader, **kwargs):
self.model.eval()
self.mode = 'val'
self.data_loader = data_loader
data_batch = next(data_loader)
self.data_batch = data_batch
self.call_hook('before_val_iter')
outputs = self.model.val_step(data_batch, **kwargs)
if not isinstance(outputs, dict):
raise TypeError('model.val_step() must return a dict')
if 'log_vars' in outputs:
self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
self.outputs = outputs
self.call_hook('after_val_iter')
del self.data_batch
self._inner_iter += 1
def run(self,
data_loaders: List[DataLoader],
workflow: List[Tuple[str, int]],
max_iters: Optional[int] = None,
**kwargs) -> None:
"""Start running.
Args:
data_loaders (list[:obj:`DataLoader`]): Dataloaders for training
and validation.
workflow (list[tuple]): A list of (phase, iters) to specify the
running order and iterations. E.g., [('train', 10000),
('val', 1000)] means running 10000 iterations for training and
1000 iterations for validation, iteratively.
"""
assert isinstance(data_loaders, list)
assert mmcv.is_list_of(workflow, tuple)
assert len(data_loaders) == len(workflow)
if max_iters is not None:
warnings.warn(
'setting max_iters in run is deprecated, '
'please set max_iters in runner_config', DeprecationWarning)
self._max_iters = max_iters
assert self._max_iters is not None, (
'max_iters must be specified during instantiation')
work_dir = self.work_dir if self.work_dir is not None else 'NONE'
self.logger.info('Start running, host: %s, work_dir: %s',
get_host_info(), work_dir)
self.logger.info('Hooks will be executed in the following order:\n%s',
self.get_hook_info())
self.logger.info('workflow: %s, max: %d iters', workflow,
self._max_iters)
self.call_hook('before_run')
iter_loaders = [IterLoader(x) for x in data_loaders]
self.call_hook('before_epoch')
while self.iter < self._max_iters:
for i, flow in enumerate(workflow):
self._inner_iter = 0
mode, iters = flow
if not isinstance(mode, str) or not hasattr(self, mode):
raise ValueError(
'runner has no method named "{}" to run a workflow'.
format(mode))
iter_runner = getattr(self, mode)
for _ in range(iters):
if mode == 'train' and self.iter >= self._max_iters:
break
iter_runner(iter_loaders[i], **kwargs)
time.sleep(1) # wait for some hooks like loggers to finish
self.call_hook('after_epoch')
self.call_hook('after_run')
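# A typical invocation (sketch; construction of `runner`, `train_loader`
# and `val_loader` omitted): alternate 10000 training iterations with
# 1000 validation iterations until `max_iters` is reached, matching the
# workflow format documented above.
#
#     >>> runner.run([train_loader, val_loader],
#     ...            [('train', 10000), ('val', 1000)])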
@no_type_check
def resume(self,
checkpoint: str,
resume_optimizer: bool = True,
map_location: Union[str, Callable] = 'default') -> None:
"""Resume model from checkpoint.
Args:
checkpoint (str): Checkpoint to resume from.
resume_optimizer (bool, optional): Whether to resume the optimizer(s)
if the checkpoint file includes optimizer(s). Defaults to True.
map_location (str, optional): Same as :func:`torch.load`.
Defaults to 'default'.
"""
if map_location == 'default':
device_id = torch.cuda.current_device()
checkpoint = self.load_checkpoint(
checkpoint,
map_location=lambda storage, loc: storage.cuda(device_id))
else:
checkpoint = self.load_checkpoint(
checkpoint, map_location=map_location)
self._epoch = checkpoint['meta']['epoch']
self._iter = checkpoint['meta']['iter']
self._inner_iter = checkpoint['meta']['iter']
if 'optimizer' in checkpoint and resume_optimizer:
if isinstance(self.optimizer, Optimizer):
self.optimizer.load_state_dict(checkpoint['optimizer'])
elif isinstance(self.optimizer, dict):
for k in self.optimizer.keys():
self.optimizer[k].load_state_dict(
checkpoint['optimizer'][k])
else:
raise TypeError(
'Optimizer should be dict or torch.optim.Optimizer '
f'but got {type(self.optimizer)}')
self.logger.info(f'resumed from epoch: {self.epoch}, iter {self.iter}')
def save_checkpoint( # type: ignore
self,
out_dir: str,
filename_tmpl: str = 'iter_{}.pth',
meta: Optional[Dict] = None,
save_optimizer: bool = True,
create_symlink: bool = True) -> None:
"""Save checkpoint to file.
Args:
out_dir (str): Directory to save checkpoint files.
filename_tmpl (str, optional): Checkpoint file template.
Defaults to 'iter_{}.pth'.
meta (dict, optional): Metadata to be saved in checkpoint.
Defaults to None.
save_optimizer (bool, optional): Whether to save the optimizer.
Defaults to True.
create_symlink (bool, optional): Whether to create a symlink to the
latest checkpoint file. Defaults to True.
"""
if meta is None:
meta = {}
elif not isinstance(meta, dict):
raise TypeError(
f'meta should be a dict or None, but got {type(meta)}')
if self.meta is not None:
meta.update(self.meta)
# Note: meta.update(self.meta) should be done before
# meta.update(epoch=self.epoch + 1, iter=self.iter) otherwise
# there will be problems with resumed checkpoints.
# More details in https://github.com/open-mmlab/mmcv/pull/1108
meta.update(epoch=self.epoch + 1, iter=self.iter)
filename = filename_tmpl.format(self.iter + 1)
filepath = osp.join(out_dir, filename)
optimizer = self.optimizer if save_optimizer else None
save_checkpoint(self.model, filepath, optimizer=optimizer, meta=meta)
# in some environments, `os.symlink` is not supported; you may need to
# set `create_symlink` to False
if create_symlink:
dst_file = osp.join(out_dir, 'latest.pth')
if platform.system() != 'Windows':
mmcv.symlink(filename, dst_file)
else:
shutil.copy(filepath, dst_file)
def register_training_hooks(self,
lr_config,
optimizer_config=None,
checkpoint_config=None,
log_config=None,
momentum_config=None,
custom_hooks_config=None):
"""Register default hooks for iter-based training.
Checkpoint hook, optimizer stepper hook and logger hooks will be set to
`by_epoch=False` by default.
Default hooks include:
+----------------------+-------------------------+
| Hooks | Priority |
+======================+=========================+
| LrUpdaterHook | VERY_HIGH (10) |
+----------------------+-------------------------+
| MomentumUpdaterHook | HIGH (30) |
+----------------------+-------------------------+
| OptimizerStepperHook | ABOVE_NORMAL (40) |
+----------------------+-------------------------+
| CheckpointSaverHook | NORMAL (50) |
+----------------------+-------------------------+
| IterTimerHook | LOW (70) |
+----------------------+-------------------------+
| LoggerHook(s) | VERY_LOW (90) |
+----------------------+-------------------------+
| CustomHook(s) | defaults to NORMAL (50) |
+----------------------+-------------------------+
If a custom hook has the same priority as a default hook, the custom
hook will be triggered after the default hook.
"""
if checkpoint_config is not None:
checkpoint_config.setdefault('by_epoch', False) # type: ignore
if lr_config is not None:
lr_config.setdefault('by_epoch', False) # type: ignore
if log_config is not None:
for info in log_config['hooks']:
info.setdefault('by_epoch', False)
super().register_training_hooks(
lr_config=lr_config,
momentum_config=momentum_config,
optimizer_config=optimizer_config,
checkpoint_config=checkpoint_config,
log_config=log_config,
timer_config=IterTimerHook(),
custom_hooks_config=custom_hooks_config)
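# Sketch of a typical iter-based hook registration (the config values
# below are illustrative, not prescribed by this file):
#
#     >>> runner.register_training_hooks(
#     ...     lr_config=dict(policy='step', step=[60000, 80000]),
#     ...     optimizer_config=dict(grad_clip=None),
#     ...     checkpoint_config=dict(interval=10000),
#     ...     log_config=dict(
#     ...         interval=50, hooks=[dict(type='TextLoggerHook')]))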
# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
import numpy as np
class LogBuffer:
def __init__(self):
self.val_history = OrderedDict()
self.n_history = OrderedDict()
self.output = OrderedDict()
self.ready = False
def clear(self) -> None:
self.val_history.clear()
self.n_history.clear()
self.clear_output()
def clear_output(self) -> None:
self.output.clear()
self.ready = False
def update(self, vars: dict, count: int = 1) -> None:
assert isinstance(vars, dict)
for key, var in vars.items():
if key not in self.val_history:
self.val_history[key] = []
self.n_history[key] = []
self.val_history[key].append(var)
self.n_history[key].append(count)
def average(self, n: int = 0) -> None:
"""Average latest n values or all values."""
assert n >= 0
for key in self.val_history:
values = np.array(self.val_history[key][-n:])
nums = np.array(self.n_history[key][-n:])
avg = np.sum(values * nums) / np.sum(nums)
self.output[key] = avg
self.ready = True
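# Illustrative sketch: LogBuffer keeps per-key value/count histories and
# `average` computes a count-weighted mean over the latest n entries.
#
#     >>> buf = LogBuffer()
#     >>> buf.update({'loss': 1.0}, count=2)
#     >>> buf.update({'loss': 2.0}, count=2)
#     >>> buf.average()  # (1.0 * 2 + 2.0 * 2) / 4
#     >>> buf.output['loss']
#     1.5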
# Copyright (c) OpenMMLab. All rights reserved.
from .builder import (OPTIMIZER_BUILDERS, OPTIMIZERS, build_optimizer,
build_optimizer_constructor)
from .default_constructor import DefaultOptimizerConstructor
__all__ = [
'OPTIMIZER_BUILDERS', 'OPTIMIZERS', 'DefaultOptimizerConstructor',
'build_optimizer', 'build_optimizer_constructor'
]
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import inspect
from typing import Dict, List
import torch
from ...utils import Registry, build_from_cfg
OPTIMIZERS = Registry('optimizer')
OPTIMIZER_BUILDERS = Registry('optimizer builder')
def register_torch_optimizers() -> List:
torch_optimizers = []
for module_name in dir(torch.optim):
if module_name.startswith('__'):
continue
_optim = getattr(torch.optim, module_name)
if inspect.isclass(_optim) and issubclass(_optim,
torch.optim.Optimizer):
OPTIMIZERS.register_module()(_optim)
torch_optimizers.append(module_name)
return torch_optimizers
TORCH_OPTIMIZERS = register_torch_optimizers()
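# After registration, every optimizer class exposed by torch.optim can be
# built from a config by its class name, e.g. (sketch):
#
#     >>> 'SGD' in TORCH_OPTIMIZERS and 'Adam' in TORCH_OPTIMIZERS
#     True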
def build_optimizer_constructor(cfg: Dict):
return build_from_cfg(cfg, OPTIMIZER_BUILDERS)
def build_optimizer(model, cfg: Dict):
optimizer_cfg = copy.deepcopy(cfg)
constructor_type = optimizer_cfg.pop('constructor',
'DefaultOptimizerConstructor')
paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None)
optim_constructor = build_optimizer_constructor(
dict(
type=constructor_type,
optimizer_cfg=optimizer_cfg,
paramwise_cfg=paramwise_cfg))
optimizer = optim_constructor(model)
return optimizer
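# Minimal usage sketch (hyperparameter values are illustrative):
#
#     >>> import torch.nn as nn
#     >>> model = nn.Conv2d(3, 8, 3)
#     >>> cfg = dict(type='SGD', lr=0.01, momentum=0.9)
#     >>> optimizer = build_optimizer(model, cfg)
#     >>> isinstance(optimizer, torch.optim.SGD)
#     True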
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Dict, List, Optional, Union
import torch
import torch.nn as nn
from torch.nn import GroupNorm, LayerNorm
from mmcv.utils import _BatchNorm, _InstanceNorm, build_from_cfg, is_list_of
from mmcv.utils.ext_loader import check_ops_exist
from .builder import OPTIMIZER_BUILDERS, OPTIMIZERS
@OPTIMIZER_BUILDERS.register_module()
class DefaultOptimizerConstructor:
"""Default constructor for optimizers.
By default, each parameter shares the same optimizer settings, and we
provide an argument ``paramwise_cfg`` to specify parameter-wise settings.
It is a dict and may contain the following fields:
- ``custom_keys`` (dict): Specifies parameter-wise settings by keys. If
one of the keys in ``custom_keys`` is a substring of the name of a
parameter, then the setting of the parameter will be specified by
``custom_keys[key]`` and other settings like ``bias_lr_mult`` etc. will
be ignored. Note that the aforementioned ``key`` is the
longest key that is a substring of the name of the parameter. If there
are multiple matched keys with the same length, the key that comes
first in alphabetical order will be chosen.
``custom_keys[key]`` should be a dict and may contain fields ``lr_mult``
and ``decay_mult``. See Example 2 below.
- ``bias_lr_mult`` (float): Multiplier for the learning rate of all
bias parameters (except for those in normalization layers and offset
layers of DCN).
- ``bias_decay_mult`` (float): Multiplier for the weight decay of all
bias parameters (except for those in normalization layers, depthwise
conv layers, and offset layers of DCN).
- ``norm_decay_mult`` (float): Multiplier for the weight decay of all
weight and bias parameters of normalization layers.
- ``dwconv_decay_mult`` (float): Multiplier for the weight decay of all
weight and bias parameters of depthwise conv layers.
- ``dcn_offset_lr_mult`` (float): Multiplier for the learning rate of
the parameters of the offset layers in the deformable convs of a
model.
- ``bypass_duplicate`` (bool): If true, duplicate parameters will not
be added to the optimizer. Default: False.
Note:
1. If the option ``dcn_offset_lr_mult`` is used, the constructor will
override the effect of ``bias_lr_mult`` on the bias of the offset
layer. So be careful when using both ``bias_lr_mult`` and
``dcn_offset_lr_mult``. If you wish to apply both of them to the
offset layer in deformable convs, set ``dcn_offset_lr_mult`` to the
original ``dcn_offset_lr_mult`` * ``bias_lr_mult``.
2. If the option ``dcn_offset_lr_mult`` is used, the constructor will
apply it to all the DCN layers in the model. So be careful when the
model contains multiple DCN layers in places other than the backbone.
Args:
model (:obj:`nn.Module`): The model with parameters to be optimized.
optimizer_cfg (dict): The config dict of the optimizer.
Positional fields are
- `type`: class name of the optimizer.
Optional fields are
- any arguments of the corresponding optimizer type, e.g.,
lr, weight_decay, momentum, etc.
paramwise_cfg (dict, optional): Parameter-wise options.
Example 1:
>>> model = torch.nn.modules.Conv1d(1, 1, 1)
>>> optimizer_cfg = dict(type='SGD', lr=0.01, momentum=0.9,
>>> weight_decay=0.0001)
>>> paramwise_cfg = dict(norm_decay_mult=0.)
>>> optim_builder = DefaultOptimizerConstructor(
>>> optimizer_cfg, paramwise_cfg)
>>> optimizer = optim_builder(model)
Example 2:
>>> # assume model have attribute model.backbone and model.cls_head
>>> optimizer_cfg = dict(type='SGD', lr=0.01, weight_decay=0.95)
>>> paramwise_cfg = dict(custom_keys={
'backbone': dict(lr_mult=0.1, decay_mult=0.9)})
>>> optim_builder = DefaultOptimizerConstructor(
>>> optimizer_cfg, paramwise_cfg)
>>> optimizer = optim_builder(model)
>>> # Then the `lr` and `weight_decay` for model.backbone is
>>> # (0.01 * 0.1, 0.95 * 0.9). `lr` and `weight_decay` for
>>> # model.cls_head is (0.01, 0.95).
"""
def __init__(self,
optimizer_cfg: Dict,
paramwise_cfg: Optional[Dict] = None):
if not isinstance(optimizer_cfg, dict):
raise TypeError('optimizer_cfg should be a dict, '
f'but got {type(optimizer_cfg)}')
self.optimizer_cfg = optimizer_cfg
self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg
self.base_lr = optimizer_cfg.get('lr', None)
self.base_wd = optimizer_cfg.get('weight_decay', None)
self._validate_cfg()
def _validate_cfg(self) -> None:
if not isinstance(self.paramwise_cfg, dict):
raise TypeError('paramwise_cfg should be None or a dict, '
f'but got {type(self.paramwise_cfg)}')
if 'custom_keys' in self.paramwise_cfg:
if not isinstance(self.paramwise_cfg['custom_keys'], dict):
raise TypeError(
'If specified, custom_keys must be a dict, '
f'but got {type(self.paramwise_cfg["custom_keys"])}')
if self.base_wd is None:
for key in self.paramwise_cfg['custom_keys']:
if 'decay_mult' in self.paramwise_cfg['custom_keys'][key]:
raise ValueError('base_wd should not be None')
# get base lr and weight decay
# weight_decay must be explicitly specified if mult is specified
if ('bias_decay_mult' in self.paramwise_cfg
or 'norm_decay_mult' in self.paramwise_cfg
or 'dwconv_decay_mult' in self.paramwise_cfg):
if self.base_wd is None:
raise ValueError('base_wd should not be None')
def _is_in(self, param_group: Dict, param_group_list: List) -> bool:
assert is_list_of(param_group_list, dict)
param = set(param_group['params'])
param_set = set()
for group in param_group_list:
param_set.update(set(group['params']))
return not param.isdisjoint(param_set)
def add_params(self,
params: List[Dict],
module: nn.Module,
prefix: str = '',
is_dcn_module: Union[int, float, None] = None) -> None:
"""Add all parameters of module to the params list.
The parameters of the given module will be added to the list of param
groups, with specific rules defined by paramwise_cfg.
Args:
params (list[dict]): A list of param groups, it will be modified
in place.
module (nn.Module): The module to be added.
prefix (str): The prefix of the module
is_dcn_module (int|float|None): If the current module is a
submodule of DCN, `is_dcn_module` will be passed to
control conv_offset layer's learning rate. Defaults to None.
"""
# get param-wise options
custom_keys = self.paramwise_cfg.get('custom_keys', {})
# sort alphabetically first, then by descending length (the sort is
# stable), so the longest matching key wins and ties break alphabetically
sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True)
bias_lr_mult = self.paramwise_cfg.get('bias_lr_mult', 1.)
bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', 1.)
norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', 1.)
dwconv_decay_mult = self.paramwise_cfg.get('dwconv_decay_mult', 1.)
bypass_duplicate = self.paramwise_cfg.get('bypass_duplicate', False)
dcn_offset_lr_mult = self.paramwise_cfg.get('dcn_offset_lr_mult', 1.)
# special rules for norm layers and depth-wise conv layers
is_norm = isinstance(module,
(_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm))
is_dwconv = (
isinstance(module, torch.nn.Conv2d)
and module.in_channels == module.groups)
for name, param in module.named_parameters(recurse=False):
param_group = {'params': [param]}
if not param.requires_grad:
params.append(param_group)
continue
if bypass_duplicate and self._is_in(param_group, params):
warnings.warn(f'{prefix} is duplicate. It is skipped since '
f'bypass_duplicate={bypass_duplicate}')
continue
# if the parameter matches one of the custom keys, ignore other rules
is_custom = False
for key in sorted_keys:
if key in f'{prefix}.{name}':
is_custom = True
lr_mult = custom_keys[key].get('lr_mult', 1.)
param_group['lr'] = self.base_lr * lr_mult
if self.base_wd is not None:
decay_mult = custom_keys[key].get('decay_mult', 1.)
param_group['weight_decay'] = self.base_wd * decay_mult
break
if not is_custom:
# bias_lr_mult affects all bias parameters
# except for norm.bias and dcn.conv_offset.bias
if name == 'bias' and not (is_norm or is_dcn_module):
param_group['lr'] = self.base_lr * bias_lr_mult
if (prefix.find('conv_offset') != -1 and is_dcn_module
and isinstance(module, torch.nn.Conv2d)):
# deal with both dcn_offset's bias & weight
param_group['lr'] = self.base_lr * dcn_offset_lr_mult
# apply weight decay policies
if self.base_wd is not None:
# norm decay
if is_norm:
param_group[
'weight_decay'] = self.base_wd * norm_decay_mult
# depth-wise conv
elif is_dwconv:
param_group[
'weight_decay'] = self.base_wd * dwconv_decay_mult
# bias lr and decay
elif name == 'bias' and not is_dcn_module:
# TODO: currently bias_decay_mult also affects DCN bias parameters
param_group[
'weight_decay'] = self.base_wd * bias_decay_mult
params.append(param_group)
if check_ops_exist():
from mmcv.ops import DeformConv2d, ModulatedDeformConv2d
is_dcn_module = isinstance(module,
(DeformConv2d, ModulatedDeformConv2d))
else:
is_dcn_module = False
for child_name, child_mod in module.named_children():
child_prefix = f'{prefix}.{child_name}' if prefix else child_name
self.add_params(
params,
child_mod,
prefix=child_prefix,
is_dcn_module=is_dcn_module)
def __call__(self, model: nn.Module):
if hasattr(model, 'module'):
model = model.module
optimizer_cfg = self.optimizer_cfg.copy()
# if no paramwise option is specified, just use the global setting
if not self.paramwise_cfg:
optimizer_cfg['params'] = model.parameters()
return build_from_cfg(optimizer_cfg, OPTIMIZERS)
# set param-wise lr and weight decay recursively
params: List[Dict] = []
self.add_params(params, model)
optimizer_cfg['params'] = params
return build_from_cfg(optimizer_cfg, OPTIMIZERS)
# Copyright (c) OpenMMLab. All rights reserved.
from enum import Enum
from typing import Union
class Priority(Enum):
"""Hook priority levels.
+--------------+------------+
| Level | Value |
+==============+============+
| HIGHEST | 0 |
+--------------+------------+
| VERY_HIGH | 10 |
+--------------+------------+
| HIGH | 30 |
+--------------+------------+
| ABOVE_NORMAL | 40 |
+--------------+------------+
| NORMAL | 50 |
+--------------+------------+
| BELOW_NORMAL | 60 |
+--------------+------------+
| LOW | 70 |
+--------------+------------+
| VERY_LOW | 90 |
+--------------+------------+
| LOWEST | 100 |
+--------------+------------+
"""
HIGHEST = 0
VERY_HIGH = 10
HIGH = 30
ABOVE_NORMAL = 40
NORMAL = 50
BELOW_NORMAL = 60
LOW = 70
VERY_LOW = 90
LOWEST = 100
def get_priority(priority: Union[int, str, Priority]) -> int:
"""Get priority value.
Args:
priority (int or str or :obj:`Priority`): Priority.
Returns:
int: The priority value.
"""
if isinstance(priority, int):
if priority < 0 or priority > 100:
raise ValueError('priority must be between 0 and 100')
return priority
elif isinstance(priority, Priority):
return priority.value
elif isinstance(priority, str):
return Priority[priority.upper()].value
else:
raise TypeError('priority must be an integer or Priority enum value')
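# Examples of the accepted inputs (sketch):
#
#     >>> get_priority(10)
#     10
#     >>> get_priority('NORMAL')
#     50
#     >>> get_priority(Priority.VERY_LOW)
#     90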
# Copyright (c) OpenMMLab. All rights reserved.
import os
import random
import sys
import time
import warnings
from getpass import getuser
from socket import gethostname
from types import ModuleType
from typing import Optional
import numpy as np
import torch
import mmcv
def get_host_info() -> str:
"""Get hostname and username.
Return an empty string if an exception is raised, e.g.,
``getpass.getuser()`` may fail inside a Docker container.
"""
host = ''
try:
host = f'{getuser()}@{gethostname()}'
except Exception as e:
warnings.warn(f'Host or user not found: {str(e)}')
finally:
return host
def get_time_str() -> str:
return time.strftime('%Y%m%d_%H%M%S', time.localtime())
def obj_from_dict(info: dict,
parent: Optional[ModuleType] = None,
default_args: Optional[dict] = None):
"""Initialize an object from dict.
The dict must contain the key "type", which indicates the object type;
it can be either a string or a type, such as "list" or ``list``. The
remaining fields are treated as the arguments for constructing the
object.
Args:
info (dict): Object types and arguments.
parent (:class:`module`): Module that may contain the expected object
classes.
default_args (dict, optional): Default arguments for initializing the
object.
Returns:
any type: Object built from the dict.
"""
assert isinstance(info, dict) and 'type' in info
assert isinstance(default_args, dict) or default_args is None
args = info.copy()
obj_type = args.pop('type')
if mmcv.is_str(obj_type):
if parent is not None:
obj_type = getattr(parent, obj_type)
else:
obj_type = sys.modules[obj_type]
elif not isinstance(obj_type, type):
raise TypeError('type must be a str or valid type, but '
f'got {type(obj_type)}')
if default_args is not None:
for name, value in default_args.items():
args.setdefault(name, value)
return obj_type(**args)
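# Illustrative sketch: build a standard-library object from a
# config-style dict (the `datetime` module here is just an example
# parent, not anything this file requires).
#
#     >>> import datetime
#     >>> td = obj_from_dict(dict(type='timedelta', seconds=30), datetime)
#     >>> td.total_seconds()
#     30.0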
def set_random_seed(seed: int,
deterministic: bool = False,
use_rank_shift: bool = False) -> None:
"""Set random seed.
Args:
seed (int): Seed to be used.
deterministic (bool): Whether to set the deterministic option for
CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
to True and `torch.backends.cudnn.benchmark` to False.
Default: False.
use_rank_shift (bool): Whether to add the rank number to the random
seed so that different processes use different seeds. Default: False.
"""
if use_rank_shift:
rank, _ = mmcv.runner.get_dist_info()
seed += rank
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
if deterministic:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
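# Typical call at the start of a training script (sketch):
#
#     >>> set_random_seed(0, deterministic=True)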
@@ -39,7 +39,6 @@ else:
     from .device_type import (IS_IPU_AVAILABLE, IS_MLU_AVAILABLE,
                               IS_MPS_AVAILABLE)
     from .env import collect_env
-    from .hub import load_url
     from .logging import get_logger, print_log
     from .parrots_jit import jit, skip_no_elena
     # yapf: disable
@@ -75,7 +74,7 @@ else:
         'assert_dict_has_keys', 'assert_keys_equal', 'assert_is_norm_layer',
         'assert_params_all_zeros', 'check_python_script',
         'is_method_overridden', 'is_jit_tracing', 'is_rocm_pytorch',
-        '_get_cuda_home', 'load_url', 'has_method', 'IS_CUDA_AVAILABLE',
-        'worker_init_fn', 'IS_MLU_AVAILABLE', 'IS_IPU_AVAILABLE',
-        'IS_MPS_AVAILABLE', 'torch_meshgrid'
+        '_get_cuda_home', 'has_method', 'IS_CUDA_AVAILABLE', 'worker_init_fn',
+        'IS_MLU_AVAILABLE', 'IS_IPU_AVAILABLE', 'IS_MPS_AVAILABLE',
+        'torch_meshgrid'
     ]
# Copyright (c) OpenMMLab. All rights reserved.
# The 1.6 release of PyTorch switched torch.save to use a new zipfile-based
# file format. It will cause a RuntimeError when a checkpoint saved in
# torch >= 1.6.0 is loaded in torch < 1.7.0.
# More details at https://github.com/open-mmlab/mmpose/issues/904
from .parrots_wrapper import TORCH_VERSION
from .path import mkdir_or_exist
from .version_utils import digit_version
if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) < digit_version(
'1.7.0'):
# Modified from https://github.com/pytorch/pytorch/blob/master/torch/hub.py
import os
import sys
import warnings
import zipfile
from urllib.parse import urlparse
import torch
from torch.hub import HASH_REGEX, _get_torch_home, download_url_to_file
# Hub used to automatically extract zip files that were manually
# compressed by users. The legacy zip format expects only one file (from
# torch.save() < 1.6) in the zip. We should remove this support since
# zipfile is now the default format for torch.save().
def _is_legacy_zip_format(filename):
if zipfile.is_zipfile(filename):
infolist = zipfile.ZipFile(filename).infolist()
return len(infolist) == 1 and not infolist[0].is_dir()
return False
def _legacy_zip_load(filename, model_dir, map_location):
warnings.warn(
'Falling back to the old format < 1.6. This support will'
' be deprecated in favor of default zipfile format '
'introduced in 1.6. Please redo torch.save() to save it '
'in the new zipfile format.', DeprecationWarning)
# Note: extractall() overwrites existing files by default, so there is
# no need to clean up beforehand. We deliberately don't handle tarfile
# here since our legacy serialization format was in tar, e.g.
# resnet18-5c106cde.pth, which is widely used.
with zipfile.ZipFile(filename) as f:
members = f.infolist()
if len(members) != 1:
raise RuntimeError(
'Only one file (not a dir) is allowed in the zipfile')
f.extractall(model_dir)
extracted_name = members[0].filename
extracted_file = os.path.join(model_dir, extracted_name)
return torch.load(extracted_file, map_location=map_location)
def load_url(url,
model_dir=None,
map_location=None,
progress=True,
check_hash=False,
file_name=None):
r"""Loads the Torch serialized object at the given URL.
If the downloaded file is a zip file, it will be automatically
decompressed. If the object is already present in `model_dir`, it is
deserialized and returned.
The default value of ``model_dir`` is ``<hub_dir>/checkpoints`` where
``hub_dir`` is the directory returned by :func:`~torch.hub.get_dir`.
Args:
url (str): URL of the object to download
model_dir (str, optional): directory in which to save the object
map_location (optional): a function or a dict specifying how to
remap storage locations (see torch.load)
progress (bool, optional): whether or not to display a progress bar
to stderr. Default: True
check_hash(bool, optional): If True, the filename part of the URL
should follow the naming convention ``filename-<sha256>.ext``
where ``<sha256>`` is the first eight or more digits of the
SHA256 hash of the contents of the file. The hash is used to
ensure unique names and to verify the contents of the file.
Default: False
file_name (str, optional): name for the downloaded file. Filename
from ``url`` will be used if not set. Default: None.
Example:
>>> url = ('https://s3.amazonaws.com/pytorch/models/resnet18-5c106'
... 'cde.pth')
>>> state_dict = torch.hub.load_state_dict_from_url(url)
"""
# Issue warning to move data if old env is set
if os.getenv('TORCH_MODEL_ZOO'):
warnings.warn(
'TORCH_MODEL_ZOO is deprecated, please use env '
'TORCH_HOME instead', DeprecationWarning)
if model_dir is None:
torch_home = _get_torch_home()
model_dir = os.path.join(torch_home, 'checkpoints')
mkdir_or_exist(model_dir)
parts = urlparse(url)
filename = os.path.basename(parts.path)
if file_name is not None:
filename = file_name
cached_file = os.path.join(model_dir, filename)
if not os.path.exists(cached_file):
sys.stderr.write('Downloading: "{}" to {}\n'.format(
url, cached_file))
hash_prefix = None
if check_hash:
r = HASH_REGEX.search(filename) # r is Optional[Match[str]]
hash_prefix = r.group(1) if r else None
download_url_to_file(
url, cached_file, hash_prefix, progress=progress)
if _is_legacy_zip_format(cached_file):
return _legacy_zip_load(cached_file, model_dir, map_location)
try:
return torch.load(cached_file, map_location=map_location)
except RuntimeError as error:
if digit_version(TORCH_VERSION) < digit_version('1.5.0'):
warnings.warn(
f'If the error is the same as "{cached_file} is a zip '
'archive (did you mean to use torch.jit.load()?)", you can'
' upgrade your torch to 1.5.0 or higher (current torch '
f'version is {TORCH_VERSION}). The error was raised '
'because the checkpoint was saved in torch>=1.6.0 but '
'loaded in torch<1.5.')
raise error
else:
from torch.utils.model_zoo import load_url # type: ignore # noqa: F401
{
"train_old": "train",
"test_old": "test"
}
{
"test": "test.pth",
"val": "val.pth",
"train_empty": "train.pth"
}
{
"train": "https://localhost/train.pth",
"test": "https://localhost/test.pth"
}
{
"alexnet": "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth",
"densenet121": "https://download.pytorch.org/models/densenet121-a639ec97.pth",
"densenet169": "https://download.pytorch.org/models/densenet169-b2777c0a.pth",
"densenet201": "https://download.pytorch.org/models/densenet201-c1103571.pth",
"densenet161": "https://download.pytorch.org/models/densenet161-8d451a50.pth",
"efficientnet_b0": "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth",
"efficientnet_b1": "https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth",
"efficientnet_b2": "https://download.pytorch.org/models/efficientnet_b2_rwightman-bcdf34b7.pth",
"efficientnet_b3": "https://download.pytorch.org/models/efficientnet_b3_rwightman-cf984f9c.pth",
"efficientnet_b4": "https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth",
"efficientnet_b5": "https://download.pytorch.org/models/efficientnet_b5_lukemelas-b6417697.pth",
"efficientnet_b6": "https://download.pytorch.org/models/efficientnet_b6_lukemelas-c76e70fd.pth",
"efficientnet_b7": "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth",
"googlenet": "https://download.pytorch.org/models/googlenet-1378be20.pth",
"inception_v3_google": "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth",
"mobilenet_v2": "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth",
"mobilenet_v3_large": "https://download.pytorch.org/models/mobilenet_v3_large-8738ca79.pth",
"mobilenet_v3_small": "https://download.pytorch.org/models/mobilenet_v3_small-047dcff4.pth",
"regnet_y_400mf": "https://download.pytorch.org/models/regnet_y_400mf-c65dace8.pth",
"regnet_y_800mf": "https://download.pytorch.org/models/regnet_y_800mf-1b27b58c.pth",
"regnet_y_1_6gf": "https://download.pytorch.org/models/regnet_y_1_6gf-b11a554e.pth",
"regnet_y_3_2gf": "https://download.pytorch.org/models/regnet_y_3_2gf-b5a9779c.pth",
"regnet_y_8gf": "https://download.pytorch.org/models/regnet_y_8gf-d0d0e4a8.pth",
"regnet_y_16gf": "https://download.pytorch.org/models/regnet_y_16gf-9e6ed7dd.pth",
"regnet_y_32gf": "https://download.pytorch.org/models/regnet_y_32gf-4dee3f7a.pth",
"regnet_x_400mf": "https://download.pytorch.org/models/regnet_x_400mf-adf1edd5.pth",
"regnet_x_800mf": "https://download.pytorch.org/models/regnet_x_800mf-ad17e45c.pth",
"regnet_x_1_6gf": "https://download.pytorch.org/models/regnet_x_1_6gf-e3633e7f.pth",
"regnet_x_3_2gf": "https://download.pytorch.org/models/regnet_x_3_2gf-f342aeae.pth",
"regnet_x_8gf": "https://download.pytorch.org/models/regnet_x_8gf-03ceed89.pth",
"regnet_x_16gf": "https://download.pytorch.org/models/regnet_x_16gf-2007eb11.pth",
"regnet_x_32gf": "https://download.pytorch.org/models/regnet_x_32gf-9d47f8d0.pth",
"resnet18": "https://download.pytorch.org/models/resnet18-f37072fd.pth",
"resnet34": "https://download.pytorch.org/models/resnet34-b627a593.pth",
"resnet50": "https://download.pytorch.org/models/resnet50-0676ba61.pth",
"resnet101": "https://download.pytorch.org/models/resnet101-63fe2227.pth",
"resnet152": "https://download.pytorch.org/models/resnet152-394f9c45.pth",
"resnext50_32x4d": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
"resnext101_32x8d": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
"wide_resnet50_2": "https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth",
"wide_resnet101_2": "https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth",
"shufflenetv2_x0.5": "https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth",
"shufflenetv2_x1.0": "https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth",
"shufflenetv2_x1.5": null,
"shufflenetv2_x2.0": null,
"squeezenet1_0": "https://download.pytorch.org/models/squeezenet1_0-b66bff10.pth",
"squeezenet1_1": "https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth",
"vgg11": "https://download.pytorch.org/models/vgg11-8a719046.pth",
"vgg13": "https://download.pytorch.org/models/vgg13-19584684.pth",
"vgg16": "https://download.pytorch.org/models/vgg16-397923af.pth",
"vgg19": "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth",
"vgg11_bn": "https://download.pytorch.org/models/vgg11_bn-6002323d.pth",
"vgg13_bn": "https://download.pytorch.org/models/vgg13_bn-abd245e5.pth",
"vgg16_bn": "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth",
"vgg19_bn": "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth"
}
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.device import get_device
from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_MPS_AVAILABLE
def test_get_device():
current_device = get_device()
if IS_CUDA_AVAILABLE:
assert current_device == 'cuda'
elif IS_MLU_AVAILABLE:
assert current_device == 'mlu'
elif IS_MPS_AVAILABLE:
assert current_device == 'mps'
else:
assert current_device == 'cpu'
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmcv.device._functions import Scatter, scatter
from mmcv.utils import IS_MLU_AVAILABLE, IS_MPS_AVAILABLE
def test_scatter():
# if the device is CPU, just return the input
input = torch.zeros([1, 3, 3, 3])
output = scatter(input=input, devices=[-1])
assert torch.allclose(input, output)
inputs = [torch.zeros([1, 3, 3, 3]), torch.zeros([1, 4, 4, 4])]
outputs = scatter(input=inputs, devices=[-1])
for input, output in zip(inputs, outputs):
assert torch.allclose(input, output)
# if the device is MLU, copy the input from CPU to MLU
if IS_MLU_AVAILABLE:
input = torch.zeros([1, 3, 3, 3])
output = scatter(input=input, devices=[0])
assert torch.allclose(input.to('mlu'), output)
inputs = [torch.zeros([1, 3, 3, 3]), torch.zeros([1, 4, 4, 4])]
outputs = scatter(input=inputs, devices=[0])
for input, output in zip(inputs, outputs):
assert torch.allclose(input.to('mlu'), output)
# if the device is MPS, copy the input from CPU to MPS
if IS_MPS_AVAILABLE:
input = torch.zeros([1, 3, 3, 3])
output = scatter(input=input, devices=[0])
assert torch.allclose(input.to('mps'), output)
inputs = [torch.zeros([1, 3, 3, 3]), torch.zeros([1, 4, 4, 4])]
outputs = scatter(input=inputs, devices=[0])
for input, output in zip(inputs, outputs):
assert torch.allclose(input.to('mps'), output)
# input should be a tensor or list of tensor
with pytest.raises(Exception):
scatter(5, [-1])
def test_Scatter():
# if the device is CPU, just return the input
target_devices = [-1]
input = torch.zeros([1, 3, 3, 3])
outputs = Scatter.forward(target_devices, input)
assert isinstance(outputs, tuple)
assert torch.allclose(input, outputs[0])
target_devices = [-1]
inputs = [torch.zeros([1, 3, 3, 3]), torch.zeros([1, 4, 4, 4])]
outputs = Scatter.forward(target_devices, inputs)
assert isinstance(outputs, tuple)
for input, output in zip(inputs, outputs):
assert torch.allclose(input, output)
# if the device is MLU, copy the input from CPU to MLU
if IS_MLU_AVAILABLE:
target_devices = [0]
input = torch.zeros([1, 3, 3, 3])
outputs = Scatter.forward(target_devices, input)
assert isinstance(outputs, tuple)
assert torch.allclose(input.to('mlu'), outputs[0])
target_devices = [0]
inputs = [torch.zeros([1, 3, 3, 3]), torch.zeros([1, 4, 4, 4])]
outputs = Scatter.forward(target_devices, inputs)
assert isinstance(outputs, tuple)
for input, output in zip(inputs, outputs):
assert torch.allclose(input.to('mlu'), output[0])
# if the device is MPS, copy the input from CPU to MPS
if IS_MPS_AVAILABLE:
target_devices = [0]
input = torch.zeros([1, 3, 3, 3])
outputs = Scatter.forward(target_devices, input)
assert isinstance(outputs, tuple)
assert torch.allclose(input.to('mps'), outputs[0])
target_devices = [0]
inputs = [torch.zeros([1, 3, 3, 3]), torch.zeros([1, 4, 4, 4])]
outputs = Scatter.forward(target_devices, inputs)
assert isinstance(outputs, tuple)
for input, output in zip(inputs, outputs):
assert torch.allclose(input.to('mps'), output[0])
# Copyright (c) OpenMMLab. All rights reserved.
import logging
import numpy as np
import pytest
import torch
from mmcv.parallel.data_container import DataContainer
from mmcv.utils import IS_IPU_AVAILABLE
if IS_IPU_AVAILABLE:
from mmcv.device.ipu.hierarchical_data_manager import \
HierarchicalDataManager
skip_no_ipu = pytest.mark.skipif(
not IS_IPU_AVAILABLE, reason='test case under ipu environment')
@skip_no_ipu
def test_HierarchicalData():
# test hierarchical data
hierarchical_data_sample = {
'a': torch.rand(3, 4),
'b': np.random.rand(3, 4),
'c': DataContainer({
'a': torch.rand(3, 4),
'b': 4,
'c': 'd'
}),
'd': 123,
'e': [1, 3, torch.rand(3, 4),
np.random.rand(3, 4)],
'f': {
'a': torch.rand(3, 4),
'b': np.random.rand(3, 4),
'c': [1, 'asd']
}
}
all_tensors = []
all_tensors.append(hierarchical_data_sample['a'])
all_tensors.append(hierarchical_data_sample['c'].data['a'])
all_tensors.append(hierarchical_data_sample['e'][2])
all_tensors.append(hierarchical_data_sample['f']['a'])
all_tensors_id = [id(ele) for ele in all_tensors]
hd = HierarchicalDataManager(logging.getLogger())
hd.record_hierarchical_data(hierarchical_data_sample)
tensors = hd.collect_all_tensors()
for t in tensors:
assert id(t) in all_tensors_id
tensors[0].add_(1)
hd.update_all_tensors(tensors)
data = hd.hierarchical_data
data['c'].data['a'].sub_(1)
hd.record_hierarchical_data(data)
tensors = hd.collect_all_tensors()
for t in tensors:
assert id(t) in all_tensors_id
hd.quick()
with pytest.raises(
AssertionError,
match='original hierarchical data is not torch.tensor'):
hd.record_hierarchical_data(torch.rand(3, 4))
class AuxClass:
pass
with pytest.raises(NotImplementedError, match='not supported datatype:'):
hd.record_hierarchical_data(AuxClass())
with pytest.raises(NotImplementedError, match='not supported datatype:'):
hierarchical_data_sample['a'] = AuxClass()
hd.update_all_tensors(tensors)
with pytest.raises(NotImplementedError, match='not supported datatype:'):
hierarchical_data_sample['a'] = AuxClass()
hd.collect_all_tensors()
with pytest.raises(NotImplementedError, match='not supported datatype:'):
hierarchical_data_sample['a'] = AuxClass()
hd.clean_all_tensors()
hd = HierarchicalDataManager(logging.getLogger())
hd.record_hierarchical_data(hierarchical_data_sample)
hierarchical_data_sample['a'] = torch.rand(3, 4)
with pytest.raises(ValueError, match='all data except torch.Tensor'):
new_hierarchical_data_sample = {
**hierarchical_data_sample, 'b': np.random.rand(3, 4)
}
hd.update_hierarchical_data(new_hierarchical_data_sample)
hd.update_hierarchical_data(new_hierarchical_data_sample, strict=False)
hd.clean_all_tensors()
# test single tensor
single_tensor = torch.rand(3, 4)
hd = HierarchicalDataManager(logging.getLogger())
hd.record_hierarchical_data(single_tensor)
tensors = hd.collect_all_tensors()
assert len(tensors) == 1 and single_tensor in tensors
single_tensor_to_update = [torch.rand(3, 4)]
hd.update_all_tensors(single_tensor_to_update)
new_tensors = hd.collect_all_tensors()
assert new_tensors == single_tensor_to_update
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import pytest
import torch
from torch.utils.data import Dataset
from mmcv.parallel.data_container import DataContainer
from mmcv.utils import IS_IPU_AVAILABLE
if IS_IPU_AVAILABLE:
from mmcv.device.ipu import IPUDataLoader, cfg2options
from mmcv.device.ipu.dataloader import collate
skip_no_ipu = pytest.mark.skipif(
not IS_IPU_AVAILABLE, reason='test case under ipu environment')
class ToyDataset(Dataset):
def __getitem__(self, index):
return 111
def __len__(self):
return 3
@skip_no_ipu
def test_ipu_dataloader():
# test lazy initialization
dataloader = IPUDataLoader(
ToyDataset(), None, batch_size=256, num_workers=1, mode='async')
options_cfg = {'train_cfg': {}, 'eval_cfg': {}}
ipu_options = cfg2options(options_cfg)
dataloader.init(ipu_options['training'])
# test normal initialization
options_cfg = {'train_cfg': {}, 'eval_cfg': {}}
ipu_options = cfg2options(options_cfg)['training']
dataloader = IPUDataLoader(
ToyDataset(), ipu_options, batch_size=256, num_workers=1, mode='async')
@skip_no_ipu
def test_ipu_collate():
with pytest.raises(TypeError, match='`batch` should be a sequence'):
collate(123)
with pytest.raises(TypeError, match='DataContainer is not supported'):
collate([DataContainer(666)])
data_list = [[1, 2, 3], [2, 3, 4], DataContainer(666)]
batch0 = {
'tensor': torch.rand(3, 4, 5),
'arr': np.random.rand(3, 4, 5, 6),
'data_list': data_list
}
batch1 = {
'tensor': torch.rand(3, 4, 5),
'arr': np.random.rand(3, 4, 5, 6),
'data_list': data_list
}
batch = [batch1, batch0]
results = collate(batch)
assert results['tensor'].shape == (2, 3, 4, 5)
assert results['arr'].shape == (2, 3, 4, 5, 6)
for data in results['data_list']:
for tensor in data:
assert not isinstance(tensor, DataContainer)
assert tensor.shape == (2, )
# Copyright (c) OpenMMLab. All rights reserved.
import logging
import os.path as osp
import pytest
import torch
import torch.nn as nn
from mmcv.runner import build_runner
from mmcv.runner.fp16_utils import auto_fp16
from mmcv.utils import IS_IPU_AVAILABLE
if IS_IPU_AVAILABLE:
from mmcv.device.ipu.hook_wrapper import IPUFp16OptimizerHook
skip_no_ipu = pytest.mark.skipif(
not IS_IPU_AVAILABLE, reason='test case under ipu environment')
# TODO: once the model training and inference interfaces of MMCLS and
# MMDET are unified, construct the model according to the unified
# standard.
class ToyModel(nn.Module):
def __init__(self):
super().__init__()
self.conv = nn.Conv2d(3, 3, 1)
self.bn = nn.BatchNorm2d(3)
self.relu = nn.ReLU6()
self.fp16_enabled = False
@auto_fp16(apply_to=('img', ))
def forward(self, img, return_loss=True, **kwargs):
x = self.conv(img)
x = self.bn(x)
x = self.relu(x)
if return_loss:
loss = ((x - kwargs['gt_label'])**2).sum()
return {
'loss': loss,
'loss_list': [loss, loss],
'loss_dict': {
'loss1': loss
}
}
return x
def _parse_losses(self, losses):
return losses['loss'], losses['loss']
def train_step(self, data, optimizer=None, **kwargs):
losses = self(**data)
loss, log_vars = self._parse_losses(losses)
outputs = dict(
loss=loss, log_vars=log_vars, num_samples=len(data['img'].data))
return outputs
@skip_no_ipu
def test_ipu_hook_wrapper(tmp_path):
model = ToyModel()
dummy_input = {
'data': {
'img': torch.rand((16, 3, 10, 10)),
'gt_label': torch.rand((16, 3, 10, 10))
}
}
dir_name = 'a_tmp_dir'
working_dir = osp.join(tmp_path, dir_name)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
default_args = dict(
model=model,
work_dir=working_dir,
optimizer=optimizer,
logger=logging.getLogger())
cfg = dict(type='IPUEpochBasedRunner', max_epochs=1)
dummy_runner = build_runner(cfg, default_args=default_args)
# learning policy
lr_config = dict(policy='step', step=[1, 150])
# test optimizer config
optimizer_config = dict(
grad_clip=dict(max_norm=2), detect_anomalous_params=True)
# test building ipu_lr_hook_class
dummy_runner.register_training_hooks(
lr_config=lr_config, optimizer_config=None, timer_config=None)
# test _set_lr()
output = dummy_runner.model.train_step(**dummy_input)
dummy_runner.outputs = output
dummy_runner.call_hook('before_train_epoch')
# test building ipu_optimizer_hook_class
with pytest.raises(
NotImplementedError, match='IPU does not support gradient clip'):
dummy_runner.register_training_hooks(
lr_config=None,
optimizer_config=optimizer_config,
timer_config=None)
# test fp16 optimizer hook
lr_config = dict(policy='step', step=[1, 150])
optimizer_config = dict(grad_clip=dict(max_norm=2))
dummy_runner.hooks.pop(0)
with pytest.raises(NotImplementedError, match='IPU mode does not support'):
optimizer_config = IPUFp16OptimizerHook(
loss_scale='dynamic', distributed=False)
with pytest.raises(NotImplementedError, match='IPU mode supports single'):
optimizer_config = IPUFp16OptimizerHook(
loss_scale={}, distributed=False)
with pytest.raises(ValueError, match='loss_scale should be float'):
optimizer_config = IPUFp16OptimizerHook(
loss_scale=[], distributed=False)
optimizer_config = IPUFp16OptimizerHook(loss_scale=2.0, distributed=False)
dummy_runner.register_training_hooks(
lr_config=lr_config,
optimizer_config=optimizer_config,
timer_config=None)
dummy_runner.call_hook('after_train_iter')