Commit cbc25585 authored by limm

add mmpretrain/ part

parent 1baf0566
# Copyright (c) OpenMMLab. All rights reserved.
import math
import os.path as osp
from typing import Optional, Sequence
from mmengine.fileio import join_path
from mmengine.hooks import Hook
from mmengine.runner import EpochBasedTrainLoop, Runner
from mmengine.visualization import Visualizer
from mmpretrain.registry import HOOKS
from mmpretrain.structures import DataSample
@HOOKS.register_module()
class VisualizationHook(Hook):
"""Classification Visualization Hook. Used to visualize validation and
testing prediction results.
    - If ``out_dir`` is specified, all storage backends are ignored
      and the images are saved to ``out_dir``.
    - If ``show`` is True, the result image is plotted in a window; please
      confirm you are able to access the graphical interface.
Args:
enable (bool): Whether to enable this hook. Defaults to False.
interval (int): The interval of samples to visualize. Defaults to 5000.
show (bool): Whether to display the drawn image. Defaults to False.
out_dir (str, optional): directory where painted images will be saved
in the testing process. If None, handle with the backends of the
visualizer. Defaults to None.
**kwargs: other keyword arguments of
:meth:`mmpretrain.visualization.UniversalVisualizer.visualize_cls`.
"""
def __init__(self,
enable=False,
interval: int = 5000,
show: bool = False,
out_dir: Optional[str] = None,
**kwargs):
self._visualizer: Visualizer = Visualizer.get_current_instance()
self.enable = enable
self.interval = interval
self.show = show
self.out_dir = out_dir
self.draw_args = {**kwargs, 'show': show}
def _draw_samples(self,
batch_idx: int,
data_batch: dict,
data_samples: Sequence[DataSample],
step: int = 0) -> None:
"""Visualize every ``self.interval`` samples from a data batch.
Args:
batch_idx (int): The index of the current batch in the val loop.
data_batch (dict): Data from dataloader.
            data_samples (Sequence[:obj:`DataSample`]): Outputs from model.
step (int): Global step value to record. Defaults to 0.
"""
if self.enable is False:
return
batch_size = len(data_samples)
images = data_batch['inputs']
start_idx = batch_size * batch_idx
end_idx = start_idx + batch_size
# The first index divisible by the interval, after the start index
first_sample_id = math.ceil(start_idx / self.interval) * self.interval
for sample_id in range(first_sample_id, end_idx, self.interval):
image = images[sample_id - start_idx]
image = image.permute(1, 2, 0).cpu().numpy().astype('uint8')
data_sample = data_samples[sample_id - start_idx]
if 'img_path' in data_sample:
# osp.basename works on different platforms even file clients.
sample_name = osp.basename(data_sample.get('img_path'))
else:
sample_name = str(sample_id)
            draw_args = self.draw_args.copy()
if self.out_dir is not None:
draw_args['out_file'] = join_path(self.out_dir,
f'{sample_name}_{step}.png')
self._visualizer.visualize_cls(
image=image,
data_sample=data_sample,
step=step,
name=sample_name,
                **draw_args,
)
def after_val_iter(self, runner: Runner, batch_idx: int, data_batch: dict,
outputs: Sequence[DataSample]) -> None:
"""Visualize every ``self.interval`` samples during validation.
Args:
runner (:obj:`Runner`): The runner of the validation process.
batch_idx (int): The index of the current batch in the val loop.
data_batch (dict): Data from dataloader.
outputs (Sequence[:obj:`DataSample`]): Outputs from model.
"""
if isinstance(runner.train_loop, EpochBasedTrainLoop):
step = runner.epoch
else:
step = runner.iter
self._draw_samples(batch_idx, data_batch, outputs, step=step)
def after_test_iter(self, runner: Runner, batch_idx: int, data_batch: dict,
outputs: Sequence[DataSample]) -> None:
"""Visualize every ``self.interval`` samples during test.
Args:
runner (:obj:`Runner`): The runner of the testing process.
batch_idx (int): The index of the current batch in the test loop.
data_batch (dict): Data from dataloader.
            outputs (Sequence[:obj:`DataSample`]): Outputs from model.
"""
self._draw_samples(batch_idx, data_batch, outputs, step=0)
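# --- Config sketch (illustrative, not part of the original file) -------------
# The hook is registered in HOOKS and is disabled by default. In mmengine-style
# configs it is usually switched on through the visualization entry of
# ``default_hooks``; the values below are placeholders.
default_hooks = dict(
    visualization=dict(
        type='VisualizationHook',
        enable=True,
        # visualize every 1000th val/test sample
        interval=1000,
        # hypothetical directory; omit it to fall back to the visualizer's
        # own storage backends
        out_dir='vis_results'))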
# Copyright (c) OpenMMLab. All rights reserved.
import operator as op
from typing import Any, Optional, Union
from mmengine.hooks import Hook
from mmpretrain.registry import HOOKS
from mmpretrain.utils import get_ori_model
@HOOKS.register_module()
class WarmupParamHook(Hook):
"""This is a hook used for changing the parameters other than optimizations
that need to warmup inside the module.
This hook can extend with more detailed warmup rule if necessary.
Args:
param_name (str): The parameter name that needs to be altered.
module_name (str): Module name that belongs to the model. Such as
`head`, `head.loss`, etc.
warmup_epochs (int): The warmup epochs for this parameter.
"""
def __init__(
self,
param_name: str,
module_name: str,
warmup_epochs: int,
) -> None:
self.param_name = param_name
self.warmup_epochs = warmup_epochs
# getter for module which saves the changed parameter
self.module_getter = op.attrgetter(module_name)
def get_param(self, runner) -> Any:
"""Get the parameter."""
try:
module = self.module_getter(get_ori_model(runner.model))
return getattr(module, self.param_name)
except AttributeError as e:
raise AttributeError(f'{e}. Please check hook settings.')
def set_param(self, runner, value) -> None:
"""Set the parameter."""
try:
module = self.module_getter(get_ori_model(runner.model))
setattr(module, self.param_name, value)
except AttributeError as e:
raise AttributeError(f'{e}. Please check hook settings.')
def before_train(self, runner) -> None:
"""Get the original value before train."""
self.ori_val = self.get_param(runner)
def before_train_iter(
self,
runner,
batch_idx: int,
data_batch: Optional[Union[dict, tuple, list]] = None) -> None:
"""Set the warmup value before each train iter."""
cur_iter = runner.iter
iters_per_epoch = runner.max_iters / runner.max_epochs
new_val = self.ori_val * min(
1, cur_iter / (self.warmup_epochs * iters_per_epoch))
self.set_param(runner, new_val)
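if __name__ == '__main__':
    # Worked example (illustrative, not part of the original file) of the
    # warmup rule applied in ``before_train_iter`` above: the tracked value
    # grows linearly from 0 back to its original value over ``warmup_epochs``
    # epochs. All numbers below are placeholders.
    ori_val, warmup_epochs, iters_per_epoch = 0.996, 5, 100
    for cur_iter in (0, 250, 500, 1000):
        new_val = ori_val * min(
            1, cur_iter / (warmup_epochs * iters_per_epoch))
        print(cur_iter, new_val)  # 0 -> 0.0, 250 -> 0.498, 500+ -> 0.996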
# Copyright (c) OpenMMLab. All rights reserved.
from .adan_t import Adan
from .lamb import Lamb
from .lars import LARS
from .layer_decay_optim_wrapper_constructor import \
LearningRateDecayOptimWrapperConstructor
__all__ = ['Lamb', 'Adan', 'LARS', 'LearningRateDecayOptimWrapperConstructor']
# Copyright 2022 Garena Online Private Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import List
import torch
from torch import Tensor
from torch.optim.optimizer import Optimizer
from mmpretrain.registry import OPTIMIZERS
@OPTIMIZERS.register_module()
class Adan(Optimizer):
"""Implements a pytorch variant of Adan.
Adan was proposed in
    Adan: Adaptive Nesterov Momentum Algorithm for Faster Optimizing Deep Models. # noqa
https://arxiv.org/abs/2208.06677
Arguments:
params (iterable): iterable of parameters to optimize
or dicts defining parameter groups.
lr (float, optional): learning rate. (default: 1e-3)
        betas (Tuple[float, float, float], optional): coefficients used
for computing running averages of gradient.
(default: (0.98, 0.92, 0.99))
eps (float, optional): term added to the denominator to improve
numerical stability. (default: 1e-8)
weight_decay (float, optional): decoupled weight decay
(L2 penalty) (default: 0)
max_grad_norm (float, optional): value used to clip
global grad norm (default: 0.0 no clip)
no_prox (bool): how to perform the decoupled weight decay
(default: False)
        foreach (bool): if True, use the torch._foreach implementation.
            It's faster but uses slightly more memory.
"""
def __init__(self,
params,
lr=1e-3,
betas=(0.98, 0.92, 0.99),
eps=1e-8,
weight_decay=0.0,
max_grad_norm=0.0,
no_prox=False,
foreach: bool = True):
if not 0.0 <= max_grad_norm:
raise ValueError('Invalid Max grad norm: {}'.format(max_grad_norm))
if not 0.0 <= lr:
raise ValueError('Invalid learning rate: {}'.format(lr))
if not 0.0 <= eps:
raise ValueError('Invalid epsilon value: {}'.format(eps))
if not 0.0 <= betas[0] < 1.0:
raise ValueError('Invalid beta parameter at index 0: {}'.format(
betas[0]))
if not 0.0 <= betas[1] < 1.0:
raise ValueError('Invalid beta parameter at index 1: {}'.format(
betas[1]))
if not 0.0 <= betas[2] < 1.0:
raise ValueError('Invalid beta parameter at index 2: {}'.format(
betas[2]))
defaults = dict(
lr=lr,
betas=betas,
eps=eps,
weight_decay=weight_decay,
max_grad_norm=max_grad_norm,
no_prox=no_prox,
foreach=foreach)
super().__init__(params, defaults)
def __setstate__(self, state):
super(Adan, self).__setstate__(state)
for group in self.param_groups:
group.setdefault('no_prox', False)
@torch.no_grad()
def restart_opt(self):
for group in self.param_groups:
group['step'] = 0
for p in group['params']:
if p.requires_grad:
state = self.state[p]
# State initialization
# Exponential moving average of gradient values
state['exp_avg'] = torch.zeros_like(p)
# Exponential moving average of squared gradient values
state['exp_avg_sq'] = torch.zeros_like(p)
# Exponential moving average of gradient difference
state['exp_avg_diff'] = torch.zeros_like(p)
@torch.no_grad()
def step(self):
"""Performs a single optimization step."""
if self.defaults['max_grad_norm'] > 0:
device = self.param_groups[0]['params'][0].device
global_grad_norm = torch.zeros(1, device=device)
max_grad_norm = torch.tensor(
self.defaults['max_grad_norm'], device=device)
for group in self.param_groups:
for p in group['params']:
if p.grad is not None:
grad = p.grad
global_grad_norm.add_(grad.pow(2).sum())
global_grad_norm = torch.sqrt(global_grad_norm) + group['eps']
clip_global_grad_norm = \
torch.clamp(max_grad_norm / global_grad_norm, max=1.0)
else:
clip_global_grad_norm = 1.0
for group in self.param_groups:
params_with_grad = []
grads = []
exp_avgs = []
exp_avg_sqs = []
exp_avg_diffs = []
pre_grads = []
beta1, beta2, beta3 = group['betas']
            # Assume the same step across the group for now to simplify things.
            # A per-parameter step could easily be supported by making it a
            # tensor or by passing a list into the kernel.
if 'step' in group:
group['step'] += 1
else:
group['step'] = 1
bias_correction1 = 1.0 - beta1**group['step']
bias_correction2 = 1.0 - beta2**group['step']
bias_correction3 = 1.0 - beta3**group['step']
for p in group['params']:
if p.grad is None:
continue
params_with_grad.append(p)
grads.append(p.grad)
state = self.state[p]
if len(state) == 0:
state['exp_avg'] = torch.zeros_like(p)
state['exp_avg_sq'] = torch.zeros_like(p)
state['exp_avg_diff'] = torch.zeros_like(p)
if 'pre_grad' not in state or group['step'] == 1:
# at first step grad wouldn't be clipped
# by `clip_global_grad_norm`
# this is only to simplify implementation
state['pre_grad'] = p.grad
exp_avgs.append(state['exp_avg'])
exp_avg_sqs.append(state['exp_avg_sq'])
exp_avg_diffs.append(state['exp_avg_diff'])
pre_grads.append(state['pre_grad'])
kwargs = dict(
params=params_with_grad,
grads=grads,
exp_avgs=exp_avgs,
exp_avg_sqs=exp_avg_sqs,
exp_avg_diffs=exp_avg_diffs,
pre_grads=pre_grads,
beta1=beta1,
beta2=beta2,
beta3=beta3,
bias_correction1=bias_correction1,
bias_correction2=bias_correction2,
bias_correction3_sqrt=math.sqrt(bias_correction3),
lr=group['lr'],
weight_decay=group['weight_decay'],
eps=group['eps'],
no_prox=group['no_prox'],
clip_global_grad_norm=clip_global_grad_norm,
)
if group['foreach']:
copy_grads = _multi_tensor_adan(**kwargs)
else:
copy_grads = _single_tensor_adan(**kwargs)
for p, copy_grad in zip(params_with_grad, copy_grads):
self.state[p]['pre_grad'] = copy_grad
def _single_tensor_adan(
params: List[Tensor],
grads: List[Tensor],
exp_avgs: List[Tensor],
exp_avg_sqs: List[Tensor],
exp_avg_diffs: List[Tensor],
pre_grads: List[Tensor],
*,
beta1: float,
beta2: float,
beta3: float,
bias_correction1: float,
bias_correction2: float,
bias_correction3_sqrt: float,
lr: float,
weight_decay: float,
eps: float,
no_prox: bool,
clip_global_grad_norm: Tensor,
):
copy_grads = []
for i, param in enumerate(params):
grad = grads[i]
exp_avg = exp_avgs[i]
exp_avg_sq = exp_avg_sqs[i]
exp_avg_diff = exp_avg_diffs[i]
pre_grad = pre_grads[i]
grad = grad.mul_(clip_global_grad_norm)
copy_grads.append(grad.clone())
diff = grad - pre_grad
update = grad + beta2 * diff
exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1) # m_t
exp_avg_diff.mul_(beta2).add_(diff, alpha=1 - beta2) # diff_t
exp_avg_sq.mul_(beta3).addcmul_(update, update, value=1 - beta3) # n_t
denom = (exp_avg_sq.sqrt() / bias_correction3_sqrt).add_(eps)
update = exp_avg / bias_correction1
update.add_(beta2 * exp_avg_diff / bias_correction2).div_(denom)
if no_prox:
param.mul_(1 - lr * weight_decay)
param.add_(update, alpha=-lr)
else:
param.add_(update, alpha=-lr)
param.div_(1 + lr * weight_decay)
return copy_grads
def _multi_tensor_adan(
params: List[Tensor],
grads: List[Tensor],
exp_avgs: List[Tensor],
exp_avg_sqs: List[Tensor],
exp_avg_diffs: List[Tensor],
pre_grads: List[Tensor],
*,
beta1: float,
beta2: float,
beta3: float,
bias_correction1: float,
bias_correction2: float,
bias_correction3_sqrt: float,
lr: float,
weight_decay: float,
eps: float,
no_prox: bool,
clip_global_grad_norm: Tensor,
):
if clip_global_grad_norm < 1.0:
torch._foreach_mul_(grads, clip_global_grad_norm.item())
copy_grads = [g.clone() for g in grads]
diff = torch._foreach_sub(grads, pre_grads)
# NOTE: line below while looking identical gives different result,
# due to float precision errors.
# using mul+add produces identical results to single-tensor,
# using add+alpha doesn't
# update = torch._foreach_add(grads, torch._foreach_mul(diff, beta2))
update = torch._foreach_add(grads, diff, alpha=beta2)
torch._foreach_mul_(exp_avgs, beta1)
torch._foreach_add_(exp_avgs, grads, alpha=1 - beta1) # m_t
torch._foreach_mul_(exp_avg_diffs, beta2)
torch._foreach_add_(exp_avg_diffs, diff, alpha=1 - beta2) # diff_t
torch._foreach_mul_(exp_avg_sqs, beta3)
torch._foreach_addcmul_(
exp_avg_sqs, update, update, value=1 - beta3) # n_t
denom = torch._foreach_sqrt(exp_avg_sqs)
torch._foreach_div_(denom, bias_correction3_sqrt)
torch._foreach_add_(denom, eps)
update = torch._foreach_div(exp_avgs, bias_correction1)
# NOTE: same issue as above.
# beta2 * diff / bias_correction2 != diff * (beta2 / bias_correction2) # noqa
# using faster version by default. uncomment for tests to pass
# torch._foreach_add_(update, torch._foreach_div(torch._foreach_mul(exp_avg_diffs, beta2), bias_correction2)) # noqa
torch._foreach_add_(
update, torch._foreach_mul(exp_avg_diffs, beta2 / bias_correction2))
torch._foreach_div_(update, denom)
    if no_prox:
        torch._foreach_mul_(params, 1 - lr * weight_decay)
        torch._foreach_add_(params, update, alpha=-lr)
    else:
        torch._foreach_add_(params, update, alpha=-lr)
        torch._foreach_div_(params, 1 + lr * weight_decay)
return copy_grads
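if __name__ == '__main__':
    # Minimal smoke test (illustrative, not part of the original file); the
    # toy model and hyper-parameters are placeholders.
    import torch.nn as nn
    model = nn.Linear(16, 4)
    optimizer = Adan(model.parameters(), lr=1e-3, weight_decay=0.02)
    loss = model(torch.randn(8, 16)).sum()
    loss.backward()
    optimizer.step()
    # Config-style use goes through the OPTIMIZERS registry, e.g.
    #   optim_wrapper = dict(optimizer=dict(type='Adan', lr=1e-3))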
"""PyTorch Lamb optimizer w/ behaviour similar to NVIDIA FusedLamb.
This optimizer code was adapted from the following (starting with latest)
* https://github.com/HabanaAI/Model-References/blob/
2b435114fe8e31f159b1d3063b8280ae37af7423/PyTorch/nlp/bert/pretraining/lamb.py
* https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/
LanguageModeling/Transformer-XL/pytorch/lamb.py
* https://github.com/cybertronai/pytorch-lamb
Use FusedLamb if you can (GPU). The reason for including this variant of Lamb
is to have a version that is
similar in behaviour to APEX FusedLamb if you aren't using NVIDIA GPUs or
cannot install/use APEX.
In addition to some cleanup, this Lamb impl has been modified to support
PyTorch XLA and has been tested on TPU.
Original copyrights for above sources are below.
Modifications Copyright 2021 Ross Wightman
"""
# Copyright (c) 2021, Habana Labs Ltd. All rights reserved.
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# MIT License
#
# Copyright (c) 2019 cybertronai
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import math
import torch
from torch.optim import Optimizer
from mmpretrain.registry import OPTIMIZERS
@OPTIMIZERS.register_module()
class Lamb(Optimizer):
"""A pure pytorch variant of FuseLAMB (NvLamb variant) optimizer.
This class is copied from `timm`_. The LAMB was proposed in `Large Batch
Optimization for Deep Learning - Training BERT in 76 minutes`_.
.. _timm:
https://github.com/rwightman/pytorch-image-models/blob/master/timm/optim/lamb.py
.. _Large Batch Optimization for Deep Learning - Training BERT in 76 minutes:
https://arxiv.org/abs/1904.00962
Arguments:
params (iterable): iterable of parameters to optimize or dicts defining
parameter groups.
        lr (float, optional): learning rate. (default: 1e-3)
        bias_correction (bool, optional): whether to apply bias correction to
            the running averages. (default: True)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its norm. (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability. (default: 1e-6)
        weight_decay (float, optional): weight decay (L2 penalty)
            (default: 0.01)
        grad_averaging (bool, optional): whether to apply (1 - beta1) to grad
            when calculating running averages of gradient. (default: True)
        max_grad_norm (float, optional): value used to clip global grad norm
            (default: 1.0)
        trust_clip (bool): enable LAMBC trust ratio clipping (default: False)
        always_adapt (bool, optional): Apply adaptive learning rate to 0.0
            weight decay parameter (default: False)
""" # noqa: E501
def __init__(self,
params,
lr=1e-3,
bias_correction=True,
betas=(0.9, 0.999),
eps=1e-6,
weight_decay=0.01,
grad_averaging=True,
max_grad_norm=1.0,
trust_clip=False,
always_adapt=False):
defaults = dict(
lr=lr,
bias_correction=bias_correction,
betas=betas,
eps=eps,
weight_decay=weight_decay,
grad_averaging=grad_averaging,
max_grad_norm=max_grad_norm,
trust_clip=trust_clip,
always_adapt=always_adapt)
super().__init__(params, defaults)
@torch.no_grad()
def step(self, closure=None):
"""Performs a single optimization step.
Arguments:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
with torch.enable_grad():
loss = closure()
device = self.param_groups[0]['params'][0].device
one_tensor = torch.tensor(
1.0, device=device
) # because torch.where doesn't handle scalars correctly
global_grad_norm = torch.zeros(1, device=device)
for group in self.param_groups:
for p in group['params']:
if p.grad is None:
continue
grad = p.grad
if grad.is_sparse:
raise RuntimeError(
'Lamb does not support sparse gradients, consider '
'SparseAdam instead.')
global_grad_norm.add_(grad.pow(2).sum())
global_grad_norm = torch.sqrt(global_grad_norm)
# FIXME it'd be nice to remove explicit tensor conversion of scalars
# when torch.where promotes
# scalar types properly https://github.com/pytorch/pytorch/issues/9190
max_grad_norm = torch.tensor(
self.defaults['max_grad_norm'], device=device)
clip_global_grad_norm = torch.where(global_grad_norm > max_grad_norm,
global_grad_norm / max_grad_norm,
one_tensor)
for group in self.param_groups:
bias_correction = 1 if group['bias_correction'] else 0
beta1, beta2 = group['betas']
grad_averaging = 1 if group['grad_averaging'] else 0
beta3 = 1 - beta1 if grad_averaging else 1.0
            # Assume the same step across the group for now to simplify things.
            # A per-parameter step could easily be supported by making it a
            # tensor or by passing a list into the kernel.
if 'step' in group:
group['step'] += 1
else:
group['step'] = 1
if bias_correction:
bias_correction1 = 1 - beta1**group['step']
bias_correction2 = 1 - beta2**group['step']
else:
bias_correction1, bias_correction2 = 1.0, 1.0
for p in group['params']:
if p.grad is None:
continue
grad = p.grad.div_(clip_global_grad_norm)
state = self.state[p]
# State initialization
if len(state) == 0:
                    # Exponential moving average of gradient values
state['exp_avg'] = torch.zeros_like(p)
# Exponential moving average of squared gradient values
state['exp_avg_sq'] = torch.zeros_like(p)
exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
# Decay the first and second moment running average coefficient
exp_avg.mul_(beta1).add_(grad, alpha=beta3) # m_t
exp_avg_sq.mul_(beta2).addcmul_(
grad, grad, value=1 - beta2) # v_t
denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(
group['eps'])
update = (exp_avg / bias_correction1).div_(denom)
weight_decay = group['weight_decay']
if weight_decay != 0:
update.add_(p, alpha=weight_decay)
if weight_decay != 0 or group['always_adapt']:
# Layer-wise LR adaptation. By default, skip adaptation on
# parameters that are
# excluded from weight decay, unless always_adapt == True,
# then always enabled.
w_norm = p.norm(2.0)
g_norm = update.norm(2.0)
# FIXME nested where required since logical and/or not
# working in PT XLA
trust_ratio = torch.where(
w_norm > 0,
torch.where(g_norm > 0, w_norm / g_norm, one_tensor),
one_tensor,
)
if group['trust_clip']:
# LAMBC trust clipping, upper bound fixed at one
trust_ratio = torch.minimum(trust_ratio, one_tensor)
update.mul_(trust_ratio)
p.add_(update, alpha=-group['lr'])
return loss
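if __name__ == '__main__':
    # Minimal smoke test (illustrative, not part of the original file); the
    # toy model and hyper-parameters are placeholders.
    import torch.nn as nn
    model = nn.Linear(16, 4)
    optimizer = Lamb(model.parameters(), lr=1e-3, weight_decay=0.01)
    loss = model(torch.randn(8, 16)).sum()
    loss.backward()
    optimizer.step()
    # Config-style use goes through the OPTIMIZERS registry, e.g.
    #   optim_wrapper = dict(optimizer=dict(type='Lamb', lr=1e-3))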
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Iterable
import torch
from torch.optim.optimizer import Optimizer
from mmpretrain.registry import OPTIMIZERS
@OPTIMIZERS.register_module()
class LARS(Optimizer):
"""Implements layer-wise adaptive rate scaling for SGD.
Based on Algorithm 1 of the following paper by You, Gitman, and Ginsburg.
`Large Batch Training of Convolutional Networks:
<https://arxiv.org/abs/1708.03888>`_.
Args:
params (Iterable): Iterable of parameters to optimize or dicts defining
parameter groups.
lr (float): Base learning rate.
momentum (float): Momentum factor. Defaults to 0.
weight_decay (float): Weight decay (L2 penalty). Defaults to 0.
dampening (float): Dampening for momentum. Defaults to 0.
eta (float): LARS coefficient. Defaults to 0.001.
nesterov (bool): Enables Nesterov momentum. Defaults to False.
        eps (float): A small number to avoid dividing by zero.
            Defaults to 1e-8.
Example:
>>> optimizer = LARS(model.parameters(), lr=0.1, momentum=0.9,
>>> weight_decay=1e-4, eta=1e-3)
>>> optimizer.zero_grad()
>>> loss_fn(model(input), target).backward()
>>> optimizer.step()
"""
def __init__(self,
params: Iterable,
lr: float,
momentum: float = 0,
weight_decay: float = 0,
dampening: float = 0,
eta: float = 0.001,
nesterov: bool = False,
eps: float = 1e-8) -> None:
        if not isinstance(lr, float) or lr < 0.0:
raise ValueError(f'Invalid learning rate: {lr}')
if momentum < 0.0:
raise ValueError(f'Invalid momentum value: {momentum}')
if weight_decay < 0.0:
raise ValueError(f'Invalid weight_decay value: {weight_decay}')
if eta < 0.0:
raise ValueError(f'Invalid LARS coefficient value: {eta}')
defaults = dict(
lr=lr,
momentum=momentum,
dampening=dampening,
weight_decay=weight_decay,
nesterov=nesterov,
eta=eta)
if nesterov and (momentum <= 0 or dampening != 0):
raise ValueError(
'Nesterov momentum requires a momentum and zero dampening')
self.eps = eps
super().__init__(params, defaults)
def __setstate__(self, state) -> None:
super().__setstate__(state)
for group in self.param_groups:
group.setdefault('nesterov', False)
@torch.no_grad()
def step(self, closure=None) -> torch.Tensor:
"""Performs a single optimization step.
Args:
closure (callable, optional): A closure that reevaluates the model
and returns the loss.
"""
loss = None
if closure is not None:
with torch.enable_grad():
loss = closure()
for group in self.param_groups:
weight_decay = group['weight_decay']
momentum = group['momentum']
dampening = group['dampening']
eta = group['eta']
nesterov = group['nesterov']
lr = group['lr']
lars_exclude = group.get('lars_exclude', False)
for p in group['params']:
if p.grad is None:
continue
d_p = p.grad
if lars_exclude:
local_lr = 1.
else:
weight_norm = torch.norm(p).item()
grad_norm = torch.norm(d_p).item()
if weight_norm != 0 and grad_norm != 0:
# Compute local learning rate for this layer
local_lr = eta * weight_norm / \
(grad_norm + weight_decay * weight_norm + self.eps)
else:
local_lr = 1.
actual_lr = local_lr * lr
d_p = d_p.add(p, alpha=weight_decay).mul(actual_lr)
if momentum != 0:
param_state = self.state[p]
if 'momentum_buffer' not in param_state:
buf = param_state['momentum_buffer'] = \
torch.clone(d_p).detach()
else:
buf = param_state['momentum_buffer']
buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
if nesterov:
d_p = d_p.add(buf, alpha=momentum)
else:
d_p = buf
p.add_(-d_p)
return loss
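if __name__ == '__main__':
    # Minimal sketch (illustrative, not part of the original file) of the
    # ``lars_exclude`` param-group flag read in ``step`` above: parameters in
    # a group with ``lars_exclude=True`` skip the layer-wise learning-rate
    # adaptation. The toy model and values are placeholders.
    import torch.nn as nn
    model = nn.Sequential(nn.Linear(8, 8), nn.BatchNorm1d(8))
    param_groups = [
        dict(params=model[0].parameters()),
        dict(params=model[1].parameters(), lars_exclude=True),  # norm layer
    ]
    optimizer = LARS(param_groups, lr=0.1, momentum=0.9, weight_decay=1e-4)
    loss = model(torch.randn(4, 8)).sum()
    loss.backward()
    optimizer.step()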
# Copyright (c) OpenMMLab. All rights reserved.
from collections import defaultdict
from typing import Callable, List, Optional
from mmengine.logging import MMLogger
from mmengine.optim import DefaultOptimWrapperConstructor
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm, _InstanceNorm
from torch import nn
from torch.nn import GroupNorm, LayerNorm
from mmpretrain.registry import OPTIM_WRAPPER_CONSTRUCTORS
@OPTIM_WRAPPER_CONSTRUCTORS.register_module()
class LearningRateDecayOptimWrapperConstructor(DefaultOptimWrapperConstructor):
"""Different learning rates are set for different layers of backbone.
    By default, each parameter shares the same optimizer settings, and we
provide an argument ``paramwise_cfg`` to specify parameter-wise settings.
It is a dict and may contain the following fields:
    - ``layer_decay_rate`` (float): The learning rate of a parameter will
      be multiplied by this rate repeatedly according to the layer depth of
      the parameter. Usually it's less than 1, so that the earlier layers
      will have a lower learning rate. Defaults to 1.
- ``bias_decay_mult`` (float): It will be multiplied to the weight
decay for all bias parameters (except for those in normalization layers).
- ``norm_decay_mult`` (float): It will be multiplied to the weight
decay for all weight and bias parameters of normalization layers.
- ``flat_decay_mult`` (float): It will be multiplied to the weight
decay for all one-dimensional parameters
- ``custom_keys`` (dict): Specified parameters-wise settings by keys. If
one of the keys in ``custom_keys`` is a substring of the name of one
parameter, then the setting of the parameter will be specified by
``custom_keys[key]`` and other setting like ``bias_decay_mult`` will be
ignored. It should be a dict and may contain fields ``decay_mult``.
(The ``lr_mult`` is disabled in this constructor).
Example:
In the config file, you can use this constructor as below:
.. code:: python
optim_wrapper = dict(
optimizer=dict(
type='AdamW',
lr=4e-3,
weight_decay=0.05,
eps=1e-8,
betas=(0.9, 0.999)),
constructor='LearningRateDecayOptimWrapperConstructor',
paramwise_cfg=dict(
layer_decay_rate=0.75, # layer-wise lr decay factor
norm_decay_mult=0.,
flat_decay_mult=0.,
custom_keys={
'.cls_token': dict(decay_mult=0.0),
'.pos_embed': dict(decay_mult=0.0)
}))
"""
def add_params(self,
params: List[dict],
module: nn.Module,
prefix: str = '',
get_layer_depth: Optional[Callable] = None,
**kwargs) -> None:
"""Add all parameters of module to the params list.
The parameters of the given module will be added to the list of param
groups, with specific rules defined by paramwise_cfg.
Args:
params (List[dict]): A list of param groups, it will be modified
in place.
module (nn.Module): The module to be added.
            prefix (str): The prefix of the module.
            get_layer_depth (Callable, optional): A function that maps a
                parameter name to ``(layer_id, max_layer_id)``. Defaults to
                the ``get_layer_depth`` method of the outermost module.
"""
# get param-wise options
custom_keys = self.paramwise_cfg.get('custom_keys', {})
# first sort with alphabet order and then sort with reversed len of str
sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True)
logger = MMLogger.get_current_instance()
# The model should have `get_layer_depth` method
if get_layer_depth is None and not hasattr(module, 'get_layer_depth'):
raise NotImplementedError('The layer-wise learning rate decay need'
f' the model {type(module)} has'
' `get_layer_depth` method.')
else:
get_layer_depth = get_layer_depth or module.get_layer_depth
bias_decay_mult = self.paramwise_cfg.get('bias_decay_mult', None)
norm_decay_mult = self.paramwise_cfg.get('norm_decay_mult', None)
flat_decay_mult = self.paramwise_cfg.get('flat_decay_mult', None)
decay_rate = self.paramwise_cfg.get('layer_decay_rate', 1.0)
# special rules for norm layers and depth-wise conv layers
is_norm = isinstance(module,
(_BatchNorm, _InstanceNorm, GroupNorm, LayerNorm))
for name, param in module.named_parameters(recurse=False):
param_group = {'params': [param]}
param_name = prefix + name
if not param.requires_grad:
continue
if self.base_wd is not None:
base_wd = self.base_wd
custom_key = next(
filter(lambda k: k in param_name, sorted_keys), None)
# custom parameters decay
if custom_key is not None:
custom_cfg = custom_keys[custom_key].copy()
decay_mult = custom_cfg.pop('decay_mult', 1.)
param_group['weight_decay'] = base_wd * decay_mult
# add custom settings to param_group
param_group.update(custom_cfg)
# norm decay
elif is_norm and norm_decay_mult is not None:
param_group['weight_decay'] = base_wd * norm_decay_mult
# bias decay
elif name == 'bias' and bias_decay_mult is not None:
param_group['weight_decay'] = base_wd * bias_decay_mult
# flatten parameters decay
elif param.ndim == 1 and flat_decay_mult is not None:
param_group['weight_decay'] = base_wd * flat_decay_mult
else:
param_group['weight_decay'] = base_wd
layer_id, max_id = get_layer_depth(param_name)
scale = decay_rate**(max_id - layer_id - 1)
param_group['lr'] = self.base_lr * scale
param_group['lr_scale'] = scale
param_group['layer_id'] = layer_id
param_group['param_name'] = param_name
params.append(param_group)
for child_name, child_mod in module.named_children():
child_prefix = f'{prefix}{child_name}.'
self.add_params(
params,
child_mod,
prefix=child_prefix,
get_layer_depth=get_layer_depth,
)
if prefix == '':
layer_params = defaultdict(list)
for param in params:
layer_params[param['layer_id']].append(param)
for layer_id, layer_params in layer_params.items():
lr_scale = layer_params[0]['lr_scale']
lr = layer_params[0]['lr']
msg = [
f'layer {layer_id} params '
f'(lr={lr:.3g}, lr_scale={lr_scale:.3g}):'
]
for param in layer_params:
msg.append(f'\t{param["param_name"]}: '
f'weight_decay={param["weight_decay"]:.3g}')
logger.debug('\n'.join(msg))
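if __name__ == '__main__':
    # Minimal sketch (illustrative, not part of the original file) of the
    # ``get_layer_depth`` interface that ``add_params`` relies on above: the
    # model (or a callable passed in) maps a parameter name to
    # ``(layer_id, max_layer_id)``, and the learning rate of that parameter
    # is scaled by ``layer_decay_rate ** (max_layer_id - layer_id - 1)``.
    # The layering rule below is hypothetical.
    class ToyModel(nn.Module):

        def __init__(self):
            super().__init__()
            self.patch_embed = nn.Linear(8, 8)
            self.blocks = nn.ModuleList(nn.Linear(8, 8) for _ in range(2))
            self.head = nn.Linear(8, 2)

        def get_layer_depth(self, param_name):
            num_ids = len(self.blocks) + 2
            if param_name.startswith('patch_embed'):
                return 0, num_ids
            if param_name.startswith('blocks'):
                return int(param_name.split('.')[1]) + 1, num_ids
            return num_ids - 1, num_ids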
# Copyright (c) OpenMMLab. All rights reserved.
from .retrieval_loop import RetrievalTestLoop, RetrievalValLoop
__all__ = ['RetrievalTestLoop', 'RetrievalValLoop']
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmengine.model import is_model_wrapper
from mmengine.runner import TestLoop, ValLoop, autocast
from mmpretrain.registry import LOOPS
@LOOPS.register_module()
class RetrievalValLoop(ValLoop):
"""Loop for multimodal retrieval val.
Args:
runner (Runner): A reference of runner.
dataloader (Dataloader or dict): A dataloader object or a dict to
build a dataloader.
evaluator (Evaluator or dict or list): Used for computing metrics.
        fp16 (bool): Whether to enable fp16 validation. Defaults to
            False.
"""
def run(self) -> dict:
"""Launch val."""
self.runner.call_hook('before_val')
self.runner.call_hook('before_val_epoch')
self.runner.model.eval()
feats_local = []
data_samples_local = []
for idx, data_batch in enumerate(self.dataloader):
with torch.no_grad():
self.runner.call_hook(
'before_val_iter', batch_idx=idx, data_batch=data_batch)
# predictions should be sequence of BaseDataElement
with autocast(enabled=self.fp16):
if is_model_wrapper(self.runner.model):
data_preprocessor = self.runner.model.module.data_preprocessor # noqa: E501
else:
data_preprocessor = self.runner.model.data_preprocessor
# get features for retrieval instead of data samples
data_batch = data_preprocessor(data_batch, False)
feats = self.runner.model._run_forward(
data_batch, mode='tensor')
feats_local.append(feats)
data_samples_local.extend(data_batch['data_samples'])
self.runner.call_hook(
'after_val_iter',
batch_idx=idx,
data_batch=data_batch,
outputs=feats)
# concatenate different features
feats_local = {
k: torch.cat([dic[k] for dic in feats_local])
for k in feats_local[0]
}
# get predictions
if is_model_wrapper(self.runner.model):
predict_all_fn = self.runner.model.module.predict_all
else:
predict_all_fn = self.runner.model.predict_all
img_size = self.dataloader.dataset.img_size
text_size = self.dataloader.dataset.text_size
with torch.no_grad():
i2t_data_samples, t2i_data_samples = predict_all_fn(
feats_local,
data_samples_local,
num_images=img_size,
num_texts=text_size,
)
# process in evaluator and compute metrics
self.evaluator.process(i2t_data_samples, None)
i2t_metrics = self.evaluator.evaluate(img_size)
i2t_metrics = {f'i2t/{k}': v for k, v in i2t_metrics.items()}
self.evaluator.process(t2i_data_samples, None)
t2i_metrics = self.evaluator.evaluate(text_size)
t2i_metrics = {f't2i/{k}': v for k, v in t2i_metrics.items()}
metrics = {**i2t_metrics, **t2i_metrics}
self.runner.call_hook('after_val_epoch', metrics=metrics)
self.runner.call_hook('after_val')
return metrics
@LOOPS.register_module()
class RetrievalTestLoop(TestLoop):
"""Loop for multimodal retrieval test.
Args:
runner (Runner): A reference of runner.
dataloader (Dataloader or dict): A dataloader object or a dict to
build a dataloader.
evaluator (Evaluator or dict or list): Used for computing metrics.
fp16 (bool): Whether to enable fp16 testing. Defaults to
False.
"""
def run(self) -> dict:
"""Launch test."""
self.runner.call_hook('before_test')
self.runner.call_hook('before_test_epoch')
self.runner.model.eval()
feats_local = []
data_samples_local = []
for idx, data_batch in enumerate(self.dataloader):
with torch.no_grad():
self.runner.call_hook(
'before_test_iter', batch_idx=idx, data_batch=data_batch)
# predictions should be sequence of BaseDataElement
with autocast(enabled=self.fp16):
if is_model_wrapper(self.runner.model):
data_preprocessor = self.runner.model.module.data_preprocessor # noqa: E501
else:
data_preprocessor = self.runner.model.data_preprocessor
# get features for retrieval instead of data samples
data_batch = data_preprocessor(data_batch, False)
feats = self.runner.model._run_forward(
data_batch, mode='tensor')
feats_local.append(feats)
data_samples_local.extend(data_batch['data_samples'])
self.runner.call_hook(
'after_test_iter',
batch_idx=idx,
data_batch=data_batch,
outputs=feats)
# concatenate different features
feats_local = {
k: torch.cat([dic[k] for dic in feats_local])
for k in feats_local[0]
}
# get predictions
if is_model_wrapper(self.runner.model):
predict_all_fn = self.runner.model.module.predict_all
else:
predict_all_fn = self.runner.model.predict_all
img_size = self.dataloader.dataset.img_size
text_size = self.dataloader.dataset.text_size
with torch.no_grad():
i2t_data_samples, t2i_data_samples = predict_all_fn(
feats_local,
data_samples_local,
num_images=img_size,
num_texts=text_size,
)
# process in evaluator and compute metrics
self.evaluator.process(i2t_data_samples, None)
i2t_metrics = self.evaluator.evaluate(img_size)
i2t_metrics = {f'i2t/{k}': v for k, v in i2t_metrics.items()}
self.evaluator.process(t2i_data_samples, None)
t2i_metrics = self.evaluator.evaluate(text_size)
t2i_metrics = {f't2i/{k}': v for k, v in t2i_metrics.items()}
metrics = {**i2t_metrics, **t2i_metrics}
self.runner.call_hook('after_test_epoch', metrics=metrics)
self.runner.call_hook('after_test')
return metrics
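# --- Config sketch (illustrative, not part of the original file) -------------
# Both loops are registered in LOOPS, so a retrieval config can point the
# standard mmengine ``val_cfg``/``test_cfg`` fields at them. The dataset behind
# the dataloader is expected to expose ``img_size`` and ``text_size``, and the
# model must implement ``predict_all``, as used in the loops above.
val_cfg = dict(type='RetrievalValLoop')
test_cfg = dict(type='RetrievalTestLoop', fp16=True)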
# Copyright (c) OpenMMLab. All rights reserved.
from .weight_decay_scheduler import CosineAnnealingWeightDecay
__all__ = ['CosineAnnealingWeightDecay']
# Copyright (c) OpenMMLab. All rights reserved.
import math
from mmengine.optim.scheduler import CosineAnnealingParamScheduler
from mmpretrain.registry import PARAM_SCHEDULERS
class WeightDecaySchedulerMixin:
"""A mixin class for learning rate schedulers."""
def __init__(self, optimizer, *args, **kwargs):
super().__init__(optimizer, 'weight_decay', *args, **kwargs)
@PARAM_SCHEDULERS.register_module()
class CosineAnnealingWeightDecay(WeightDecaySchedulerMixin,
CosineAnnealingParamScheduler):
"""Set the weight decay value of each parameter group using a cosine
annealing schedule.
If the weight decay was set to be 0 initially, the weight decay value will
be 0 constantly during the training.
"""
def _get_value(self) -> list:
"""Compute value using chainable form of the scheduler."""
def _get_eta_min(base_value):
if self.eta_min_ratio is None:
return self.eta_min
return base_value * self.eta_min_ratio
if self.last_step == 0:
return [
group[self.param_name] for group in self.optimizer.param_groups
]
elif (self.last_step - 1 - self.T_max) % (2 * self.T_max) == 0:
weight_decay_value_list = []
for base_value, group in zip(self.base_values,
self.optimizer.param_groups):
if base_value == 0:
group_value = 0
else:
group_value = group[self.param_name] + (
base_value - _get_eta_min(base_value)) * (
1 - math.cos(math.pi / self.T_max)) / 2
weight_decay_value_list.append(group_value)
return weight_decay_value_list
weight_decay_value_list = []
for base_value, group in zip(self.base_values,
self.optimizer.param_groups):
if base_value == 0:
group_value = 0
else:
group_value = (
1 + math.cos(math.pi * self.last_step / self.T_max)) / (
1 + math.cos(math.pi *
(self.last_step - 1) / self.T_max)
) * (group[self.param_name] -
_get_eta_min(base_value)) + _get_eta_min(base_value)
weight_decay_value_list.append(group_value)
return weight_decay_value_list
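# --- Config sketch (illustrative, not part of the original file) -------------
# The scheduler is registered in PARAM_SCHEDULERS, so it can be listed next to
# the learning-rate schedulers in ``param_scheduler``. The argument names come
# from CosineAnnealingParamScheduler; all values below are placeholders.
param_scheduler = [
    dict(
        type='CosineAnnealingWeightDecay',
        eta_min=0.1,  # final weight-decay value at the end of the schedule
        T_max=300,
        by_epoch=True,
        begin=0,
        end=300),
]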
# Copyright (c) OpenMMLab. All rights reserved.
from .functional import * # noqa: F401,F403
from .metrics import * # noqa: F401,F403
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional
from mmengine.evaluator import BaseMetric
from mmpretrain.registry import METRICS
@METRICS.register_module()
class ANLS(BaseMetric):
"""ANLS metric.
    Compute the Average Normalized Levenshtein Similarity (ANLS).
Args:
threshold (float): ANLS threshold used for determining if the answer
has been correctly selected but not properly recognized,
or on the contrary, the output is a wrong text selected from the
options and given as an answer.
collect_device (str): Device name used for collecting results from
different ranks during distributed training. Must be 'cpu' or
'gpu'. Defaults to 'cpu'.
prefix (str, optional): The prefix that will be added in the metric
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
"""
default_prefix = 'ANLS'
def __init__(self,
threshold: float = 0.5,
collect_device: str = 'cpu',
prefix: Optional[str] = None) -> None:
super().__init__(collect_device=collect_device, prefix=prefix)
self.threshold = threshold
def process(self, data_batch, data_samples) -> None:
"""Process one batch of data samples.
The processed results should be stored in ``self.results``, which will
        be used to compute the metrics when all batches have been processed.
Args:
data_batch: A batch of data from the dataloader.
data_samples (Sequence[dict]): A batch of outputs from the model.
"""
for sample in data_samples:
gt_answer = sample.get('gt_answer')
result = {
'pred_answer': sample.get('pred_answer'),
'gt_answer': gt_answer
}
self.results.append(result)
def compute_metrics(self, results: List) -> dict:
"""Compute the metrics from processed results.
Args:
            results (list): The processed results of each batch.
Returns:
Dict: The computed metrics. The keys are the names of the metrics,
and the values are corresponding results.
"""
total_score = 0.
for result in results:
sample_score_list = []
pred = ' '.join(result['pred_answer'].strip().lower().split())
for gt in result['gt_answer']:
gt = ' '.join(gt.strip().lower().split())
dist = levenshtein_distance(gt, pred)
length = max(
len(gt.upper()), len(result['pred_answer'].upper()))
sample_score_list.append(0.0 if length == 0 else float(dist) /
float(length))
per_sample_score = 1. - min(sample_score_list)
if per_sample_score < self.threshold:
per_sample_score = 0.
total_score += per_sample_score
total_score = total_score / len(results)
return {'ANLS': total_score}
def levenshtein_distance(s1, s2):
if len(s1) > len(s2):
s1, s2 = s2, s1
distances = range(len(s1) + 1)
for i2, c2 in enumerate(s2):
distances_ = [i2 + 1]
for i1, c1 in enumerate(s1):
if c1 == c2:
distances_.append(distances[i1])
else:
distances_.append(1 + min((distances[i1], distances[i1 + 1],
distances_[-1])))
distances = distances_
return distances[-1]
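if __name__ == '__main__':
    # Minimal smoke test (illustrative, not part of the original file).
    # ``levenshtein_distance`` counts single-character edits:
    assert levenshtein_distance('hello', 'helo') == 1
    # The metric can also be exercised directly with already-parsed samples;
    # the field names match what ``process`` reads above and the answers are
    # made up.
    metric = ANLS(threshold=0.5)
    metric.process(None, [
        dict(pred_answer='blue', gt_answer=['blue']),  # exact match -> 1.0
        dict(pred_answer='blu', gt_answer=['blue']),   # one edit -> 0.75
        dict(pred_answer='red', gt_answer=['blue']),   # below threshold -> 0.0
    ])
    print(metric.compute_metrics(metric.results))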
# Copyright (c) OpenMMLab. All rights reserved.
from .ANLS import ANLS
from .caption import COCOCaption
from .gqa import GQAAcc
from .multi_label import AveragePrecision, MultiLabelMetric
from .multi_task import MultiTasksMetric
from .nocaps import NocapsSave
from .retrieval import RetrievalAveragePrecision, RetrievalRecall
from .scienceqa import ScienceQAMetric
from .shape_bias_label import ShapeBiasMetric
from .single_label import Accuracy, ConfusionMatrix, SingleLabelMetric
from .visual_grounding_eval import VisualGroundingMetric
from .voc_multi_label import VOCAveragePrecision, VOCMultiLabelMetric
from .vqa import ReportVQA, VQAAcc
__all__ = [
'Accuracy', 'SingleLabelMetric', 'MultiLabelMetric', 'AveragePrecision',
'MultiTasksMetric', 'VOCAveragePrecision', 'VOCMultiLabelMetric',
'ConfusionMatrix', 'RetrievalRecall', 'VQAAcc', 'ReportVQA', 'COCOCaption',
'VisualGroundingMetric', 'ScienceQAMetric', 'GQAAcc', 'NocapsSave',
'RetrievalAveragePrecision', 'ShapeBiasMetric', 'ANLS'
]
# Copyright (c) OpenMMLab. All rights reserved.
import json
import os
import tempfile
from typing import List, Optional
from mmengine.evaluator import BaseMetric
from mmengine.utils import track_iter_progress
from mmpretrain.registry import METRICS
from mmpretrain.utils import require
try:
from pycocoevalcap.eval import COCOEvalCap
from pycocotools.coco import COCO
except ImportError:
COCOEvalCap = None
COCO = None
@METRICS.register_module()
class COCOCaption(BaseMetric):
"""Coco Caption evaluation wrapper.
Save the generated captions and transform into coco format.
Calling COCO API for caption metrics.
Args:
ann_file (str): the path for the COCO format caption ground truth
json file, load for evaluations.
collect_device (str): Device name used for collecting results from
different ranks during distributed training. Must be 'cpu' or
'gpu'. Defaults to 'cpu'.
prefix (str, optional): The prefix that will be added in the metric
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
"""
@require('pycocoevalcap')
def __init__(self,
ann_file: str,
collect_device: str = 'cpu',
prefix: Optional[str] = None):
super().__init__(collect_device=collect_device, prefix=prefix)
self.ann_file = ann_file
def process(self, data_batch, data_samples):
"""Process one batch of data samples.
The processed results should be stored in ``self.results``, which will
        be used to compute the metrics when all batches have been processed.
Args:
data_batch: A batch of data from the dataloader.
data_samples (Sequence[dict]): A batch of outputs from the model.
"""
for data_sample in data_samples:
result = dict()
result['caption'] = data_sample.get('pred_caption')
result['image_id'] = int(data_sample.get('image_id'))
# Save the result to `self.results`.
self.results.append(result)
def compute_metrics(self, results: List):
"""Compute the metrics from processed results.
Args:
            results (list): The processed results of each batch.
Returns:
Dict: The computed metrics. The keys are the names of the metrics,
and the values are corresponding results.
"""
# NOTICE: don't access `self.results` from the method.
with tempfile.TemporaryDirectory() as temp_dir:
eval_result_file = save_result(
result=results,
result_dir=temp_dir,
filename='m4-caption_pred',
remove_duplicate='image_id',
)
coco_val = coco_caption_eval(eval_result_file, self.ann_file)
return coco_val
def save_result(result, result_dir, filename, remove_duplicate=''):
"""Saving predictions as json file for evaluation."""
# combine results from all processes
result_new = []
if remove_duplicate:
result_new = []
id_list = []
for res in track_iter_progress(result):
if res[remove_duplicate] not in id_list:
id_list.append(res[remove_duplicate])
result_new.append(res)
result = result_new
final_result_file_url = os.path.join(result_dir, '%s.json' % filename)
print(f'result file saved to {final_result_file_url}')
json.dump(result, open(final_result_file_url, 'w'))
return final_result_file_url
def coco_caption_eval(results_file, ann_file):
"""Evaluation between gt json and prediction json files."""
# create coco object and coco_result object
coco = COCO(ann_file)
coco_result = coco.loadRes(results_file)
# create coco_eval object by taking coco and coco_result
coco_eval = COCOEvalCap(coco, coco_result)
# make sure the image ids are the same
coco_eval.params['image_id'] = coco_result.getImgIds()
    # This will take some time at the first run
coco_eval.evaluate()
# print output evaluation scores
for metric, score in coco_eval.eval.items():
print(f'{metric}: {score:.3f}')
return coco_eval.eval
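if __name__ == '__main__':
    # Minimal sketch (illustrative, not part of the original file) of the
    # duplicate-removal behaviour of ``save_result``; the captions are made up.
    demo = [
        dict(image_id=1, caption='a cat on a mat'),
        dict(image_id=1, caption='a cat sitting on a mat'),  # same id, dropped
        dict(image_id=2, caption='a dog in the park'),
    ]
    with tempfile.TemporaryDirectory() as tmp_dir:
        out_file = save_result(demo, tmp_dir, 'demo_pred',
                               remove_duplicate='image_id')
        print(json.load(open(out_file)))  # only two unique image ids remain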
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional
from mmengine.evaluator import BaseMetric
from mmpretrain.evaluation.metrics.vqa import (_process_digit_article,
_process_punctuation)
from mmpretrain.registry import METRICS
@METRICS.register_module()
class GQAAcc(BaseMetric):
"""GQA Acc metric.
Compute GQA accuracy.
Args:
collect_device (str): Device name used for collecting results from
different ranks during distributed training. Must be 'cpu' or
'gpu'. Defaults to 'cpu'.
prefix (str, optional): The prefix that will be added in the metric
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
"""
default_prefix = 'GQA'
def __init__(self,
collect_device: str = 'cpu',
prefix: Optional[str] = None) -> None:
super().__init__(collect_device=collect_device, prefix=prefix)
def process(self, data_batch, data_samples) -> None:
"""Process one batch of data samples.
The processed results should be stored in ``self.results``, which will
        be used to compute the metrics when all batches have been processed.
Args:
data_batch: A batch of data from the dataloader.
data_samples (Sequence[dict]): A batch of outputs from the model.
"""
for sample in data_samples:
gt_answer = sample.get('gt_answer')
result = {
'pred_answer': sample.get('pred_answer'),
'gt_answer': gt_answer
}
self.results.append(result)
def compute_metrics(self, results: List) -> dict:
"""Compute the metrics from processed results.
Args:
            results (list): The processed results of each batch.
Returns:
Dict: The computed metrics. The keys are the names of the metrics,
and the values are corresponding results.
"""
acc = []
for result in results:
pred_answer = self._process_answer(result['pred_answer'])
gt_answer = self._process_answer(result['gt_answer'])
gqa_acc = 1 if pred_answer == gt_answer else 0
acc.append(gqa_acc)
accuracy = sum(acc) / len(acc)
metrics = {'acc': accuracy}
return metrics
def _process_answer(self, answer) -> str:
answer = _process_punctuation(answer)
answer = _process_digit_article(answer)
return answer
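if __name__ == '__main__':
    # Minimal smoke test (illustrative, not part of the original file); the
    # answers are made up. ``_process_answer`` normalises punctuation, articles
    # and number words through the VQA helpers imported above, so e.g. 'two'
    # and '2' should compare equal.
    metric = GQAAcc()
    metric.process(None, [
        dict(pred_answer='two', gt_answer='2'),
        dict(pred_answer='on the table', gt_answer='on table'),
        dict(pred_answer='cat', gt_answer='dog'),
    ])
    print(metric.compute_metrics(metric.results))  # {'acc': ...}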
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional, Sequence, Union
import numpy as np
import torch
from mmengine.evaluator import BaseMetric
from mmengine.logging import MMLogger
from mmpretrain.registry import METRICS
from mmpretrain.structures import label_to_onehot
from .single_label import _precision_recall_f1_support, to_tensor
@METRICS.register_module()
class MultiLabelMetric(BaseMetric):
r"""A collection of precision, recall, f1-score and support for
multi-label tasks.
    The collection of metrics is for multi-label classification.
And all these metrics are based on the confusion matrix of every category:
.. image:: ../../_static/image/confusion-matrix.png
:width: 60%
:align: center
All metrics can be formulated use variables above:
**Precision** is the fraction of correct predictions in all predictions:
.. math::
\text{Precision} = \frac{TP}{TP+FP}
**Recall** is the fraction of correct predictions in all targets:
.. math::
\text{Recall} = \frac{TP}{TP+FN}
**F1-score** is the harmonic mean of the precision and recall:
.. math::
\text{F1-score} = \frac{2\times\text{Recall}\times\text{Precision}}{\text{Recall}+\text{Precision}}
**Support** is the number of samples:
.. math::
\text{Support} = TP + TN + FN + FP
Args:
thr (float, optional): Predictions with scores under the threshold
are considered as negative. If None, the ``topk`` predictions will
be considered as positive. If the ``topk`` is also None, use
``thr=0.5`` as default. Defaults to None.
topk (int, optional): Predictions with the k-th highest scores are
considered as positive. If None, use ``thr`` to determine positive
predictions. If both ``thr`` and ``topk`` are not None, use
``thr``. Defaults to None.
items (Sequence[str]): The detailed metric items to evaluate, select
from "precision", "recall", "f1-score" and "support".
Defaults to ``('precision', 'recall', 'f1-score')``.
average (str | None): How to calculate the final metrics from the
confusion matrix of every category. It supports three modes:
- `"macro"`: Calculate metrics for each category, and calculate
the mean value over all categories.
- `"micro"`: Average the confusion matrix over all categories and
calculate metrics on the mean confusion matrix.
- `None`: Calculate metrics of every category and output directly.
Defaults to "macro".
collect_device (str): Device name used for collecting results from
different ranks during distributed training. Must be 'cpu' or
'gpu'. Defaults to 'cpu'.
prefix (str, optional): The prefix that will be added in the metric
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
will be used instead. Defaults to None.
Examples:
>>> import torch
>>> from mmpretrain.evaluation import MultiLabelMetric
>>> # ------ The Basic Usage for category indices labels -------
>>> y_pred = [[0], [1], [0, 1], [3]]
>>> y_true = [[0, 3], [0, 2], [1], [3]]
>>> # Output precision, recall, f1-score and support
>>> MultiLabelMetric.calculate(
... y_pred, y_true, pred_indices=True, target_indices=True, num_classes=4)
(tensor(50.), tensor(50.), tensor(45.8333), tensor(6))
>>> # ----------- The Basic Usage for one-hot labels -----------
>>> y_pred = torch.tensor([[1, 1, 0, 0],
... [1, 1, 0, 0],
... [0, 0, 1, 0],
... [0, 1, 0, 0],
... [0, 1, 0, 0]])
>>> y_true = torch.Tensor([[1, 1, 0, 0],
... [0, 0, 1, 0],
... [1, 1, 1, 0],
... [1, 0, 0, 0],
... [1, 0, 0, 0]])
>>> MultiLabelMetric.calculate(y_pred, y_true)
(tensor(43.7500), tensor(31.2500), tensor(33.3333), tensor(8))
>>> # --------- The Basic Usage for one-hot pred scores ---------
>>> y_pred = torch.rand(y_true.size())
>>> y_pred
tensor([[0.4575, 0.7335, 0.3934, 0.2572],
[0.1318, 0.1004, 0.8248, 0.6448],
[0.8349, 0.6294, 0.7896, 0.2061],
[0.4037, 0.7308, 0.6713, 0.8374],
[0.3779, 0.4836, 0.0313, 0.0067]])
>>> # Calculate with different threshold.
>>> MultiLabelMetric.calculate(y_pred, y_true, thr=0.1)
(tensor(42.5000), tensor(75.), tensor(53.1746), tensor(8))
>>> # Calculate with topk.
>>> MultiLabelMetric.calculate(y_pred, y_true, topk=1)
(tensor(62.5000), tensor(31.2500), tensor(39.1667), tensor(8))
>>>
        >>> # ------------------- Use with Evaluator -------------------
>>> from mmpretrain.structures import DataSample
>>> from mmengine.evaluator import Evaluator
        >>> data_samples = [
... DataSample().set_pred_score(pred).set_gt_score(gt)
... for pred, gt in zip(torch.rand(1000, 5), torch.randint(0, 2, (1000, 5)))]
>>> evaluator = Evaluator(metrics=MultiLabelMetric(thr=0.5))
        >>> evaluator.process(data_samples)
>>> evaluator.evaluate(1000)
{
'multi-label/precision': 50.72898037055408,
'multi-label/recall': 50.06836461357571,
'multi-label/f1-score': 50.384466955258475
}
>>> # Evaluate on each class by using topk strategy
>>> evaluator = Evaluator(metrics=MultiLabelMetric(topk=1, average=None))
        >>> evaluator.process(data_samples)
>>> evaluator.evaluate(1000)
{
'multi-label/precision_top1_classwise': [48.22, 50.54, 50.99, 44.18, 52.5],
'multi-label/recall_top1_classwise': [18.92, 19.22, 19.92, 20.0, 20.27],
'multi-label/f1-score_top1_classwise': [27.18, 27.85, 28.65, 27.54, 29.25]
}
""" # noqa: E501
default_prefix: Optional[str] = 'multi-label'
def __init__(self,
thr: Optional[float] = None,
topk: Optional[int] = None,
items: Sequence[str] = ('precision', 'recall', 'f1-score'),
average: Optional[str] = 'macro',
collect_device: str = 'cpu',
prefix: Optional[str] = None) -> None:
logger = MMLogger.get_current_instance()
if thr is None and topk is None:
thr = 0.5
logger.warning('Neither thr nor k is given, set thr as 0.5 by '
'default.')
elif thr is not None and topk is not None:
logger.warning('Both thr and topk are given, '
'use threshold in favor of top-k.')
self.thr = thr
self.topk = topk
self.average = average
for item in items:
assert item in ['precision', 'recall', 'f1-score', 'support'], \
                f'The metric {item} is not supported by `MultiLabelMetric`,' \
' please choose from "precision", "recall", "f1-score" and ' \
'"support".'
self.items = tuple(items)
super().__init__(collect_device=collect_device, prefix=prefix)
def process(self, data_batch, data_samples: Sequence[dict]):
"""Process one batch of data samples.
The processed results should be stored in ``self.results``, which will
        be used to compute the metrics when all batches have been processed.
Args:
data_batch: A batch of data from the dataloader.
data_samples (Sequence[dict]): A batch of outputs from the model.
"""
for data_sample in data_samples:
result = dict()
result['pred_score'] = data_sample['pred_score'].clone()
num_classes = result['pred_score'].size()[-1]
if 'gt_score' in data_sample:
result['gt_score'] = data_sample['gt_score'].clone()
else:
result['gt_score'] = label_to_onehot(data_sample['gt_label'],
num_classes)
# Save the result to `self.results`.
self.results.append(result)
def compute_metrics(self, results: List):
"""Compute the metrics from processed results.
Args:
results (list): The processed results of each batch.
Returns:
Dict: The computed metrics. The keys are the names of the metrics,
and the values are corresponding results.
"""
# NOTICE: don't access `self.results` from the method. `self.results`
# are a list of results from multiple batch, while the input `results`
# are the collected results.
metrics = {}
target = torch.stack([res['gt_score'] for res in results])
pred = torch.stack([res['pred_score'] for res in results])
metric_res = self.calculate(
pred,
target,
pred_indices=False,
target_indices=False,
average=self.average,
thr=self.thr,
topk=self.topk)
def pack_results(precision, recall, f1_score, support):
single_metrics = {}
if 'precision' in self.items:
single_metrics['precision'] = precision
if 'recall' in self.items:
single_metrics['recall'] = recall
if 'f1-score' in self.items:
single_metrics['f1-score'] = f1_score
if 'support' in self.items:
single_metrics['support'] = support
return single_metrics
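        # The metric keys carry a suffix describing the decision rule:
        # no suffix for the default thr=0.5, `_thr-{thr:.2f}` for other
        # thresholds, and `_top{k}` when top-k is used.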
if self.thr:
suffix = '' if self.thr == 0.5 else f'_thr-{self.thr:.2f}'
for k, v in pack_results(*metric_res).items():
metrics[k + suffix] = v
else:
for k, v in pack_results(*metric_res).items():
metrics[k + f'_top{self.topk}'] = v
result_metrics = dict()
for k, v in metrics.items():
if self.average is None:
result_metrics[k + '_classwise'] = v.detach().cpu().tolist()
elif self.average == 'macro':
result_metrics[k] = v.item()
else:
result_metrics[k + f'_{self.average}'] = v.item()
return result_metrics
@staticmethod
def calculate(
pred: Union[torch.Tensor, np.ndarray, Sequence],
target: Union[torch.Tensor, np.ndarray, Sequence],
pred_indices: bool = False,
target_indices: bool = False,
average: Optional[str] = 'macro',
thr: Optional[float] = None,
topk: Optional[int] = None,
num_classes: Optional[int] = None
) -> Union[torch.Tensor, List[torch.Tensor]]:
"""Calculate the precision, recall, f1-score.
Args:
pred (torch.Tensor | np.ndarray | Sequence): The prediction
results. A :obj:`torch.Tensor` or :obj:`np.ndarray` with
shape ``(N, num_classes)`` or a sequence of index/onehot
format labels.
            target (torch.Tensor | np.ndarray | Sequence): The target of
                predictions. A :obj:`torch.Tensor` or :obj:`np.ndarray` with
shape ``(N, num_classes)`` or a sequence of index/onehot
format labels.
pred_indices (bool): Whether the ``pred`` is a sequence of
category index labels. If True, ``num_classes`` must be set.
Defaults to False.
target_indices (bool): Whether the ``target`` is a sequence of
category index labels. If True, ``num_classes`` must be set.
Defaults to False.
average (str | None): How to calculate the final metrics from
the confusion matrix of every category. It supports three
modes:
- `"macro"`: Calculate metrics for each category, and calculate
the mean value over all categories.
- `"micro"`: Average the confusion matrix over all categories
and calculate metrics on the mean confusion matrix.
- `None`: Calculate metrics of every category and output
directly.
Defaults to "macro".
            thr (float, optional): Predictions with scores under the threshold
                are considered as negative. Defaults to None.
            topk (int, optional): Predictions with the top-k highest scores
                are considered as positive. Defaults to None.
num_classes (Optional, int): The number of classes. If the ``pred``
is indices instead of onehot, this argument is required.
Defaults to None.
Returns:
            Tuple: The tuple contains precision, recall, f1-score and support.
And the type of each item is:
- torch.Tensor: A tensor for each metric. The shape is (1, ) if
``average`` is not None, and (C, ) if ``average`` is None.
Notes:
            If both ``thr`` and ``topk`` are set, use ``thr`` to determine
positive predictions. If neither is set, use ``thr=0.5`` as
default.
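        Examples:
            A minimal usage sketch, assuming the returned tuple unpacks to
            ``(precision, recall, f1-score, support)``; printed outputs are
            omitted here:
            >>> import torch
            >>> from mmpretrain.evaluation import MultiLabelMetric
            >>> pred = torch.tensor([[0.9, 0.1, 0.8],
            ...                      [0.2, 0.7, 0.6]])
            >>> target = torch.tensor([[1, 0, 1],
            ...                        [0, 1, 0]])
            >>> # threshold-based positives
            >>> p, r, f1, s = MultiLabelMetric.calculate(pred, target, thr=0.5)
            >>> # top-1 based positives
            >>> p, r, f1, s = MultiLabelMetric.calculate(pred, target, topk=1)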
"""
average_options = ['micro', 'macro', None]
assert average in average_options, 'Invalid `average` argument, ' \
            f'please specify from {average_options}.'
def _format_label(label, is_indices):
"""format various label to torch.Tensor."""
if isinstance(label, np.ndarray):
                assert label.ndim == 2, 'The shape of `pred` and `target` ' \
                    'arrays must be (N, num_classes).'
label = torch.from_numpy(label)
elif isinstance(label, torch.Tensor):
                assert label.ndim == 2, 'The shape of `pred` and `target` ' \
                    'tensors must be (N, num_classes).'
elif isinstance(label, Sequence):
if is_indices:
assert num_classes is not None, 'For index-type labels, ' \
'please specify `num_classes`.'
label = torch.stack([
label_to_onehot(indices, num_classes)
for indices in label
])
else:
label = torch.stack(
[to_tensor(onehot) for onehot in label])
else:
raise TypeError(
                    'The `pred` and `target` must be a torch.Tensor, '
                    f'np.ndarray or Sequence, but got {type(label)}.')
return label
pred = _format_label(pred, pred_indices)
target = _format_label(target, target_indices).long()
assert pred.shape == target.shape, \
f"The size of pred ({pred.shape}) doesn't match "\
f'the target ({target.shape}).'
if num_classes is not None:
assert pred.size(1) == num_classes, \
f'The shape of `pred` ({pred.shape}) '\
f"doesn't match the num_classes ({num_classes})."
num_classes = pred.size(1)
thr = 0.5 if (thr is None and topk is None) else thr
if thr is not None:
# a label is predicted positive if larger than thr
pos_inds = (pred >= thr).long()
else:
# top-k labels will be predicted positive for any example
_, topk_indices = pred.topk(topk)
pos_inds = torch.zeros_like(pred).scatter_(1, topk_indices, 1)
pos_inds = pos_inds.long()
return _precision_recall_f1_support(pos_inds, target, average)
def _average_precision(pred: torch.Tensor,
target: torch.Tensor) -> torch.Tensor:
r"""Calculate the average precision for a single class.
AP summarizes a precision-recall curve as the weighted mean of maximum
precisions obtained for any r'>r, where r is the recall:
.. math::
\text{AP} = \sum_n (R_n - R_{n-1}) P_n
Note that no approximation is involved since the curve is piecewise
constant.
Args:
pred (torch.Tensor): The model prediction with shape
``(N, num_classes)``.
target (torch.Tensor): The target of predictions with shape
``(N, num_classes)``.
Returns:
torch.Tensor: average precision result.
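    Example:
        A worked sanity check with illustrative values: for
        ``pred = [0.9, 0.8, 0.3]`` and ``target = [1, 0, 1]``, sorting by
        score keeps the target order ``[1, 0, 1]``; the precisions at the
        two positive hits are ``1/1`` and ``2/3``, so the result is
        ``(1 + 2/3) / 2 ~= 0.833``.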
"""
assert pred.shape == target.shape, \
f"The size of pred ({pred.shape}) doesn't match "\
f'the target ({target.shape}).'
# a small value for division by zero errors
eps = torch.finfo(torch.float32).eps
# get rid of -1 target such as difficult sample
# that is not wanted in evaluation results.
valid_index = target > -1
pred = pred[valid_index]
target = target[valid_index]
# sort examples
sorted_pred_inds = torch.argsort(pred, dim=0, descending=True)
sorted_target = target[sorted_pred_inds]
# get indexes when gt_true is positive
pos_inds = sorted_target == 1
# Calculate cumulative tp case numbers
tps = torch.cumsum(pos_inds, 0)
    total_pos = tps[-1].item()  # copy now; `tps` is modified in-place below
# Calculate cumulative tp&fp(pred_poss) case numbers
pred_pos_nums = torch.arange(1, len(sorted_target) + 1).to(pred.device)
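    # Clamp to eps purely as a division-by-zero safeguard; the counts start
    # from 1, so the clamp does not change any value here.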
pred_pos_nums[pred_pos_nums < eps] = eps
tps[torch.logical_not(pos_inds)] = 0
precision = tps / pred_pos_nums.float()
ap = torch.sum(precision, 0) / max(total_pos, eps)
return ap
@METRICS.register_module()
class AveragePrecision(BaseMetric):
r"""Calculate the average precision with respect of classes.
AveragePrecision (AP) summarizes a precision-recall curve as the weighted
mean of maximum precisions obtained for any r'>r, where r is the recall:
.. math::
\text{AP} = \sum_n (R_n - R_{n-1}) P_n
Note that no approximation is involved since the curve is piecewise
constant.
Args:
average (str | None): How to calculate the final metrics from
every category. It supports two modes:
- `"macro"`: Calculate metrics for each category, and calculate
the mean value over all categories. The result of this mode
is also called **mAP**.
- `None`: Calculate metrics of every category and output directly.
Defaults to "macro".
collect_device (str): Device name used for collecting results from
different ranks during distributed training. Must be 'cpu' or
'gpu'. Defaults to 'cpu'.
prefix (str, optional): The prefix that will be added in the metric
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
will be used instead. Defaults to None.
References
----------
1. `Wikipedia entry for the Average precision
<https://en.wikipedia.org/w/index.php?title=Information_retrieval&
oldid=793358396#Average_precision>`_
Examples:
>>> import torch
>>> from mmpretrain.evaluation import AveragePrecision
>>> # --------- The Basic Usage for one-hot pred scores ---------
>>> y_pred = torch.Tensor([[0.9, 0.8, 0.3, 0.2],
... [0.1, 0.2, 0.2, 0.1],
... [0.7, 0.5, 0.9, 0.3],
... [0.8, 0.1, 0.1, 0.2]])
>>> y_true = torch.Tensor([[1, 1, 0, 0],
... [0, 1, 0, 0],
... [0, 0, 1, 0],
... [1, 0, 0, 0]])
>>> AveragePrecision.calculate(y_pred, y_true)
tensor(70.833)
>>> # ------------------- Use with Evalutor -------------------
>>> from mmpretrain.structures import DataSample
>>> from mmengine.evaluator import Evaluator
>>> data_samples = [
... DataSample().set_pred_score(i).set_gt_score(j)
... for i, j in zip(y_pred, y_true)
... ]
>>> evaluator = Evaluator(metrics=AveragePrecision())
>>> evaluator.process(data_samples)
>>> evaluator.evaluate(5)
{'multi-label/mAP': 70.83333587646484}
>>> # Evaluate on each class
>>> evaluator = Evaluator(metrics=AveragePrecision(average=None))
>>> evaluator.process(data_samples)
>>> evaluator.evaluate(5)
{'multi-label/AP_classwise': [100., 83.33, 100., 0.]}
"""
default_prefix: Optional[str] = 'multi-label'
def __init__(self,
average: Optional[str] = 'macro',
collect_device: str = 'cpu',
prefix: Optional[str] = None) -> None:
super().__init__(collect_device=collect_device, prefix=prefix)
self.average = average
def process(self, data_batch, data_samples: Sequence[dict]):
"""Process one batch of data samples.
The processed results should be stored in ``self.results``, which will
        be used to compute the metrics when all batches have been processed.
Args:
data_batch: A batch of data from the dataloader.
data_samples (Sequence[dict]): A batch of outputs from the model.
"""
for data_sample in data_samples:
result = dict()
result['pred_score'] = data_sample['pred_score'].clone()
num_classes = result['pred_score'].size()[-1]
if 'gt_score' in data_sample:
result['gt_score'] = data_sample['gt_score'].clone()
else:
result['gt_score'] = label_to_onehot(data_sample['gt_label'],
num_classes)
# Save the result to `self.results`.
self.results.append(result)
def compute_metrics(self, results: List):
"""Compute the metrics from processed results.
Args:
results (list): The processed results of each batch.
Returns:
Dict: The computed metrics. The keys are the names of the metrics,
and the values are corresponding results.
"""
# NOTICE: don't access `self.results` from the method. `self.results`
        # is a list of results from multiple batches, while the input
        # `results` are the collected results.
# concat
target = torch.stack([res['gt_score'] for res in results])
pred = torch.stack([res['pred_score'] for res in results])
ap = self.calculate(pred, target, self.average)
result_metrics = dict()
if self.average is None:
result_metrics['AP_classwise'] = ap.detach().cpu().tolist()
else:
result_metrics['mAP'] = ap.item()
return result_metrics
@staticmethod
def calculate(pred: Union[torch.Tensor, np.ndarray],
target: Union[torch.Tensor, np.ndarray],
average: Optional[str] = 'macro') -> torch.Tensor:
r"""Calculate the average precision for a single class.
Args:
pred (torch.Tensor | np.ndarray): The model predictions with
shape ``(N, num_classes)``.
target (torch.Tensor | np.ndarray): The target of predictions
with shape ``(N, num_classes)``.
average (str | None): The average method. It supports two modes:
- `"macro"`: Calculate metrics for each category, and calculate
the mean value over all categories. The result of this mode
is also called mAP.
- `None`: Calculate metrics of every category and output
directly.
Defaults to "macro".
Returns:
torch.Tensor: the average precision of all classes.
"""
average_options = ['macro', None]
assert average in average_options, 'Invalid `average` argument, ' \
            f'please specify from {average_options}.'
pred = to_tensor(pred)
target = to_tensor(target)
assert pred.ndim == 2 and pred.shape == target.shape, \
'Both `pred` and `target` should have shape `(N, num_classes)`.'
num_classes = pred.shape[1]
ap = pred.new_zeros(num_classes)
for k in range(num_classes):
ap[k] = _average_precision(pred[:, k], target[:, k])
if average == 'macro':
return ap.mean() * 100.0
else:
return ap * 100
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, Sequence
from mmengine.evaluator import BaseMetric
from mmpretrain.registry import METRICS
@METRICS.register_module()
class MultiTasksMetric(BaseMetric):
"""Metrics for MultiTask
Args:
task_metrics(dict): a dictionary in the keys are the names of the tasks
and the values is a list of the metric corresponds to this task
Examples:
>>> import torch
>>> from mmpretrain.evaluation import MultiTasksMetric
        >>> # -------------------- The Basic Usage --------------------
        >>> task_metrics = {
        ...     'task0': [dict(type='Accuracy', topk=(1, ))],
        ...     'task1': [dict(type='Accuracy', topk=(1, 3))]
        ... }
        >>> pred = [{
        ...     'pred_task': {
        ...         'task0': torch.tensor([0.7, 0.0, 0.3]),
        ...         'task1': torch.tensor([0.5, 0.2, 0.3])
        ...     },
        ...     'gt_task': {
        ...         'task0': torch.tensor(0),
        ...         'task1': torch.tensor(2)
        ...     }
        ... }, {
        ...     'pred_task': {
        ...         'task0': torch.tensor([0.0, 0.0, 1.0]),
        ...         'task1': torch.tensor([0.0, 0.0, 1.0])
        ...     },
        ...     'gt_task': {
        ...         'task0': torch.tensor(2),
        ...         'task1': torch.tensor(2)
        ...     }
        ... }]
        >>> metric = MultiTasksMetric(task_metrics)
        >>> metric.process(None, pred)
        >>> metric.evaluate(2)
        {
            'task0_accuracy/top1': 100.0,
            'task1_accuracy/top1': 50.0,
            'task1_accuracy/top3': 100.0
        }
"""
def __init__(self,
task_metrics: Dict,
collect_device: str = 'cpu') -> None:
self.task_metrics = task_metrics
super().__init__(collect_device=collect_device)
self._metrics = {}
for task_name in self.task_metrics.keys():
self._metrics[task_name] = []
for metric in self.task_metrics[task_name]:
self._metrics[task_name].append(METRICS.build(metric))
def process(self, data_batch, data_samples: Sequence[dict]):
"""Process one batch of data samples.
The processed results should be stored in ``self.results``, which will
        be used to compute the metrics when all batches have been processed.
Args:
data_batch: A batch of data from the dataloader.
data_samples (Sequence[dict]): A batch of outputs from the model.
"""
for task_name in self.task_metrics.keys():
filtered_data_samples = []
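            # Keep only the samples whose `eval_mask` is True for this task;
            # the others carry no valid annotation for it and are skipped.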
for data_sample in data_samples:
eval_mask = data_sample[task_name]['eval_mask']
if eval_mask:
filtered_data_samples.append(data_sample[task_name])
for metric in self._metrics[task_name]:
metric.process(data_batch, filtered_data_samples)
def compute_metrics(self, results: list) -> dict:
raise NotImplementedError(
'compute metrics should not be used here directly')
def evaluate(self, size):
"""Evaluate the model performance of the whole dataset after processing
all batches.
Args:
size (int): Length of the entire validation dataset. When batch
size > 1, the dataloader may pad some data samples to make
sure all ranks have the same length of dataset slice. The
``collect_results`` function will drop the padded data based on
this size.
Returns:
dict: Evaluation metrics dict on the val dataset. The keys are
"{task_name}_{metric_name}" , and the values
are corresponding results.
"""
metrics = {}
for task_name in self._metrics:
for metric in self._metrics[task_name]:
name = metric.__class__.__name__
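                # A nested MultiTasksMetric evaluates itself; an ordinary
                # metric with no processed samples (e.g. every sample was
                # masked out for this task) is reported as 0 under its
                # class name instead of being evaluated on empty results.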
if name == 'MultiTasksMetric' or metric.results:
results = metric.evaluate(size)
else:
results = {metric.__class__.__name__: 0}
for key in results:
name = f'{task_name}_{key}'
                    if name in metrics:
"""Inspired from https://github.com/open-
mmlab/mmengine/ bl ob/ed20a9cba52ceb371f7c825131636b9e2
747172e/mmengine/evalua tor/evaluator.py#L84-L87."""
raise ValueError(
                            'There are multiple metric results with the same '
                            f'metric name {name}. Please make sure all '
                            'metrics have different prefixes.')
metrics[name] = results[key]
return metrics
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional
import mmengine
from mmpretrain.registry import METRICS
from mmpretrain.utils import require
from .caption import COCOCaption, save_result
try:
from pycocoevalcap.eval import COCOEvalCap
from pycocotools.coco import COCO
except ImportError:
COCOEvalCap = None
COCO = None
@METRICS.register_module()
class NocapsSave(COCOCaption):
"""Nocaps evaluation wrapper.
    Save the generated captions and transform them into COCO format.
    The dumped file can be submitted to the official evaluation system.
    Args:
        save_dir (str): The directory to save the prediction file.
            Defaults to './'.
        collect_device (str): Device name used for collecting results from
            different ranks during distributed training. Must be 'cpu' or
            'gpu'. Defaults to 'cpu'.
        prefix (str, optional): The prefix that will be added in the metric
            names to disambiguate homonymous metrics of different evaluators.
            If prefix is not provided in the argument, self.default_prefix
            will be used instead. Defaults to None.
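    Examples:
        A minimal config sketch (the save directory below is illustrative):
        >>> val_evaluator = dict(type='NocapsSave', save_dir='./nocaps_pred')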
"""
@require('pycocoevalcap')
def __init__(self,
save_dir: str = './',
collect_device: str = 'cpu',
prefix: Optional[str] = None):
super(COCOCaption, self).__init__(
collect_device=collect_device, prefix=prefix)
self.save_dir = save_dir
def compute_metrics(self, results: List):
"""Compute the metrics from processed results.
Args:
            results (list): The processed results of each batch.
"""
mmengine.mkdir_or_exist(self.save_dir)
save_result(
result=results,
result_dir=self.save_dir,
filename='nocap_pred',
remove_duplicate='image_id',
)
return dict()