Commit c04f261a authored by dongchy920

InstructBLIP

# Copyright (c) OpenMMLab. All rights reserved.
# This file is for backward compatibility.
# Module wrappers for empty tensor have been moved to mmcv.cnn.bricks.
import warnings
from ..cnn.bricks.wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d
class Conv2d_deprecated(Conv2d):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
warnings.warn(
'Importing Conv2d wrapper from "mmcv.ops" will be deprecated in'
' the future. Please import them from "mmcv.cnn" instead')
class ConvTranspose2d_deprecated(ConvTranspose2d):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
warnings.warn(
'Importing ConvTranspose2d wrapper from "mmcv.ops" will be '
'deprecated in the future. Please import them from "mmcv.cnn" '
'instead')
class MaxPool2d_deprecated(MaxPool2d):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
warnings.warn(
'Importing MaxPool2d wrapper from "mmcv.ops" will be deprecated in'
' the future. Please import them from "mmcv.cnn" instead')
class Linear_deprecated(Linear):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
warnings.warn(
'Importing Linear wrapper from "mmcv.ops" will be deprecated in'
' the future. Please import them from "mmcv.cnn" instead')
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', [
'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward',
'softmax_focal_loss_forward', 'softmax_focal_loss_backward'
])
class SigmoidFocalLossFunction(Function):
@staticmethod
def symbolic(g, input, target, gamma, alpha, weight, reduction):
return g.op(
'mmcv::MMCVSigmoidFocalLoss',
input,
target,
gamma_f=gamma,
alpha_f=alpha,
weight_f=weight,
reduction_s=reduction)
@staticmethod
def forward(ctx,
input,
target,
gamma=2.0,
alpha=0.25,
weight=None,
reduction='mean'):
assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
assert input.dim() == 2
assert target.dim() == 1
assert input.size(0) == target.size(0)
if weight is None:
weight = input.new_empty(0)
else:
assert weight.dim() == 1
assert input.size(1) == weight.size(0)
ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
assert reduction in ctx.reduction_dict.keys()
ctx.gamma = float(gamma)
ctx.alpha = float(alpha)
ctx.reduction = ctx.reduction_dict[reduction]
output = input.new_zeros(input.size())
ext_module.sigmoid_focal_loss_forward(
input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha)
if ctx.reduction == ctx.reduction_dict['mean']:
output = output.sum() / input.size(0)
elif ctx.reduction == ctx.reduction_dict['sum']:
output = output.sum()
ctx.save_for_backward(input, target, weight)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, target, weight = ctx.saved_tensors
grad_input = input.new_zeros(input.size())
ext_module.sigmoid_focal_loss_backward(
input,
target,
weight,
grad_input,
gamma=ctx.gamma,
alpha=ctx.alpha)
grad_input *= grad_output
if ctx.reduction == ctx.reduction_dict['mean']:
grad_input /= input.size(0)
return grad_input, None, None, None, None, None
sigmoid_focal_loss = SigmoidFocalLossFunction.apply
class SigmoidFocalLoss(nn.Module):
def __init__(self, gamma, alpha, weight=None, reduction='mean'):
super(SigmoidFocalLoss, self).__init__()
self.gamma = gamma
self.alpha = alpha
self.register_buffer('weight', weight)
self.reduction = reduction
def forward(self, input, target):
return sigmoid_focal_loss(input, target, self.gamma, self.alpha,
self.weight, self.reduction)
def __repr__(self):
s = self.__class__.__name__
s += f'(gamma={self.gamma}, '
s += f'alpha={self.alpha}, '
s += f'reduction={self.reduction})'
return s
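# Usage sketch (illustrative only, not part of the original file): the
# functional form expects 2D float logits, 1D integer class targets on the
# same CUDA device, and an optional per-class weight vector. The helper name
# below is hypothetical and assumes a CUDA-enabled build of the `_ext` module.
def _demo_sigmoid_focal_loss():
    if not torch.cuda.is_available():
        return
    logits = torch.randn(8, 4, device='cuda', requires_grad=True)  # (N, C)
    targets = torch.randint(0, 4, (8, ), device='cuda')  # (N, ), int64
    loss = sigmoid_focal_loss(logits, targets, 2.0, 0.25, None, 'mean')
    loss.backward()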
class SoftmaxFocalLossFunction(Function):
@staticmethod
def symbolic(g, input, target, gamma, alpha, weight, reduction):
return g.op(
'mmcv::MMCVSoftmaxFocalLoss',
input,
target,
gamma_f=gamma,
alpha_f=alpha,
weight_f=weight,
reduction_s=reduction)
@staticmethod
def forward(ctx,
input,
target,
gamma=2.0,
alpha=0.25,
weight=None,
reduction='mean'):
assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
assert input.dim() == 2
assert target.dim() == 1
assert input.size(0) == target.size(0)
if weight is None:
weight = input.new_empty(0)
else:
assert weight.dim() == 1
assert input.size(1) == weight.size(0)
ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
assert reduction in ctx.reduction_dict.keys()
ctx.gamma = float(gamma)
ctx.alpha = float(alpha)
ctx.reduction = ctx.reduction_dict[reduction]
channel_stats, _ = torch.max(input, dim=1)
input_softmax = input - channel_stats.unsqueeze(1).expand_as(input)
input_softmax.exp_()
channel_stats = input_softmax.sum(dim=1)
input_softmax /= channel_stats.unsqueeze(1).expand_as(input)
output = input.new_zeros(input.size(0))
ext_module.softmax_focal_loss_forward(
input_softmax,
target,
weight,
output,
gamma=ctx.gamma,
alpha=ctx.alpha)
if ctx.reduction == ctx.reduction_dict['mean']:
output = output.sum() / input.size(0)
elif ctx.reduction == ctx.reduction_dict['sum']:
output = output.sum()
ctx.save_for_backward(input_softmax, target, weight)
return output
@staticmethod
def backward(ctx, grad_output):
input_softmax, target, weight = ctx.saved_tensors
buff = input_softmax.new_zeros(input_softmax.size(0))
grad_input = input_softmax.new_zeros(input_softmax.size())
ext_module.softmax_focal_loss_backward(
input_softmax,
target,
weight,
buff,
grad_input,
gamma=ctx.gamma,
alpha=ctx.alpha)
grad_input *= grad_output
if ctx.reduction == ctx.reduction_dict['mean']:
grad_input /= input_softmax.size(0)
return grad_input, None, None, None, None, None
softmax_focal_loss = SoftmaxFocalLossFunction.apply
class SoftmaxFocalLoss(nn.Module):
def __init__(self, gamma, alpha, weight=None, reduction='mean'):
super(SoftmaxFocalLoss, self).__init__()
self.gamma = gamma
self.alpha = alpha
self.register_buffer('weight', weight)
self.reduction = reduction
def forward(self, input, target):
return softmax_focal_loss(input, target, self.gamma, self.alpha,
self.weight, self.reduction)
def __repr__(self):
s = self.__class__.__name__
s += f'(gamma={self.gamma}, '
s += f'alpha={self.alpha}, '
s += f'reduction={self.reduction})'
return s
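# Usage sketch (illustrative only, not part of the original file): the module
# wrapper stores gamma/alpha and forwards to the same CUDA op; `weight` may be
# None. The helper name is hypothetical and assumes a CUDA-enabled build.
def _demo_softmax_focal_loss():
    if not torch.cuda.is_available():
        return
    criterion = SoftmaxFocalLoss(gamma=2.0, alpha=0.25).cuda()
    logits = torch.randn(8, 4, device='cuda')  # (N, C) class logits
    targets = torch.randint(0, 4, (8, ), device='cuda')  # (N, ) class ids
    loss = criterion(logits, targets)  # scalar, 'mean' reduction by default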
import torch
from torch.autograd import Function
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', [
'furthest_point_sampling_forward',
'furthest_point_sampling_with_dist_forward'
])
class FurthestPointSampling(Function):
"""Uses iterative furthest point sampling to select a set of features whose
corresponding points have the furthest distance."""
@staticmethod
def forward(ctx, points_xyz: torch.Tensor,
num_points: int) -> torch.Tensor:
"""
Args:
points_xyz (Tensor): (B, N, 3) where N > num_points.
num_points (int): Number of points in the sampled set.
Returns:
Tensor: (B, num_points) indices of the sampled points.
"""
assert points_xyz.is_contiguous()
B, N = points_xyz.size()[:2]
output = torch.cuda.IntTensor(B, num_points)
temp = torch.cuda.FloatTensor(B, N).fill_(1e10)
ext_module.furthest_point_sampling_forward(
points_xyz,
temp,
output,
b=B,
n=N,
m=num_points,
)
if torch.__version__ != 'parrots':
ctx.mark_non_differentiable(output)
return output
@staticmethod
def backward(xyz, a=None):
return None, None
class FurthestPointSamplingWithDist(Function):
"""Uses iterative furthest point sampling to select a set of features whose
corresponding points have the furthest distance."""
@staticmethod
def forward(ctx, points_dist: torch.Tensor,
num_points: int) -> torch.Tensor:
"""
Args:
points_dist (Tensor): (B, N, N) Distance between each point pair.
num_points (int): Number of points in the sampled set.
Returns:
Tensor: (B, num_points) indices of the sampled points.
"""
assert points_dist.is_contiguous()
B, N, _ = points_dist.size()
output = points_dist.new_zeros([B, num_points], dtype=torch.int32)
temp = points_dist.new_zeros([B, N]).fill_(1e10)
ext_module.furthest_point_sampling_with_dist_forward(
points_dist, temp, output, b=B, n=N, m=num_points)
if torch.__version__ != 'parrots':
ctx.mark_non_differentiable(output)
return output
@staticmethod
def backward(xyz, a=None):
return None, None
furthest_point_sample = FurthestPointSampling.apply
furthest_point_sample_with_dist = FurthestPointSamplingWithDist.apply
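# Usage sketch (illustrative only, not part of the original file): sample 16
# indices from a (B, N, 3) point cloud; the result is a (B, 16) int32 tensor
# marked non-differentiable. Assumes a CUDA-enabled build.
def _demo_furthest_point_sample():
    if not torch.cuda.is_available():
        return
    points = torch.rand(2, 1024, 3, device='cuda')  # (B, N, 3)
    idx = furthest_point_sample(points, 16)  # (B, 16) sampled indices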
# modified from https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_act.py # noqa:E501
# Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
# Augmentation (ADA)
# =======================================================================
# 1. Definitions
# "Licensor" means any person or entity that distributes its Work.
# "Software" means the original work of authorship made available under
# this License.
# "Work" means the Software and any additions to or derivative works of
# the Software that are made available under this License.
# The terms "reproduce," "reproduction," "derivative works," and
# "distribution" have the meaning as provided under U.S. copyright law;
# provided, however, that for the purposes of this License, derivative
# works shall not include works that remain separable from, or merely
# link (or bind by name) to the interfaces of, the Work.
# Works, including the Software, are "made available" under this License
# by including in or with the Work either (a) a copyright notice
# referencing the applicability of this License to the Work, or (b) a
# copy of this License.
# 2. License Grants
# 2.1 Copyright Grant. Subject to the terms and conditions of this
# License, each Licensor grants to you a perpetual, worldwide,
# non-exclusive, royalty-free, copyright license to reproduce,
# prepare derivative works of, publicly display, publicly perform,
# sublicense and distribute its Work and any resulting derivative
# works in any form.
# 3. Limitations
# 3.1 Redistribution. You may reproduce or distribute the Work only
# if (a) you do so under this License, (b) you include a complete
# copy of this License with your distribution, and (c) you retain
# without modification any copyright, patent, trademark, or
# attribution notices that are present in the Work.
# 3.2 Derivative Works. You may specify that additional or different
# terms apply to the use, reproduction, and distribution of your
# derivative works of the Work ("Your Terms") only if (a) Your Terms
# provide that the use limitation in Section 3.3 applies to your
# derivative works, and (b) you identify the specific derivative
# works that are subject to Your Terms. Notwithstanding Your Terms,
# this License (including the redistribution requirements in Section
# 3.1) will continue to apply to the Work itself.
# 3.3 Use Limitation. The Work and any derivative works thereof only
# may be used or intended for use non-commercially. Notwithstanding
# the foregoing, NVIDIA and its affiliates may use the Work and any
# derivative works commercially. As used herein, "non-commercially"
# means for research or evaluation purposes only.
# 3.4 Patent Claims. If you bring or threaten to bring a patent claim
# against any Licensor (including any claim, cross-claim or
# counterclaim in a lawsuit) to enforce any patents that you allege
# are infringed by any Work, then your rights under this License from
# such Licensor (including the grant in Section 2.1) will terminate
# immediately.
# 3.5 Trademarks. This License does not grant any rights to use any
# Licensor’s or its affiliates’ names, logos, or trademarks, except
# as necessary to reproduce the notices described in this License.
# 3.6 Termination. If you violate any term of this License, then your
# rights under this License (including the grant in Section 2.1) will
# terminate immediately.
# 4. Disclaimer of Warranty.
# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
# THIS LICENSE.
# 5. Limitation of Liability.
# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
# THE POSSIBILITY OF SUCH DAMAGES.
# =======================================================================
import torch
import torch.nn.functional as F
from torch import nn
from torch.autograd import Function
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['fused_bias_leakyrelu'])
class FusedBiasLeakyReLUFunctionBackward(Function):
"""Calculate second order deviation.
This function is to compute the second order deviation for the fused leaky
relu operation.
"""
@staticmethod
def forward(ctx, grad_output, out, negative_slope, scale):
ctx.save_for_backward(out)
ctx.negative_slope = negative_slope
ctx.scale = scale
empty = grad_output.new_empty(0)
grad_input = ext_module.fused_bias_leakyrelu(
grad_output,
empty,
out,
act=3,
grad=1,
alpha=negative_slope,
scale=scale)
dim = [0]
if grad_input.ndim > 2:
dim += list(range(2, grad_input.ndim))
grad_bias = grad_input.sum(dim).detach()
return grad_input, grad_bias
@staticmethod
def backward(ctx, gradgrad_input, gradgrad_bias):
out, = ctx.saved_tensors
# The second-order derivative, in fact, contains two parts, but the
# first part is zero. Thus, we directly consider the second part,
# which is similar to the first-order derivative in implementation.
gradgrad_out = ext_module.fused_bias_leakyrelu(
gradgrad_input,
gradgrad_bias.to(out.dtype),
out,
act=3,
grad=1,
alpha=ctx.negative_slope,
scale=ctx.scale)
return gradgrad_out, None, None, None
class FusedBiasLeakyReLUFunction(Function):
@staticmethod
def forward(ctx, input, bias, negative_slope, scale):
empty = input.new_empty(0)
out = ext_module.fused_bias_leakyrelu(
input,
bias,
empty,
act=3,
grad=0,
alpha=negative_slope,
scale=scale)
ctx.save_for_backward(out)
ctx.negative_slope = negative_slope
ctx.scale = scale
return out
@staticmethod
def backward(ctx, grad_output):
out, = ctx.saved_tensors
grad_input, grad_bias = FusedBiasLeakyReLUFunctionBackward.apply(
grad_output, out, ctx.negative_slope, ctx.scale)
return grad_input, grad_bias, None, None
class FusedBiasLeakyReLU(nn.Module):
"""Fused bias leaky ReLU.
This function is introduced in the StyleGAN2:
http://arxiv.org/abs/1912.04958
The bias term comes from the convolution operation. In addition, to keep
the variance of the feature map or gradients unchanged, they also adopt a
scale similar to Kaiming initialization. However, since
:math:`\alpha^2` is very small, :math:`1 + \alpha^2` can simply be
ignored. Therefore, the final scale is just :math:`\sqrt{2}`. Of course,
you may replace it with your own scale.
TODO: Implement the CPU version.
Args:
channel (int): The channel number of the feature map.
negative_slope (float, optional): Same as nn.LeakyReLU.
Defaults to 0.2.
scale (float, optional): A scalar to adjust the variance of the feature
map. Defaults to 2**0.5.
"""
def __init__(self, num_channels, negative_slope=0.2, scale=2**0.5):
super(FusedBiasLeakyReLU, self).__init__()
self.bias = nn.Parameter(torch.zeros(num_channels))
self.negative_slope = negative_slope
self.scale = scale
def forward(self, input):
return fused_bias_leakyrelu(input, self.bias, self.negative_slope,
self.scale)
def fused_bias_leakyrelu(input, bias, negative_slope=0.2, scale=2**0.5):
"""Fused bias leaky ReLU function.
This function is introduced in the StyleGAN2:
http://arxiv.org/abs/1912.04958
The bias term comes from the convolution operation. In addition, to keep
the variance of the feature map or gradients unchanged, they also adopt a
scale similar to Kaiming initialization. However, since
:math:`\alpha^2` is very small, :math:`1 + \alpha^2` can simply be
ignored. Therefore, the final scale is just :math:`\sqrt{2}`. Of course,
you may replace it with your own scale.
Args:
input (torch.Tensor): Input feature map.
bias (nn.Parameter): The bias from convolution operation.
negative_slope (float, optional): Same as nn.LeakyReLU.
Defaults to 0.2.
scale (float, optional): A scalar to adjust the variance of the feature
map. Defaults to 2**0.5.
Returns:
torch.Tensor: Feature map after non-linear activation.
"""
if not input.is_cuda:
return bias_leakyrelu_ref(input, bias, negative_slope, scale)
return FusedBiasLeakyReLUFunction.apply(input, bias.to(input.dtype),
negative_slope, scale)
def bias_leakyrelu_ref(x, bias, negative_slope=0.2, scale=2**0.5):
if bias is not None:
assert bias.ndim == 1
assert bias.shape[0] == x.shape[1]
x = x + bias.reshape([-1 if i == 1 else 1 for i in range(x.ndim)])
x = F.leaky_relu(x, negative_slope)
if scale != 1:
x = x * scale
return x
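# Usage sketch (illustrative only, not part of the original file): on CPU
# tensors fused_bias_leakyrelu falls back to bias_leakyrelu_ref, so this runs
# without the CUDA extension. The bias length must match the channel dim.
def _demo_fused_bias_leakyrelu():
    x = torch.randn(2, 8, 16, 16)  # (N, C, H, W) feature map
    bias = torch.zeros(8)  # one bias per channel
    out = fused_bias_leakyrelu(x, bias, negative_slope=0.2, scale=2**0.5)
    assert out.shape == x.shape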
import torch
from torch.autograd import Function
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext', ['gather_points_forward', 'gather_points_backward'])
class GatherPoints(Function):
"""Gather points with given index."""
@staticmethod
def forward(ctx, features: torch.Tensor,
indices: torch.Tensor) -> torch.Tensor:
"""
Args:
features (Tensor): (B, C, N) features to gather.
indices (Tensor): (B, M) where M is the number of points.
Returns:
Tensor: (B, C, M) where M is the number of points.
"""
assert features.is_contiguous()
assert indices.is_contiguous()
B, npoint = indices.size()
_, C, N = features.size()
output = torch.cuda.FloatTensor(B, C, npoint)
ext_module.gather_points_forward(
features, indices, output, b=B, c=C, n=N, npoints=npoint)
ctx.for_backwards = (indices, C, N)
if torch.__version__ != 'parrots':
ctx.mark_non_differentiable(indices)
return output
@staticmethod
def backward(ctx, grad_out):
idx, C, N = ctx.for_backwards
B, npoint = idx.size()
grad_features = torch.cuda.FloatTensor(B, C, N).zero_()
grad_out_data = grad_out.data.contiguous()
ext_module.gather_points_backward(
grad_out_data,
idx,
grad_features.data,
b=B,
c=C,
n=N,
npoints=npoint)
return grad_features, None
gather_points = GatherPoints.apply
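# Usage sketch (illustrative only, not part of the original file): gather the
# C-dimensional features at the given indices, e.g. the ones produced by
# furthest_point_sample. Assumes a CUDA-enabled build.
def _demo_gather_points():
    if not torch.cuda.is_available():
        return
    features = torch.rand(2, 32, 1024, device='cuda')  # (B, C, N)
    indices = torch.randint(0, 1024, (2, 16), device='cuda').int()  # (B, M)
    gathered = gather_points(features, indices)  # (B, C, M)
    assert gathered.shape == (2, 32, 16)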
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple
import torch
from torch import nn as nn
from torch.autograd import Function
from ..utils import ext_loader
from .ball_query import ball_query
from .knn import knn
ext_module = ext_loader.load_ext(
'_ext', ['group_points_forward', 'group_points_backward'])
class QueryAndGroup(nn.Module):
"""Groups points with a ball query of radius.
Args:
max_radius (float): The maximum radius of the balls.
If None is given, we will use kNN sampling instead of ball query.
sample_num (int): Maximum number of features to gather in the ball.
min_radius (float, optional): The minimum radius of the balls.
Default: 0.
use_xyz (bool, optional): Whether to use xyz.
Default: True.
return_grouped_xyz (bool, optional): Whether to return grouped xyz.
Default: False.
normalize_xyz (bool, optional): Whether to normalize xyz.
Default: False.
uniform_sample (bool, optional): Whether to sample uniformly.
Default: False
return_unique_cnt (bool, optional): Whether to return the count of
unique samples. Default: False.
return_grouped_idx (bool, optional): Whether to return grouped idx.
Default: False.
"""
def __init__(self,
max_radius,
sample_num,
min_radius=0,
use_xyz=True,
return_grouped_xyz=False,
normalize_xyz=False,
uniform_sample=False,
return_unique_cnt=False,
return_grouped_idx=False):
super().__init__()
self.max_radius = max_radius
self.min_radius = min_radius
self.sample_num = sample_num
self.use_xyz = use_xyz
self.return_grouped_xyz = return_grouped_xyz
self.normalize_xyz = normalize_xyz
self.uniform_sample = uniform_sample
self.return_unique_cnt = return_unique_cnt
self.return_grouped_idx = return_grouped_idx
if self.return_unique_cnt:
assert self.uniform_sample, \
'uniform_sample should be True when ' \
'returning the count of unique samples'
if self.max_radius is None:
assert not self.normalize_xyz, \
'can not normalize grouped xyz when max_radius is None'
def forward(self, points_xyz, center_xyz, features=None):
"""
Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
center_xyz (Tensor): (B, npoint, 3) coordinates of the centroids.
features (Tensor): (B, C, N) Descriptors of the features.
Returns:
Tensor: (B, 3 + C, npoint, sample_num) Grouped feature.
"""
# if self.max_radius is None, we will perform kNN instead of ball query
# idx is of shape [B, npoint, sample_num]
if self.max_radius is None:
idx = knn(self.sample_num, points_xyz, center_xyz, False)
idx = idx.transpose(1, 2).contiguous()
else:
idx = ball_query(self.min_radius, self.max_radius, self.sample_num,
points_xyz, center_xyz)
if self.uniform_sample:
unique_cnt = torch.zeros((idx.shape[0], idx.shape[1]))
for i_batch in range(idx.shape[0]):
for i_region in range(idx.shape[1]):
unique_ind = torch.unique(idx[i_batch, i_region, :])
num_unique = unique_ind.shape[0]
unique_cnt[i_batch, i_region] = num_unique
sample_ind = torch.randint(
0,
num_unique, (self.sample_num - num_unique, ),
dtype=torch.long)
all_ind = torch.cat((unique_ind, unique_ind[sample_ind]))
idx[i_batch, i_region, :] = all_ind
xyz_trans = points_xyz.transpose(1, 2).contiguous()
# (B, 3, npoint, sample_num)
grouped_xyz = grouping_operation(xyz_trans, idx)
grouped_xyz_diff = grouped_xyz - \
center_xyz.transpose(1, 2).unsqueeze(-1) # relative offsets
if self.normalize_xyz:
grouped_xyz_diff /= self.max_radius
if features is not None:
grouped_features = grouping_operation(features, idx)
if self.use_xyz:
# (B, C + 3, npoint, sample_num)
new_features = torch.cat([grouped_xyz_diff, grouped_features],
dim=1)
else:
new_features = grouped_features
else:
assert (self.use_xyz
), 'use_xyz must be True when no features are provided!'
new_features = grouped_xyz_diff
ret = [new_features]
if self.return_grouped_xyz:
ret.append(grouped_xyz)
if self.return_unique_cnt:
ret.append(unique_cnt)
if self.return_grouped_idx:
ret.append(idx)
if len(ret) == 1:
return ret[0]
else:
return tuple(ret)
class GroupAll(nn.Module):
"""Group xyz with feature.
Args:
use_xyz (bool): Whether to use xyz.
"""
def __init__(self, use_xyz: bool = True):
super().__init__()
self.use_xyz = use_xyz
def forward(self,
xyz: torch.Tensor,
new_xyz: torch.Tensor,
features: torch.Tensor = None):
"""
Args:
xyz (Tensor): (B, N, 3) xyz coordinates of the features.
new_xyz (Tensor): new xyz coordinates of the features.
features (Tensor): (B, C, N) features to group.
Returns:
Tensor: (B, C + 3, 1, N) Grouped feature.
"""
grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
if features is not None:
grouped_features = features.unsqueeze(2)
if self.use_xyz:
# (B, 3 + C, 1, N)
new_features = torch.cat([grouped_xyz, grouped_features],
dim=1)
else:
new_features = grouped_features
else:
new_features = grouped_xyz
return new_features
class GroupingOperation(Function):
"""Group feature with given index."""
@staticmethod
def forward(ctx, features: torch.Tensor,
indices: torch.Tensor) -> torch.Tensor:
"""
Args:
features (Tensor): (B, C, N) tensor of features to group.
indices (Tensor): (B, npoint, nsample) the indices of
features to group with.
Returns:
Tensor: (B, C, npoint, nsample) Grouped features.
"""
features = features.contiguous()
indices = indices.contiguous()
B, nfeatures, nsample = indices.size()
_, C, N = features.size()
output = torch.cuda.FloatTensor(B, C, nfeatures, nsample)
ext_module.group_points_forward(B, C, N, nfeatures, nsample, features,
indices, output)
ctx.for_backwards = (indices, N)
return output
@staticmethod
def backward(ctx,
grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Args:
grad_out (Tensor): (B, C, npoint, nsample) tensor of the gradients
of the output from forward.
Returns:
Tensor: (B, C, N) gradient of the features.
"""
idx, N = ctx.for_backwards
B, C, npoint, nsample = grad_out.size()
grad_features = torch.cuda.FloatTensor(B, C, N).zero_()
grad_out_data = grad_out.data.contiguous()
ext_module.group_points_backward(B, C, N, npoint, nsample,
grad_out_data, idx,
grad_features.data)
return grad_features, None
grouping_operation = GroupingOperation.apply
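# Usage sketch (illustrative only, not part of the original file): group up to
# 16 neighbours within a 0.2 radius ball around 32 centre points; with
# use_xyz=True the output is (B, 3 + C, npoint, sample_num). Assumes a
# CUDA-enabled build.
def _demo_query_and_group():
    if not torch.cuda.is_available():
        return
    xyz = torch.rand(2, 1024, 3, device='cuda')  # (B, N, 3)
    centers = xyz[:, :32, :].contiguous()  # (B, npoint, 3)
    feats = torch.rand(2, 16, 1024, device='cuda')  # (B, C, N)
    grouper = QueryAndGroup(max_radius=0.2, sample_num=16)
    grouped = grouper(xyz, centers, feats)  # (B, 3 + C, npoint, sample_num)
    assert grouped.shape == (2, 19, 32, 16)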
# Copyright (c) OpenMMLab. All rights reserved.
import glob
import os
import torch
if torch.__version__ == 'parrots':
import parrots
def get_compiler_version():
return 'GCC ' + parrots.version.compiler
def get_compiling_cuda_version():
return parrots.version.cuda
else:
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext', ['get_compiler_version', 'get_compiling_cuda_version'])
def get_compiler_version():
return ext_module.get_compiler_version()
def get_compiling_cuda_version():
return ext_module.get_compiling_cuda_version()
def get_onnxruntime_op_path():
wildcard = os.path.join(
os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
'_ext_ort.*.so')
paths = glob.glob(wildcard)
if len(paths) > 0:
return paths[0]
else:
return ''
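# Usage sketch (illustrative only, not part of the original file): these
# helpers report how the extension was compiled and where the optional ONNX
# Runtime custom-op library lives (an empty string means it was not built).
def _demo_build_info():
    print(get_compiler_version())
    print(get_compiling_cuda_version())
    print(get_onnxruntime_op_path() or '<onnxruntime ops not built>')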
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', [
'iou3d_boxes_iou_bev_forward', 'iou3d_nms_forward',
'iou3d_nms_normal_forward'
])
def boxes_iou_bev(boxes_a, boxes_b):
"""Calculate boxes IoU in the Bird's Eye View.
Args:
boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
boxes_b (torch.Tensor): Input boxes b with shape (N, 5).
Returns:
ans_iou (torch.Tensor): IoU result with shape (M, N).
"""
ans_iou = boxes_a.new_zeros(
torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
ext_module.iou3d_boxes_iou_bev_forward(boxes_a.contiguous(),
boxes_b.contiguous(), ans_iou)
return ans_iou
def nms_bev(boxes, scores, thresh, pre_max_size=None, post_max_size=None):
"""NMS function GPU implementation (for BEV boxes). The overlap of two
boxes for IoU calculation is defined as the exact overlapping area of the
two boxes. In this function, one can also set ``pre_max_size`` and
``post_max_size``.
Args:
boxes (torch.Tensor): Input boxes with the shape of [N, 5]
([x1, y1, x2, y2, ry]).
scores (torch.Tensor): Scores of boxes with the shape of [N].
thresh (float): Overlap threshold of NMS.
pre_max_size (int, optional): Max size of boxes before NMS.
Default: None.
post_max_size (int, optional): Max size of boxes after NMS.
Default: None.
Returns:
torch.Tensor: Indexes after NMS.
"""
assert boxes.size(1) == 5, 'Input boxes shape should be [N, 5]'
order = scores.sort(0, descending=True)[1]
if pre_max_size is not None:
order = order[:pre_max_size]
boxes = boxes[order].contiguous()
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = ext_module.iou3d_nms_forward(boxes, keep, thresh)
keep = order[keep[:num_out].cuda(boxes.device)].contiguous()
if post_max_size is not None:
keep = keep[:post_max_size]
return keep
def nms_normal_bev(boxes, scores, thresh):
"""Normal NMS function GPU implementation (for BEV boxes). The overlap of
two boxes for IoU calculation is defined as the exact overlapping area of
the two boxes WITH their yaw angle set to 0.
Args:
boxes (torch.Tensor): Input boxes with shape (N, 5).
scores (torch.Tensor): Scores of predicted boxes with shape (N).
thresh (float): Overlap threshold of NMS.
Returns:
torch.Tensor: Remaining indices with scores in descending order.
"""
assert boxes.shape[1] == 5, 'Input boxes shape should be [N, 5]'
order = scores.sort(0, descending=True)[1]
boxes = boxes[order].contiguous()
keep = torch.zeros(boxes.size(0), dtype=torch.long)
num_out = ext_module.iou3d_nms_normal_forward(boxes, keep, thresh)
return order[keep[:num_out].cuda(boxes.device)].contiguous()
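# Usage sketch (illustrative only, not part of the original file): BEV boxes
# are given as [x1, y1, x2, y2, ry]; nms_bev returns the indices of the kept
# boxes, sorted by descending score. Assumes a CUDA-enabled build.
def _demo_nms_bev():
    if not torch.cuda.is_available():
        return
    boxes = torch.tensor([[0.0, 0.0, 2.0, 2.0, 0.0],
                          [0.1, 0.1, 2.0, 2.0, 0.0],
                          [5.0, 5.0, 7.0, 7.0, 0.0]], device='cuda')
    scores = torch.tensor([0.9, 0.8, 0.7], device='cuda')
    keep = nms_bev(boxes, scores, thresh=0.5)  # e.g. indices of boxes 0 and 2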
import torch
from torch.autograd import Function
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['knn_forward'])
class KNN(Function):
r"""KNN (CUDA) based on heap data structure.
Modified from `PAConv <https://github.com/CVMI-Lab/PAConv/tree/main/
scene_seg/lib/pointops/src/knnquery_heap>`_.
Find k-nearest points.
"""
@staticmethod
def forward(ctx,
k: int,
xyz: torch.Tensor,
center_xyz: torch.Tensor = None,
transposed: bool = False) -> torch.Tensor:
"""
Args:
k (int): number of nearest neighbors.
xyz (Tensor): (B, N, 3) if transposed == False, else (B, 3, N).
xyz coordinates of the features.
center_xyz (Tensor, optional): (B, npoint, 3) if transposed ==
False, else (B, 3, npoint). centers of the knn query.
Default: None.
transposed (bool, optional): whether the input tensors are
transposed. Should not explicitly use this keyword when
calling knn (=KNN.apply), just add the fourth param.
Default: False.
Returns:
Tensor: (B, k, npoint) tensor with the indices of
the features that form k-nearest neighbours.
"""
assert (k > 0) & (k < 100), 'k should be in range(0, 100)'
if center_xyz is None:
center_xyz = xyz
if transposed:
xyz = xyz.transpose(2, 1).contiguous()
center_xyz = center_xyz.transpose(2, 1).contiguous()
assert xyz.is_contiguous() # [B, N, 3]
assert center_xyz.is_contiguous() # [B, npoint, 3]
center_xyz_device = center_xyz.get_device()
assert center_xyz_device == xyz.get_device(), \
'center_xyz and xyz should be put on the same device'
if torch.cuda.current_device() != center_xyz_device:
torch.cuda.set_device(center_xyz_device)
B, npoint, _ = center_xyz.shape
N = xyz.shape[1]
idx = center_xyz.new_zeros((B, npoint, k)).int()
dist2 = center_xyz.new_zeros((B, npoint, k)).float()
ext_module.knn_forward(
xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k)
# idx shape to [B, k, npoint]
idx = idx.transpose(2, 1).contiguous()
if torch.__version__ != 'parrots':
ctx.mark_non_differentiable(idx)
return idx
@staticmethod
def backward(ctx, a=None):
return None, None, None
knn = KNN.apply
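# Usage sketch (illustrative only, not part of the original file): find the 5
# nearest points in `xyz` for every centre in `center_xyz`; the result has
# shape (B, k, npoint) and dtype int32. Assumes a CUDA-enabled build.
def _demo_knn():
    if not torch.cuda.is_available():
        return
    xyz = torch.rand(2, 1024, 3, device='cuda')  # (B, N, 3)
    centers = torch.rand(2, 128, 3, device='cuda')  # (B, npoint, 3)
    idx = knn(5, xyz, centers, False)  # (2, 5, 128) neighbour indices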
# Copyright (c) OpenMMLab. All rights reserved.
import math
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext', ['masked_im2col_forward', 'masked_col2im_forward'])
class MaskedConv2dFunction(Function):
@staticmethod
def symbolic(g, features, mask, weight, bias, padding, stride):
return g.op(
'mmcv::MMCVMaskedConv2d',
features,
mask,
weight,
bias,
padding_i=padding,
stride_i=stride)
@staticmethod
def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
assert mask.dim() == 3 and mask.size(0) == 1
assert features.dim() == 4 and features.size(0) == 1
assert features.size()[2:] == mask.size()[1:]
pad_h, pad_w = _pair(padding)
stride_h, stride_w = _pair(stride)
if stride_h != 1 or stride_w != 1:
raise ValueError(
'Stride can only be 1 in masked_conv2d currently.')
out_channel, in_channel, kernel_h, kernel_w = weight.size()
batch_size = features.size(0)
out_h = int(
math.floor((features.size(2) + 2 * pad_h -
(kernel_h - 1) - 1) / stride_h + 1))
out_w = int(
math.floor((features.size(3) + 2 * pad_w -
(kernel_w - 1) - 1) / stride_w + 1))
mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False)
output = features.new_zeros(batch_size, out_channel, out_h, out_w)
if mask_inds.numel() > 0:
mask_h_idx = mask_inds[:, 0].contiguous()
mask_w_idx = mask_inds[:, 1].contiguous()
data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
mask_inds.size(0))
ext_module.masked_im2col_forward(
features,
mask_h_idx,
mask_w_idx,
data_col,
kernel_h=kernel_h,
kernel_w=kernel_w,
pad_h=pad_h,
pad_w=pad_w)
masked_output = torch.addmm(bias[:, None],
weight.view(out_channel, -1), data_col)
ext_module.masked_col2im_forward(
masked_output,
mask_h_idx,
mask_w_idx,
output,
height=out_h,
width=out_w,
channels=out_channel)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
return (None, ) * 5
masked_conv2d = MaskedConv2dFunction.apply
class MaskedConv2d(nn.Conv2d):
"""A MaskedConv2d which inherits the official Conv2d.
The masked forward doesn't implement the backward function and currently
only supports a stride of 1.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True):
super(MaskedConv2d,
self).__init__(in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, bias)
def forward(self, input, mask=None):
if mask is None: # fallback to the normal Conv2d
return super(MaskedConv2d, self).forward(input)
else:
return masked_conv2d(input, mask, self.weight, self.bias,
self.padding)
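# Usage sketch (illustrative only, not part of the original file): with a mask
# the convolution is only evaluated at masked positions; without one it
# behaves like a plain nn.Conv2d. The masked path requires batch size 1 and
# stride 1. Assumes a CUDA-enabled build.
def _demo_masked_conv2d():
    if not torch.cuda.is_available():
        return
    conv = MaskedConv2d(3, 8, kernel_size=3, padding=1).cuda()
    x = torch.randn(1, 3, 32, 32, device='cuda')
    mask = (torch.rand(1, 32, 32, device='cuda') > 0.5).float()
    out = conv(x, mask)  # (1, 8, 32, 32)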
# Copyright (c) OpenMMLab. All rights reserved.
from abc import abstractmethod
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..cnn import ConvModule
class BaseMergeCell(nn.Module):
"""The basic class for cells used in NAS-FPN and NAS-FCOS.
BaseMergeCell takes 2 inputs. After applying convolution
on them, they are resized to the target size. Then,
they go through binary_op, which depends on the type of cell.
If with_out_conv is True, the result will go through another
convolution layer.
Args:
fused_channels (int): number of input channels in out_conv layer.
out_channels (int): number of output channels in out_conv layer.
with_out_conv (bool): Whether to use out_conv layer
out_conv_cfg (dict): Config dict for convolution layer, which should
contain "groups", "kernel_size", "padding", "bias" to build
out_conv layer.
out_norm_cfg (dict): Config dict for normalization layer in out_conv.
out_conv_order (tuple): The order of conv/norm/activation layers in
out_conv.
with_input1_conv (bool): Whether to use convolution on input1.
with_input2_conv (bool): Whether to use convolution on input2.
input_conv_cfg (dict): Config dict for building input1_conv layer and
input2_conv layer, which is expected to contain the type of
convolution.
Default: None, which means using conv2d.
input_norm_cfg (dict): Config dict for normalization layer in
input1_conv and input2_conv layer. Default: None.
upsample_mode (str): Interpolation method used to resize the output
of input1_conv and input2_conv to target size. Currently, we
support ['nearest', 'bilinear']. Default: 'nearest'.
"""
def __init__(self,
fused_channels=256,
out_channels=256,
with_out_conv=True,
out_conv_cfg=dict(
groups=1, kernel_size=3, padding=1, bias=True),
out_norm_cfg=None,
out_conv_order=('act', 'conv', 'norm'),
with_input1_conv=False,
with_input2_conv=False,
input_conv_cfg=None,
input_norm_cfg=None,
upsample_mode='nearest'):
super(BaseMergeCell, self).__init__()
assert upsample_mode in ['nearest', 'bilinear']
self.with_out_conv = with_out_conv
self.with_input1_conv = with_input1_conv
self.with_input2_conv = with_input2_conv
self.upsample_mode = upsample_mode
if self.with_out_conv:
self.out_conv = ConvModule(
fused_channels,
out_channels,
**out_conv_cfg,
norm_cfg=out_norm_cfg,
order=out_conv_order)
self.input1_conv = self._build_input_conv(
out_channels, input_conv_cfg,
input_norm_cfg) if with_input1_conv else nn.Sequential()
self.input2_conv = self._build_input_conv(
out_channels, input_conv_cfg,
input_norm_cfg) if with_input2_conv else nn.Sequential()
def _build_input_conv(self, channel, conv_cfg, norm_cfg):
return ConvModule(
channel,
channel,
3,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
bias=True)
@abstractmethod
def _binary_op(self, x1, x2):
pass
def _resize(self, x, size):
if x.shape[-2:] == size:
return x
elif x.shape[-2:] < size:
return F.interpolate(x, size=size, mode=self.upsample_mode)
else:
assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0
kernel_size = x.shape[-1] // size[-1]
x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size)
return x
def forward(self, x1, x2, out_size=None):
assert x1.shape[:2] == x2.shape[:2]
assert out_size is None or len(out_size) == 2
if out_size is None: # resize to larger one
out_size = max(x1.size()[2:], x2.size()[2:])
x1 = self.input1_conv(x1)
x2 = self.input2_conv(x2)
x1 = self._resize(x1, out_size)
x2 = self._resize(x2, out_size)
x = self._binary_op(x1, x2)
if self.with_out_conv:
x = self.out_conv(x)
return x
class SumCell(BaseMergeCell):
def __init__(self, in_channels, out_channels, **kwargs):
super(SumCell, self).__init__(in_channels, out_channels, **kwargs)
def _binary_op(self, x1, x2):
return x1 + x2
class ConcatCell(BaseMergeCell):
def __init__(self, in_channels, out_channels, **kwargs):
super(ConcatCell, self).__init__(in_channels * 2, out_channels,
**kwargs)
def _binary_op(self, x1, x2):
ret = torch.cat([x1, x2], dim=1)
return ret
class GlobalPoolingCell(BaseMergeCell):
def __init__(self, in_channels=None, out_channels=None, **kwargs):
super().__init__(in_channels, out_channels, **kwargs)
self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
def _binary_op(self, x1, x2):
x2_att = self.global_pool(x2).sigmoid()
return x2 + x2_att * x1
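# Usage sketch (illustrative only, not part of the original file): SumCell
# resizes both inputs to the larger spatial size and adds them, while
# ConcatCell concatenates along channels before the output convolution. This
# runs on CPU and needs no compiled extension.
def _demo_merge_cells():
    cell = SumCell(in_channels=16, out_channels=16)
    x1 = torch.randn(2, 16, 32, 32)
    x2 = torch.randn(2, 16, 16, 16)  # upsampled to 32x32 inside the cell
    out = cell(x1, x2)  # (2, 16, 32, 32)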
# Copyright (c) OpenMMLab. All rights reserved.
import math
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single
from annotator.uniformer.mmcv.utils import deprecated_api_warning
from ..cnn import CONV_LAYERS
from ..utils import ext_loader, print_log
ext_module = ext_loader.load_ext(
'_ext',
['modulated_deform_conv_forward', 'modulated_deform_conv_backward'])
class ModulatedDeformConv2dFunction(Function):
@staticmethod
def symbolic(g, input, offset, mask, weight, bias, stride, padding,
dilation, groups, deform_groups):
input_tensors = [input, offset, mask, weight]
if bias is not None:
input_tensors.append(bias)
return g.op(
'mmcv::MMCVModulatedDeformConv2d',
*input_tensors,
stride_i=stride,
padding_i=padding,
dilation_i=dilation,
groups_i=groups,
deform_groups_i=deform_groups)
@staticmethod
def forward(ctx,
input,
offset,
mask,
weight,
bias=None,
stride=1,
padding=0,
dilation=1,
groups=1,
deform_groups=1):
if input is not None and input.dim() != 4:
raise ValueError(
f'Expected 4D tensor as input, got {input.dim()}D tensor '
'instead.')
ctx.stride = _pair(stride)
ctx.padding = _pair(padding)
ctx.dilation = _pair(dilation)
ctx.groups = groups
ctx.deform_groups = deform_groups
ctx.with_bias = bias is not None
if not ctx.with_bias:
bias = input.new_empty(0) # fake tensor
# When pytorch version >= 1.6.0, amp is adopted for fp16 mode;
# amp won't cast the type of model (float32), but "offset" is cast
# to float16 by nn.Conv2d automatically, leading to the type
# mismatch with input (when it is float32) or weight.
# The flag for whether to use fp16 or amp is the type of "offset",
# we cast weight and input to temporarily support fp16 and amp
# whatever the pytorch version is.
input = input.type_as(offset)
weight = weight.type_as(input)
ctx.save_for_backward(input, offset, mask, weight, bias)
output = input.new_empty(
ModulatedDeformConv2dFunction._output_size(ctx, input, weight))
ctx._bufs = [input.new_empty(0), input.new_empty(0)]
ext_module.modulated_deform_conv_forward(
input,
weight,
bias,
ctx._bufs[0],
offset,
mask,
output,
ctx._bufs[1],
kernel_h=weight.size(2),
kernel_w=weight.size(3),
stride_h=ctx.stride[0],
stride_w=ctx.stride[1],
pad_h=ctx.padding[0],
pad_w=ctx.padding[1],
dilation_h=ctx.dilation[0],
dilation_w=ctx.dilation[1],
group=ctx.groups,
deformable_group=ctx.deform_groups,
with_bias=ctx.with_bias)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, offset, mask, weight, bias = ctx.saved_tensors
grad_input = torch.zeros_like(input)
grad_offset = torch.zeros_like(offset)
grad_mask = torch.zeros_like(mask)
grad_weight = torch.zeros_like(weight)
grad_bias = torch.zeros_like(bias)
grad_output = grad_output.contiguous()
ext_module.modulated_deform_conv_backward(
input,
weight,
bias,
ctx._bufs[0],
offset,
mask,
ctx._bufs[1],
grad_input,
grad_weight,
grad_bias,
grad_offset,
grad_mask,
grad_output,
kernel_h=weight.size(2),
kernel_w=weight.size(3),
stride_h=ctx.stride[0],
stride_w=ctx.stride[1],
pad_h=ctx.padding[0],
pad_w=ctx.padding[1],
dilation_h=ctx.dilation[0],
dilation_w=ctx.dilation[1],
group=ctx.groups,
deformable_group=ctx.deform_groups,
with_bias=ctx.with_bias)
if not ctx.with_bias:
grad_bias = None
return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
None, None, None, None, None)
@staticmethod
def _output_size(ctx, input, weight):
channels = weight.size(0)
output_size = (input.size(0), channels)
for d in range(input.dim() - 2):
in_size = input.size(d + 2)
pad = ctx.padding[d]
kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
stride_ = ctx.stride[d]
output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
if not all(map(lambda s: s > 0, output_size)):
raise ValueError(
'convolution input is too small (output would be ' +
'x'.join(map(str, output_size)) + ')')
return output_size
modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply
class ModulatedDeformConv2d(nn.Module):
@deprecated_api_warning({'deformable_groups': 'deform_groups'},
cls_name='ModulatedDeformConv2d')
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
deform_groups=1,
bias=True):
super(ModulatedDeformConv2d, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
self.stride = _pair(stride)
self.padding = _pair(padding)
self.dilation = _pair(dilation)
self.groups = groups
self.deform_groups = deform_groups
# enable compatibility with nn.Conv2d
self.transposed = False
self.output_padding = _single(0)
self.weight = nn.Parameter(
torch.Tensor(out_channels, in_channels // groups,
*self.kernel_size))
if bias:
self.bias = nn.Parameter(torch.Tensor(out_channels))
else:
self.register_parameter('bias', None)
self.init_weights()
def init_weights(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.zero_()
def forward(self, x, offset, mask):
return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
self.stride, self.padding,
self.dilation, self.groups,
self.deform_groups)
@CONV_LAYERS.register_module('DCNv2')
class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
"""A ModulatedDeformable Conv Encapsulation that acts as normal Conv
layers.
Args:
in_channels (int): Same as nn.Conv2d.
out_channels (int): Same as nn.Conv2d.
kernel_size (int or tuple[int]): Same as nn.Conv2d.
stride (int): Same as nn.Conv2d, while tuple is not supported.
padding (int): Same as nn.Conv2d, while tuple is not supported.
dilation (int): Same as nn.Conv2d, while tuple is not supported.
groups (int): Same as nn.Conv2d.
bias (bool or str): If specified as `auto`, it will be decided by the
norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
False.
"""
_version = 2
def __init__(self, *args, **kwargs):
super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs)
self.conv_offset = nn.Conv2d(
self.in_channels,
self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
kernel_size=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
bias=True)
self.init_weights()
def init_weights(self):
super(ModulatedDeformConv2dPack, self).init_weights()
if hasattr(self, 'conv_offset'):
self.conv_offset.weight.data.zero_()
self.conv_offset.bias.data.zero_()
def forward(self, x):
out = self.conv_offset(x)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
self.stride, self.padding,
self.dilation, self.groups,
self.deform_groups)
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
missing_keys, unexpected_keys, error_msgs):
version = local_metadata.get('version', None)
if version is None or version < 2:
# the key is different in early versions
# In version < 2, ModulatedDeformConvPack
# loads previous benchmark models.
if (prefix + 'conv_offset.weight' not in state_dict
and prefix[:-1] + '_offset.weight' in state_dict):
state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
prefix[:-1] + '_offset.weight')
if (prefix + 'conv_offset.bias' not in state_dict
and prefix[:-1] + '_offset.bias' in state_dict):
state_dict[prefix +
'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
'_offset.bias')
if version is not None and version > 1:
print_log(
f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to '
'version 2.',
logger='root')
super()._load_from_state_dict(state_dict, prefix, local_metadata,
strict, missing_keys, unexpected_keys,
error_msgs)
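# Usage sketch (illustrative only, not part of the original file):
# ModulatedDeformConv2dPack predicts its own offsets and masks from the input,
# so it can be used as a drop-in replacement for nn.Conv2d. Assumes a
# CUDA-enabled build.
def _demo_dcn_v2():
    if not torch.cuda.is_available():
        return
    dcn = ModulatedDeformConv2dPack(3, 8, kernel_size=3, padding=1).cuda()
    x = torch.randn(2, 3, 32, 32, device='cuda')
    out = dcn(x)  # (2, 8, 32, 32)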
# Copyright (c) OpenMMLab. All rights reserved.
import math
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd.function import Function, once_differentiable
from annotator.uniformer.mmcv import deprecated_api_warning
from annotator.uniformer.mmcv.cnn import constant_init, xavier_init
from annotator.uniformer.mmcv.cnn.bricks.registry import ATTENTION
from annotator.uniformer.mmcv.runner import BaseModule
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext', ['ms_deform_attn_backward', 'ms_deform_attn_forward'])
class MultiScaleDeformableAttnFunction(Function):
@staticmethod
def forward(ctx, value, value_spatial_shapes, value_level_start_index,
sampling_locations, attention_weights, im2col_step):
"""GPU version of multi-scale deformable attention.
Args:
value (Tensor): The value has shape
(bs, num_keys, num_heads, embed_dims//num_heads).
value_spatial_shapes (Tensor): Spatial shape of
each feature map, has shape (num_levels, 2),
the last dimension 2 represents (h, w).
sampling_locations (Tensor): The location of sampling points,
has shape
(bs, num_queries, num_heads, num_levels, num_points, 2),
the last dimension 2 represents (x, y).
attention_weights (Tensor): The weight of sampling points used
when calculating the attention, has shape
(bs, num_queries, num_heads, num_levels, num_points).
im2col_step (Tensor): The step used in image to column.
Returns:
Tensor: has shape (bs, num_queries, embed_dims)
"""
ctx.im2col_step = im2col_step
output = ext_module.ms_deform_attn_forward(
value,
value_spatial_shapes,
value_level_start_index,
sampling_locations,
attention_weights,
im2col_step=ctx.im2col_step)
ctx.save_for_backward(value, value_spatial_shapes,
value_level_start_index, sampling_locations,
attention_weights)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
"""GPU version of backward function.
Args:
grad_output (Tensor): Gradient
of output tensor of forward.
Returns:
Tuple[Tensor]: Gradient
of input tensors in forward.
"""
value, value_spatial_shapes, value_level_start_index,\
sampling_locations, attention_weights = ctx.saved_tensors
grad_value = torch.zeros_like(value)
grad_sampling_loc = torch.zeros_like(sampling_locations)
grad_attn_weight = torch.zeros_like(attention_weights)
ext_module.ms_deform_attn_backward(
value,
value_spatial_shapes,
value_level_start_index,
sampling_locations,
attention_weights,
grad_output.contiguous(),
grad_value,
grad_sampling_loc,
grad_attn_weight,
im2col_step=ctx.im2col_step)
return grad_value, None, None, \
grad_sampling_loc, grad_attn_weight, None
def multi_scale_deformable_attn_pytorch(value, value_spatial_shapes,
sampling_locations, attention_weights):
"""CPU version of multi-scale deformable attention.
Args:
value (Tensor): The value has shape
(bs, num_keys, num_heads, embed_dims//num_heads).
value_spatial_shapes (Tensor): Spatial shape of
each feature map, has shape (num_levels, 2),
the last dimension 2 represents (h, w).
sampling_locations (Tensor): The location of sampling points,
has shape
(bs, num_queries, num_heads, num_levels, num_points, 2),
the last dimension 2 represents (x, y).
attention_weights (Tensor): The weight of sampling points used
when calculating the attention, has shape
(bs, num_queries, num_heads, num_levels, num_points).
Returns:
Tensor: has shape (bs, num_queries, embed_dims)
"""
bs, _, num_heads, embed_dims = value.shape
_, num_queries, num_heads, num_levels, num_points, _ =\
sampling_locations.shape
value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes],
dim=1)
sampling_grids = 2 * sampling_locations - 1
sampling_value_list = []
for level, (H_, W_) in enumerate(value_spatial_shapes):
# bs, H_*W_, num_heads, embed_dims ->
# bs, H_*W_, num_heads*embed_dims ->
# bs, num_heads*embed_dims, H_*W_ ->
# bs*num_heads, embed_dims, H_, W_
value_l_ = value_list[level].flatten(2).transpose(1, 2).reshape(
bs * num_heads, embed_dims, H_, W_)
# bs, num_queries, num_heads, num_points, 2 ->
# bs, num_heads, num_queries, num_points, 2 ->
# bs*num_heads, num_queries, num_points, 2
sampling_grid_l_ = sampling_grids[:, :, :,
level].transpose(1, 2).flatten(0, 1)
# bs*num_heads, embed_dims, num_queries, num_points
sampling_value_l_ = F.grid_sample(
value_l_,
sampling_grid_l_,
mode='bilinear',
padding_mode='zeros',
align_corners=False)
sampling_value_list.append(sampling_value_l_)
# (bs, num_queries, num_heads, num_levels, num_points) ->
# (bs, num_heads, num_queries, num_levels, num_points) ->
# (bs, num_heads, 1, num_queries, num_levels*num_points)
attention_weights = attention_weights.transpose(1, 2).reshape(
bs * num_heads, 1, num_queries, num_levels * num_points)
output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) *
attention_weights).sum(-1).view(bs, num_heads * embed_dims,
num_queries)
return output.transpose(1, 2).contiguous()
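# Usage sketch (illustrative only, not part of the original file): the pure
# PyTorch path runs on CPU and follows the shape contract documented above.
# Two levels of 8x8 and 4x4 feature maps give num_keys = 64 + 16 = 80.
def _demo_ms_deform_attn_pytorch():
    bs, num_heads, head_dim = 2, 4, 8
    num_queries, num_levels, num_points = 10, 2, 4
    spatial_shapes = torch.tensor([[8, 8], [4, 4]])  # (num_levels, 2)
    num_keys = int((spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum())
    value = torch.randn(bs, num_keys, num_heads, head_dim)
    sampling_locations = torch.rand(bs, num_queries, num_heads, num_levels,
                                    num_points, 2)
    attention_weights = torch.rand(bs, num_queries, num_heads, num_levels,
                                   num_points)
    out = multi_scale_deformable_attn_pytorch(value, spatial_shapes,
                                              sampling_locations,
                                              attention_weights)
    assert out.shape == (bs, num_queries, num_heads * head_dim)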
@ATTENTION.register_module()
class MultiScaleDeformableAttention(BaseModule):
"""An attention module used in Deformable-Detr.
`Deformable DETR: Deformable Transformers for End-to-End Object Detection.
<https://arxiv.org/pdf/2010.04159.pdf>`_.
Args:
embed_dims (int): The embedding dimension of Attention.
Default: 256.
num_heads (int): Parallel attention heads. Default: 8.
num_levels (int): The number of feature map used in
Attention. Default: 4.
num_points (int): The number of sampling points for
each query in each head. Default: 4.
im2col_step (int): The step used in image_to_column.
Default: 64.
dropout (float): A Dropout layer on `inp_identity`.
Default: 0.1.
batch_first (bool): Key, Query and Value are of shape
(batch, n, embed_dim)
or (n, batch, embed_dim). Defaults to False.
norm_cfg (dict): Config dict for normalization layer.
Default: None.
init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
Default: None.
"""
def __init__(self,
embed_dims=256,
num_heads=8,
num_levels=4,
num_points=4,
im2col_step=64,
dropout=0.1,
batch_first=False,
norm_cfg=None,
init_cfg=None):
super().__init__(init_cfg)
if embed_dims % num_heads != 0:
raise ValueError(f'embed_dims must be divisible by num_heads, '
f'but got {embed_dims} and {num_heads}')
dim_per_head = embed_dims // num_heads
self.norm_cfg = norm_cfg
self.dropout = nn.Dropout(dropout)
self.batch_first = batch_first
# you'd better set dim_per_head to a power of 2
# which is more efficient in the CUDA implementation
def _is_power_of_2(n):
if (not isinstance(n, int)) or (n < 0):
raise ValueError(
'invalid input for _is_power_of_2: {} (type: {})'.format(
n, type(n)))
return (n & (n - 1) == 0) and n != 0
if not _is_power_of_2(dim_per_head):
warnings.warn(
"You'd better set embed_dims in "
'MultiScaleDeformAttention to make '
'the dimension of each attention head a power of 2 '
'which is more efficient in our CUDA implementation.')
self.im2col_step = im2col_step
self.embed_dims = embed_dims
self.num_levels = num_levels
self.num_heads = num_heads
self.num_points = num_points
self.sampling_offsets = nn.Linear(
embed_dims, num_heads * num_levels * num_points * 2)
self.attention_weights = nn.Linear(embed_dims,
num_heads * num_levels * num_points)
self.value_proj = nn.Linear(embed_dims, embed_dims)
self.output_proj = nn.Linear(embed_dims, embed_dims)
self.init_weights()
def init_weights(self):
"""Default initialization for Parameters of Module."""
constant_init(self.sampling_offsets, 0.)
thetas = torch.arange(
self.num_heads,
dtype=torch.float32) * (2.0 * math.pi / self.num_heads)
grid_init = torch.stack([thetas.cos(), thetas.sin()], -1)
grid_init = (grid_init /
grid_init.abs().max(-1, keepdim=True)[0]).view(
self.num_heads, 1, 1,
2).repeat(1, self.num_levels, self.num_points, 1)
for i in range(self.num_points):
grid_init[:, :, i, :] *= i + 1
self.sampling_offsets.bias.data = grid_init.view(-1)
constant_init(self.attention_weights, val=0., bias=0.)
xavier_init(self.value_proj, distribution='uniform', bias=0.)
xavier_init(self.output_proj, distribution='uniform', bias=0.)
self._is_init = True
@deprecated_api_warning({'residual': 'identity'},
cls_name='MultiScaleDeformableAttention')
def forward(self,
query,
key=None,
value=None,
identity=None,
query_pos=None,
key_padding_mask=None,
reference_points=None,
spatial_shapes=None,
level_start_index=None,
**kwargs):
"""Forward Function of MultiScaleDeformAttention.
Args:
query (Tensor): Query of Transformer with shape
(num_query, bs, embed_dims).
key (Tensor): The key tensor with shape
`(num_key, bs, embed_dims)`.
value (Tensor): The value tensor with shape
`(num_key, bs, embed_dims)`.
identity (Tensor): The tensor used for addition, with the
same shape as `query`. Default None. If None,
`query` will be used.
query_pos (Tensor): The positional encoding for `query`.
Default: None.
key_pos (Tensor): The positional encoding for `key`. Default
None.
reference_points (Tensor): The normalized reference
points with shape (bs, num_query, num_levels, 2),
all elements are in range [0, 1], top-left (0, 0),
bottom-right (1, 1), including the padding area;
or (N, Length_{query}, num_levels, 4), where the
additional two dimensions (w, h) form the
reference boxes.
key_padding_mask (Tensor): ByteTensor for `query`, with
shape [bs, num_key].
spatial_shapes (Tensor): Spatial shape of features in
different levels. With shape (num_levels, 2),
last dimension represents (h, w).
level_start_index (Tensor): The start index of each level.
A tensor has shape ``(num_levels, )`` and can be represented
as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...].
Returns:
Tensor: forwarded results with shape [num_query, bs, embed_dims].
"""
if value is None:
value = query
if identity is None:
identity = query
if query_pos is not None:
query = query + query_pos
if not self.batch_first:
# change to (bs, num_query ,embed_dims)
query = query.permute(1, 0, 2)
value = value.permute(1, 0, 2)
bs, num_query, _ = query.shape
bs, num_value, _ = value.shape
assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
value = self.value_proj(value)
if key_padding_mask is not None:
value = value.masked_fill(key_padding_mask[..., None], 0.0)
value = value.view(bs, num_value, self.num_heads, -1)
sampling_offsets = self.sampling_offsets(query).view(
bs, num_query, self.num_heads, self.num_levels, self.num_points, 2)
attention_weights = self.attention_weights(query).view(
bs, num_query, self.num_heads, self.num_levels * self.num_points)
attention_weights = attention_weights.softmax(-1)
attention_weights = attention_weights.view(bs, num_query,
self.num_heads,
self.num_levels,
self.num_points)
if reference_points.shape[-1] == 2:
offset_normalizer = torch.stack(
[spatial_shapes[..., 1], spatial_shapes[..., 0]], -1)
sampling_locations = reference_points[:, :, None, :, None, :] \
+ sampling_offsets \
/ offset_normalizer[None, None, None, :, None, :]
elif reference_points.shape[-1] == 4:
sampling_locations = reference_points[:, :, None, :, None, :2] \
+ sampling_offsets / self.num_points \
* reference_points[:, :, None, :, None, 2:] \
* 0.5
else:
raise ValueError(
f'Last dim of reference_points must be'
f' 2 or 4, but get {reference_points.shape[-1]} instead.')
if torch.cuda.is_available() and value.is_cuda:
output = MultiScaleDeformableAttnFunction.apply(
value, spatial_shapes, level_start_index, sampling_locations,
attention_weights, self.im2col_step)
else:
output = multi_scale_deformable_attn_pytorch(
value, spatial_shapes, sampling_locations, attention_weights)
output = self.output_proj(output)
if not self.batch_first:
# (num_query, bs ,embed_dims)
output = output.permute(1, 0, 2)
return self.dropout(output) + identity
import os
import numpy as np
import torch
from annotator.uniformer.mmcv.utils import deprecated_api_warning
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext', ['nms', 'softnms', 'nms_match', 'nms_rotated'])
# This function is modified from: https://github.com/pytorch/vision/
class NMSop(torch.autograd.Function):
@staticmethod
def forward(ctx, bboxes, scores, iou_threshold, offset, score_threshold,
max_num):
is_filtering_by_score = score_threshold > 0
if is_filtering_by_score:
valid_mask = scores > score_threshold
bboxes, scores = bboxes[valid_mask], scores[valid_mask]
valid_inds = torch.nonzero(
valid_mask, as_tuple=False).squeeze(dim=1)
inds = ext_module.nms(
bboxes, scores, iou_threshold=float(iou_threshold), offset=offset)
if max_num > 0:
inds = inds[:max_num]
if is_filtering_by_score:
inds = valid_inds[inds]
return inds
@staticmethod
def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold,
max_num):
from ..onnx import is_custom_op_loaded
has_custom_op = is_custom_op_loaded()
# TensorRT nms plugin is aligned with original nms in ONNXRuntime
is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT'
if has_custom_op and (not is_trt_backend):
return g.op(
'mmcv::NonMaxSuppression',
bboxes,
scores,
iou_threshold_f=float(iou_threshold),
offset_i=int(offset))
else:
from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
from ..onnx.onnx_utils.symbolic_helper import _size_helper
boxes = unsqueeze(g, bboxes, 0)
scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
if max_num > 0:
max_num = g.op(
'Constant',
value_t=torch.tensor(max_num, dtype=torch.long))
else:
dim = g.op('Constant', value_t=torch.tensor(0))
max_num = _size_helper(g, bboxes, dim)
max_output_per_class = max_num
iou_threshold = g.op(
'Constant',
value_t=torch.tensor([iou_threshold], dtype=torch.float))
score_threshold = g.op(
'Constant',
value_t=torch.tensor([score_threshold], dtype=torch.float))
nms_out = g.op('NonMaxSuppression', boxes, scores,
max_output_per_class, iou_threshold,
score_threshold)
return squeeze(
g,
select(
g, nms_out, 1,
g.op(
'Constant',
value_t=torch.tensor([2], dtype=torch.long))), 1)
class SoftNMSop(torch.autograd.Function):
@staticmethod
def forward(ctx, boxes, scores, iou_threshold, sigma, min_score, method,
offset):
dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
inds = ext_module.softnms(
boxes.cpu(),
scores.cpu(),
dets.cpu(),
iou_threshold=float(iou_threshold),
sigma=float(sigma),
min_score=float(min_score),
method=int(method),
offset=int(offset))
return dets, inds
@staticmethod
def symbolic(g, boxes, scores, iou_threshold, sigma, min_score, method,
offset):
from packaging import version
assert version.parse(torch.__version__) >= version.parse('1.7.0')
nms_out = g.op(
'mmcv::SoftNonMaxSuppression',
boxes,
scores,
iou_threshold_f=float(iou_threshold),
sigma_f=float(sigma),
min_score_f=float(min_score),
method_i=int(method),
offset_i=int(offset),
outputs=2)
return nms_out
@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def nms(boxes, scores, iou_threshold, offset=0, score_threshold=0, max_num=-1):
"""Dispatch to either CPU or GPU NMS implementations.
    The input can be either a torch tensor or a numpy array. GPU NMS will be
    used if the input is a gpu tensor, otherwise CPU NMS
    will be used. The returned type will always be the same as the inputs.
Arguments:
boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
scores (torch.Tensor or np.ndarray): scores in shape (N, ).
iou_threshold (float): IoU threshold for NMS.
offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
score_threshold (float): score threshold for NMS.
max_num (int): maximum number of boxes after NMS.
Returns:
        tuple: kept dets (boxes and scores) and indices, which always have \
            the same data type as the input.
Example:
>>> boxes = np.array([[49.1, 32.4, 51.0, 35.9],
>>> [49.3, 32.9, 51.0, 35.3],
>>> [49.2, 31.8, 51.0, 35.4],
>>> [35.1, 11.5, 39.1, 15.7],
>>> [35.6, 11.8, 39.3, 14.2],
>>> [35.3, 11.5, 39.9, 14.5],
>>> [35.2, 11.7, 39.7, 15.7]], dtype=np.float32)
>>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],\
dtype=np.float32)
>>> iou_threshold = 0.6
>>> dets, inds = nms(boxes, scores, iou_threshold)
>>> assert len(inds) == len(dets) == 3
"""
assert isinstance(boxes, (torch.Tensor, np.ndarray))
assert isinstance(scores, (torch.Tensor, np.ndarray))
is_numpy = False
if isinstance(boxes, np.ndarray):
is_numpy = True
boxes = torch.from_numpy(boxes)
if isinstance(scores, np.ndarray):
scores = torch.from_numpy(scores)
assert boxes.size(1) == 4
assert boxes.size(0) == scores.size(0)
assert offset in (0, 1)
if torch.__version__ == 'parrots':
indata_list = [boxes, scores]
indata_dict = {
'iou_threshold': float(iou_threshold),
'offset': int(offset)
}
inds = ext_module.nms(*indata_list, **indata_dict)
else:
inds = NMSop.apply(boxes, scores, iou_threshold, offset,
score_threshold, max_num)
dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1)
if is_numpy:
dets = dets.cpu().numpy()
inds = inds.cpu().numpy()
return dets, inds
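# A minimal pure-PyTorch sketch of the greedy NMS loop that the compiled
# `ext_module.nms` op implements (the helper below is illustrative only and
# assumes offset=0 and no score filtering); the `nms` function above should
# be preferred in practice.
def _nms_reference(boxes, scores, iou_threshold):
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        i = order[0]
        keep.append(int(i))
        if order.numel() == 1:
            break
        rest = order[1:]
        xx1 = torch.maximum(boxes[i, 0], boxes[rest, 0])
        yy1 = torch.maximum(boxes[i, 1], boxes[rest, 1])
        xx2 = torch.minimum(boxes[i, 2], boxes[rest, 2])
        yy2 = torch.minimum(boxes[i, 3], boxes[rest, 3])
        inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_r = (boxes[rest, 2] - boxes[rest, 0]) * \
            (boxes[rest, 3] - boxes[rest, 1])
        iou = inter / (area_i + area_r - inter)
        order = rest[iou <= iou_threshold]
    return torch.tensor(keep, dtype=torch.long)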
@deprecated_api_warning({'iou_thr': 'iou_threshold'})
def soft_nms(boxes,
scores,
iou_threshold=0.3,
sigma=0.5,
min_score=1e-3,
method='linear',
offset=0):
"""Dispatch to only CPU Soft NMS implementations.
The input can be either a torch tensor or numpy array.
The returned type will always be the same as inputs.
Arguments:
boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
scores (torch.Tensor or np.ndarray): scores in shape (N, ).
iou_threshold (float): IoU threshold for NMS.
        sigma (float): hyperparameter for the gaussian method.
        min_score (float): score filter threshold.
        method (str): either 'naive', 'linear' or 'gaussian'.
offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
Returns:
        tuple: kept dets (boxes and scores) and indices, which always have \
            the same data type as the input.
Example:
>>> boxes = np.array([[4., 3., 5., 3.],
>>> [4., 3., 5., 4.],
>>> [3., 1., 3., 1.],
>>> [3., 1., 3., 1.],
>>> [3., 1., 3., 1.],
>>> [3., 1., 3., 1.]], dtype=np.float32)
>>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32)
>>> iou_threshold = 0.6
>>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5)
>>> assert len(inds) == len(dets) == 5
"""
assert isinstance(boxes, (torch.Tensor, np.ndarray))
assert isinstance(scores, (torch.Tensor, np.ndarray))
is_numpy = False
if isinstance(boxes, np.ndarray):
is_numpy = True
boxes = torch.from_numpy(boxes)
if isinstance(scores, np.ndarray):
scores = torch.from_numpy(scores)
assert boxes.size(1) == 4
assert boxes.size(0) == scores.size(0)
assert offset in (0, 1)
method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2}
assert method in method_dict.keys()
if torch.__version__ == 'parrots':
dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
indata_list = [boxes.cpu(), scores.cpu(), dets.cpu()]
indata_dict = {
'iou_threshold': float(iou_threshold),
'sigma': float(sigma),
'min_score': min_score,
'method': method_dict[method],
'offset': int(offset)
}
inds = ext_module.softnms(*indata_list, **indata_dict)
else:
dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(),
float(iou_threshold), float(sigma),
float(min_score), method_dict[method],
int(offset))
dets = dets[:inds.size(0)]
if is_numpy:
dets = dets.cpu().numpy()
inds = inds.cpu().numpy()
return dets, inds
else:
return dets.to(device=boxes.device), inds.to(device=boxes.device)
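# For reference, a sketch of the per-box score decay that Soft-NMS applies
# inside the compiled op above (the helper below is illustrative only):
# instead of discarding an overlapping box, its score is decayed as
#   linear:   s = s * (1 - iou)              if iou > iou_threshold
#   gaussian: s = s * exp(-iou ** 2 / sigma)
# and boxes whose decayed score drops below `min_score` are removed.
def _soft_nms_decay(score, iou, iou_threshold=0.3, sigma=0.5, method='linear'):
    if method == 'linear':
        return score * (1 - iou) if iou > iou_threshold else score
    if method == 'gaussian':
        return score * float(np.exp(-(iou * iou) / sigma))
    # 'naive' falls back to hard suppression
    return 0.0 if iou > iou_threshold else score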
def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
"""Performs non-maximum suppression in a batched fashion.
Modified from https://github.com/pytorch/vision/blob
/505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39.
In order to perform NMS independently per class, we add an offset to all
the boxes. The offset is dependent only on the class idx, and is large
enough so that boxes from different classes do not overlap.
Arguments:
boxes (torch.Tensor): boxes in shape (N, 4).
scores (torch.Tensor): scores in shape (N, ).
        idxs (torch.Tensor): each index value corresponds to a bbox cluster,
and NMS will not be applied between elements of different idxs,
shape (N, ).
nms_cfg (dict): specify nms type and other parameters like iou_thr.
Possible keys includes the following.
- iou_thr (float): IoU threshold used for NMS.
- split_thr (float): threshold number of boxes. In some cases the
number of boxes is large (e.g., 200k). To avoid OOM during
training, the users could set `split_thr` to a small value.
If the number of boxes is greater than the threshold, it will
perform NMS on each group of boxes separately and sequentially.
Defaults to 10000.
class_agnostic (bool): if true, nms is class agnostic,
i.e. IoU thresholding happens over all boxes,
regardless of the predicted class.
Returns:
tuple: kept dets and indice.
"""
nms_cfg_ = nms_cfg.copy()
class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic)
if class_agnostic:
boxes_for_nms = boxes
else:
max_coordinate = boxes.max()
offsets = idxs.to(boxes) * (max_coordinate + torch.tensor(1).to(boxes))
boxes_for_nms = boxes + offsets[:, None]
nms_type = nms_cfg_.pop('type', 'nms')
nms_op = eval(nms_type)
split_thr = nms_cfg_.pop('split_thr', 10000)
# Won't split to multiple nms nodes when exporting to onnx
if boxes_for_nms.shape[0] < split_thr or torch.onnx.is_in_onnx_export():
dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
boxes = boxes[keep]
        # -1 indexing works abnormally in TensorRT.
        # This assumes `dets` has 5 columns where
        # the last column is the score.
# TODO: more elegant way to handle the dimension issue.
# Some type of nms would reweight the score, such as SoftNMS
scores = dets[:, 4]
else:
max_num = nms_cfg_.pop('max_num', -1)
total_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
# Some type of nms would reweight the score, such as SoftNMS
scores_after_nms = scores.new_zeros(scores.size())
for id in torch.unique(idxs):
mask = (idxs == id).nonzero(as_tuple=False).view(-1)
dets, keep = nms_op(boxes_for_nms[mask], scores[mask], **nms_cfg_)
total_mask[mask[keep]] = True
scores_after_nms[mask[keep]] = dets[:, -1]
keep = total_mask.nonzero(as_tuple=False).view(-1)
scores, inds = scores_after_nms[keep].sort(descending=True)
keep = keep[inds]
boxes = boxes[keep]
if max_num > 0:
keep = keep[:max_num]
boxes = boxes[:max_num]
scores = scores[:max_num]
return torch.cat([boxes, scores[:, None]], -1), keep
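# Illustration of the per-class offset trick used above (values are arbitrary):
# boxes of different classes are shifted into disjoint coordinate ranges, so a
# single class-agnostic NMS call never suppresses boxes across classes.
def _demo_class_offset():
    boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 9., 9.]])
    idxs = torch.tensor([0, 1])  # two different classes
    max_coordinate = boxes.max()
    offsets = idxs.to(boxes) * (max_coordinate + 1)
    # after the shift the two boxes can no longer overlap
    return boxes + offsets[:, None]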
def nms_match(dets, iou_threshold):
"""Matched dets into different groups by NMS.
NMS match is Similar to NMS but when a bbox is suppressed, nms match will
record the indice of suppressed bbox and form a group with the indice of
kept bbox. In each group, indice is sorted as score order.
Arguments:
dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5).
iou_thr (float): IoU thresh for NMS.
Returns:
List[torch.Tensor | np.ndarray]: The outer list corresponds different
matched group, the inner Tensor corresponds the indices for a group
in score order.
"""
if dets.shape[0] == 0:
matched = []
else:
assert dets.shape[-1] == 5, 'inputs dets.shape should be (N, 5), ' \
f'but get {dets.shape}'
if isinstance(dets, torch.Tensor):
dets_t = dets.detach().cpu()
else:
dets_t = torch.from_numpy(dets)
indata_list = [dets_t]
indata_dict = {'iou_threshold': float(iou_threshold)}
matched = ext_module.nms_match(*indata_list, **indata_dict)
if torch.__version__ == 'parrots':
matched = matched.tolist()
if isinstance(dets, torch.Tensor):
return [dets.new_tensor(m, dtype=torch.long) for m in matched]
else:
        return [np.array(m, dtype=int) for m in matched]
def nms_rotated(dets, scores, iou_threshold, labels=None):
"""Performs non-maximum suppression (NMS) on the rotated boxes according to
their intersection-over-union (IoU).
Rotated NMS iteratively removes lower scoring rotated boxes which have an
IoU greater than iou_threshold with another (higher scoring) rotated box.
    Args:
        dets (Tensor): Rotated boxes in shape (N, 5). They are expected to
            be in (x_ctr, y_ctr, width, height, angle_radian) format.
        scores (Tensor): scores in shape (N, ).
        iou_threshold (float): IoU threshold for NMS.
        labels (Tensor): boxes' label in shape (N,).
Returns:
        tuple: kept dets (boxes and scores) and indices, which always have \
            the same data type as the input.
"""
if dets.shape[0] == 0:
return dets, None
multi_label = labels is not None
if multi_label:
dets_wl = torch.cat((dets, labels.unsqueeze(1)), 1)
else:
dets_wl = dets
_, order = scores.sort(0, descending=True)
dets_sorted = dets_wl.index_select(0, order)
if torch.__version__ == 'parrots':
keep_inds = ext_module.nms_rotated(
dets_wl,
scores,
order,
dets_sorted,
iou_threshold=iou_threshold,
multi_label=multi_label)
else:
keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted,
iou_threshold, multi_label)
dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)),
dim=1)
return dets, keep_inds
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['pixel_group'])
def pixel_group(score, mask, embedding, kernel_label, kernel_contour,
kernel_region_num, distance_threshold):
"""Group pixels into text instances, which is widely used text detection
methods.
Arguments:
score (np.array or Tensor): The foreground score with size hxw.
mask (np.array or Tensor): The foreground mask with size hxw.
embedding (np.array or Tensor): The embedding with size hxwxc to
distinguish instances.
kernel_label (np.array or Tensor): The instance kernel index with
size hxw.
kernel_contour (np.array or Tensor): The kernel contour with size hxw.
kernel_region_num (int): The instance kernel region number.
distance_threshold (float): The embedding distance threshold between
kernel and pixel in one instance.
Returns:
pixel_assignment (List[List[float]]): The instance coordinate list.
Each element consists of averaged confidence, pixel number, and
coordinates (x_i, y_i for all pixels) in order.
"""
assert isinstance(score, (torch.Tensor, np.ndarray))
assert isinstance(mask, (torch.Tensor, np.ndarray))
assert isinstance(embedding, (torch.Tensor, np.ndarray))
assert isinstance(kernel_label, (torch.Tensor, np.ndarray))
assert isinstance(kernel_contour, (torch.Tensor, np.ndarray))
assert isinstance(kernel_region_num, int)
assert isinstance(distance_threshold, float)
if isinstance(score, np.ndarray):
score = torch.from_numpy(score)
if isinstance(mask, np.ndarray):
mask = torch.from_numpy(mask)
if isinstance(embedding, np.ndarray):
embedding = torch.from_numpy(embedding)
if isinstance(kernel_label, np.ndarray):
kernel_label = torch.from_numpy(kernel_label)
if isinstance(kernel_contour, np.ndarray):
kernel_contour = torch.from_numpy(kernel_contour)
if torch.__version__ == 'parrots':
label = ext_module.pixel_group(
score,
mask,
embedding,
kernel_label,
kernel_contour,
kernel_region_num=kernel_region_num,
distance_threshold=distance_threshold)
label = label.tolist()
label = label[0]
list_index = kernel_region_num
pixel_assignment = []
for x in range(kernel_region_num):
pixel_assignment.append(
np.array(
label[list_index:list_index + int(label[x])],
                    dtype=float))
list_index = list_index + int(label[x])
else:
pixel_assignment = ext_module.pixel_group(score, mask, embedding,
kernel_label, kernel_contour,
kernel_region_num,
distance_threshold)
return pixel_assignment
# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa
from os import path as osp
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.utils import _pair
from torch.onnx.operators import shape_as_tensor
def bilinear_grid_sample(im, grid, align_corners=False):
"""Given an input and a flow-field grid, computes the output using input
values and pixel locations from grid. Supported only bilinear interpolation
method to sample the input pixels.
Args:
im (torch.Tensor): Input feature map, shape (N, C, H, W)
grid (torch.Tensor): Point coordinates, shape (N, Hg, Wg, 2)
        align_corners (bool): If set to True, the extrema (-1 and 1) are
considered as referring to the center points of the input’s
corner pixels. If set to False, they are instead considered as
referring to the corner points of the input’s corner pixels,
making the sampling more resolution agnostic.
Returns:
torch.Tensor: A tensor with sampled points, shape (N, C, Hg, Wg)
"""
n, c, h, w = im.shape
gn, gh, gw, _ = grid.shape
assert n == gn
x = grid[:, :, :, 0]
y = grid[:, :, :, 1]
if align_corners:
x = ((x + 1) / 2) * (w - 1)
y = ((y + 1) / 2) * (h - 1)
else:
x = ((x + 1) * w - 1) / 2
y = ((y + 1) * h - 1) / 2
x = x.view(n, -1)
y = y.view(n, -1)
x0 = torch.floor(x).long()
y0 = torch.floor(y).long()
x1 = x0 + 1
y1 = y0 + 1
wa = ((x1 - x) * (y1 - y)).unsqueeze(1)
wb = ((x1 - x) * (y - y0)).unsqueeze(1)
wc = ((x - x0) * (y1 - y)).unsqueeze(1)
wd = ((x - x0) * (y - y0)).unsqueeze(1)
# Apply default for grid_sample function zero padding
im_padded = F.pad(im, pad=[1, 1, 1, 1], mode='constant', value=0)
padded_h = h + 2
padded_w = w + 2
# save points positions after padding
x0, x1, y0, y1 = x0 + 1, x1 + 1, y0 + 1, y1 + 1
# Clip coordinates to padded image size
x0 = torch.where(x0 < 0, torch.tensor(0), x0)
x0 = torch.where(x0 > padded_w - 1, torch.tensor(padded_w - 1), x0)
x1 = torch.where(x1 < 0, torch.tensor(0), x1)
x1 = torch.where(x1 > padded_w - 1, torch.tensor(padded_w - 1), x1)
y0 = torch.where(y0 < 0, torch.tensor(0), y0)
y0 = torch.where(y0 > padded_h - 1, torch.tensor(padded_h - 1), y0)
y1 = torch.where(y1 < 0, torch.tensor(0), y1)
y1 = torch.where(y1 > padded_h - 1, torch.tensor(padded_h - 1), y1)
im_padded = im_padded.view(n, c, -1)
x0_y0 = (x0 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1)
x0_y1 = (x0 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1)
x1_y0 = (x1 + y0 * padded_w).unsqueeze(1).expand(-1, c, -1)
x1_y1 = (x1 + y1 * padded_w).unsqueeze(1).expand(-1, c, -1)
Ia = torch.gather(im_padded, 2, x0_y0)
Ib = torch.gather(im_padded, 2, x0_y1)
Ic = torch.gather(im_padded, 2, x1_y0)
Id = torch.gather(im_padded, 2, x1_y1)
return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw)
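# Quick illustrative self-check (the helper name is made up): for grids inside
# [-1, 1] the pure-PyTorch implementation above should match
# torch.nn.functional.grid_sample with bilinear interpolation and zero
# padding, up to floating point error.
def _check_bilinear_grid_sample():
    im = torch.randn(1, 3, 8, 8)
    grid = torch.rand(1, 5, 5, 2) * 2 - 1  # normalized coords in [-1, 1]
    ours = bilinear_grid_sample(im, grid, align_corners=False)
    ref = F.grid_sample(
        im, grid, mode='bilinear', padding_mode='zeros', align_corners=False)
    return torch.allclose(ours, ref, atol=1e-5)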
def is_in_onnx_export_without_custom_ops():
from annotator.uniformer.mmcv.ops import get_onnxruntime_op_path
ort_custom_op_path = get_onnxruntime_op_path()
return torch.onnx.is_in_onnx_export(
) and not osp.exists(ort_custom_op_path)
def normalize(grid):
"""Normalize input grid from [-1, 1] to [0, 1]
Args:
        grid (Tensor): The grid to be normalized, range [-1, 1].
Returns:
Tensor: Normalized grid, range [0, 1].
"""
return (grid + 1.0) / 2.0
def denormalize(grid):
"""Denormalize input grid from range [0, 1] to [-1, 1]
Args:
        grid (Tensor): The grid to be denormalized, range [0, 1].
Returns:
Tensor: Denormalized grid, range [-1, 1].
"""
return grid * 2.0 - 1.0
def generate_grid(num_grid, size, device):
"""Generate regular square grid of points in [0, 1] x [0, 1] coordinate
space.
Args:
num_grid (int): The number of grids to sample, one for each region.
size (tuple(int, int)): The side size of the regular grid.
device (torch.device): Desired device of returned tensor.
Returns:
(torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that
contains coordinates for the regular grids.
"""
affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device)
grid = F.affine_grid(
affine_trans, torch.Size((1, 1, *size)), align_corners=False)
grid = normalize(grid)
return grid.view(1, -1, 2).expand(num_grid, -1, -1)
def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
"""Convert roi based relative point coordinates to image based absolute
point coordinates.
Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to
RoI, location, range (0, 1), shape (N, P, 2)
Returns:
Tensor: Image based absolute point coordinates, shape (N, P, 2)
"""
with torch.no_grad():
assert rel_roi_points.size(0) == rois.size(0)
assert rois.dim() == 2
assert rel_roi_points.dim() == 3
assert rel_roi_points.size(2) == 2
# remove batch idx
if rois.size(1) == 5:
rois = rois[:, 1:]
abs_img_points = rel_roi_points.clone()
# To avoid an error during exporting to onnx use independent
# variables instead inplace computation
xs = abs_img_points[:, :, 0] * (rois[:, None, 2] - rois[:, None, 0])
ys = abs_img_points[:, :, 1] * (rois[:, None, 3] - rois[:, None, 1])
xs += rois[:, None, 0]
ys += rois[:, None, 1]
abs_img_points = torch.stack([xs, ys], dim=2)
return abs_img_points
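# Toy example (illustrative values): the relative point (0.5, 0.5) inside an
# RoI spanning (x1, y1, x2, y2) = (10, 20, 30, 60) maps to the RoI center
# (20, 40) in absolute image coordinates.
def _demo_rel_roi_to_abs_img():
    rois = torch.tensor([[10., 20., 30., 60.]])
    rel_points = torch.tensor([[[0.5, 0.5]]])
    return rel_roi_point_to_abs_img_point(rois, rel_points)  # [[[20., 40.]]]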
def get_shape_from_feature_map(x):
"""Get spatial resolution of input feature map considering exporting to
onnx mode.
Args:
x (torch.Tensor): Input tensor, shape (N, C, H, W)
Returns:
torch.Tensor: Spatial resolution (width, height), shape (1, 1, 2)
"""
if torch.onnx.is_in_onnx_export():
img_shape = shape_as_tensor(x)[2:].flip(0).view(1, 1, 2).to(
x.device).float()
else:
img_shape = torch.tensor(x.shape[2:]).flip(0).view(1, 1, 2).to(
x.device).float()
return img_shape
def abs_img_point_to_rel_img_point(abs_img_points, img, spatial_scale=1.):
"""Convert image based absolute point coordinates to image based relative
coordinates for sampling.
Args:
abs_img_points (Tensor): Image based absolute point coordinates,
shape (N, P, 2)
img (tuple/Tensor): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1.
Returns:
Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2)
"""
assert (isinstance(img, tuple) and len(img) == 2) or \
(isinstance(img, torch.Tensor) and len(img.shape) == 4)
if isinstance(img, tuple):
h, w = img
scale = torch.tensor([w, h],
dtype=torch.float,
device=abs_img_points.device)
scale = scale.view(1, 1, 2)
else:
scale = get_shape_from_feature_map(img)
return abs_img_points / scale * spatial_scale
def rel_roi_point_to_rel_img_point(rois,
rel_roi_points,
img,
spatial_scale=1.):
"""Convert roi based relative point coordinates to image based absolute
point coordinates.
Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to
RoI, location, range (0, 1), shape (N, P, 2)
img (tuple/Tensor): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1.
Returns:
Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2)
"""
abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points)
rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img,
spatial_scale)
return rel_img_point
def point_sample(input, points, align_corners=False, **kwargs):
"""A wrapper around :func:`grid_sample` to support 3D point_coords tensors
Unlike :func:`torch.nn.functional.grid_sample` it assumes point_coords to
lie inside ``[0, 1] x [0, 1]`` square.
Args:
input (Tensor): Feature map, shape (N, C, H, W).
points (Tensor): Image based absolute point coordinates (normalized),
range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2).
align_corners (bool): Whether align_corners. Default: False
Returns:
Tensor: Features of `point` on `input`, shape (N, C, P) or
(N, C, Hgrid, Wgrid).
"""
add_dim = False
if points.dim() == 3:
add_dim = True
points = points.unsqueeze(2)
if is_in_onnx_export_without_custom_ops():
# If custom ops for onnx runtime not compiled use python
# implementation of grid_sample function to make onnx graph
# with supported nodes
output = bilinear_grid_sample(
input, denormalize(points), align_corners=align_corners)
else:
output = F.grid_sample(
input, denormalize(points), align_corners=align_corners, **kwargs)
if add_dim:
output = output.squeeze(3)
return output
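# Minimal usage sketch for point_sample (shapes below are arbitrary): sample
# per-point features from a feature map using coordinates normalized to
# [0, 1] x [0, 1].
def _demo_point_sample():
    feat = torch.randn(2, 16, 32, 32)   # (N, C, H, W)
    points = torch.rand(2, 100, 2)      # (N, P, 2) in [0, 1]
    sampled = point_sample(feat, points, align_corners=False)
    return sampled.shape                # torch.Size([2, 16, 100])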
class SimpleRoIAlign(nn.Module):
def __init__(self, output_size, spatial_scale, aligned=True):
"""Simple RoI align in PointRend, faster than standard RoIAlign.
Args:
output_size (tuple[int]): h, w
spatial_scale (float): scale the input boxes by this number
            aligned (bool): if False, use the legacy implementation in
                MMDetection, where align_corners=True is used in
                F.grid_sample. If True, align the results more perfectly.
"""
super(SimpleRoIAlign, self).__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
# to be consistent with other RoI ops
self.use_torchvision = False
self.aligned = aligned
def forward(self, features, rois):
num_imgs = features.size(0)
num_rois = rois.size(0)
rel_roi_points = generate_grid(
num_rois, self.output_size, device=rois.device)
if torch.onnx.is_in_onnx_export():
rel_img_points = rel_roi_point_to_rel_img_point(
rois, rel_roi_points, features, self.spatial_scale)
rel_img_points = rel_img_points.reshape(num_imgs, -1,
*rel_img_points.shape[1:])
point_feats = point_sample(
features, rel_img_points, align_corners=not self.aligned)
point_feats = point_feats.transpose(1, 2)
else:
point_feats = []
for batch_ind in range(num_imgs):
# unravel batch dim
feat = features[batch_ind].unsqueeze(0)
inds = (rois[:, 0].long() == batch_ind)
if inds.any():
rel_img_points = rel_roi_point_to_rel_img_point(
rois[inds], rel_roi_points[inds], feat,
self.spatial_scale).unsqueeze(0)
point_feat = point_sample(
feat, rel_img_points, align_corners=not self.aligned)
point_feat = point_feat.squeeze(0).transpose(0, 1)
point_feats.append(point_feat)
point_feats = torch.cat(point_feats, dim=0)
channels = features.size(1)
roi_feats = point_feats.reshape(num_rois, channels, *self.output_size)
return roi_feats
def __repr__(self):
format_str = self.__class__.__name__
        format_str += '(output_size={}, spatial_scale={})'.format(
self.output_size, self.spatial_scale)
return format_str
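# Minimal usage sketch for SimpleRoIAlign (illustrative values): rois follow
# the (batch_idx, x1, y1, x2, y2) convention used by the other RoI ops here.
def _demo_simple_roi_align():
    layer = SimpleRoIAlign(output_size=7, spatial_scale=1.0)
    feats = torch.randn(1, 8, 32, 32)
    rois = torch.tensor([[0., 4., 4., 20., 20.]])
    return layer(feats, rois).shape     # torch.Size([1, 8, 7, 7])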
import torch
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', [
'points_in_boxes_part_forward', 'points_in_boxes_cpu_forward',
'points_in_boxes_all_forward'
])
def points_in_boxes_part(points, boxes):
"""Find the box in which each point is (CUDA).
Args:
points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate
boxes (torch.Tensor): [B, T, 7],
num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz] in
LiDAR/DEPTH coordinate, (x, y, z) is the bottom center
Returns:
box_idxs_of_pts (torch.Tensor): (B, M), default background = -1
"""
assert points.shape[0] == boxes.shape[0], \
'Points and boxes should have the same batch size, ' \
f'but got {points.shape[0]} and {boxes.shape[0]}'
assert boxes.shape[2] == 7, \
'boxes dimension should be 7, ' \
f'but got unexpected shape {boxes.shape[2]}'
assert points.shape[2] == 3, \
'points dimension should be 3, ' \
f'but got unexpected shape {points.shape[2]}'
batch_size, num_points, _ = points.shape
box_idxs_of_pts = points.new_zeros((batch_size, num_points),
dtype=torch.int).fill_(-1)
    # If the tensors `points` or `boxes` are manually put on a device
    # which is not the current device, some temporary variables
    # will be created on the current device in the cuda op,
    # and the output will be incorrect.
    # Therefore, we force the current device to be the same
    # as the device of the tensors if it was not.
# Please refer to https://github.com/open-mmlab/mmdetection3d/issues/305
# for the incorrect output before the fix.
points_device = points.get_device()
assert points_device == boxes.get_device(), \
'Points and boxes should be put on the same device'
if torch.cuda.current_device() != points_device:
torch.cuda.set_device(points_device)
ext_module.points_in_boxes_part_forward(boxes.contiguous(),
points.contiguous(),
box_idxs_of_pts)
return box_idxs_of_pts
def points_in_boxes_cpu(points, boxes):
"""Find all boxes in which each point is (CPU). The CPU version of
:meth:`points_in_boxes_all`.
Args:
points (torch.Tensor): [B, M, 3], [x, y, z] in
LiDAR/DEPTH coordinate
boxes (torch.Tensor): [B, T, 7],
num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
(x, y, z) is the bottom center.
Returns:
box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0.
"""
assert points.shape[0] == boxes.shape[0], \
'Points and boxes should have the same batch size, ' \
f'but got {points.shape[0]} and {boxes.shape[0]}'
assert boxes.shape[2] == 7, \
'boxes dimension should be 7, ' \
f'but got unexpected shape {boxes.shape[2]}'
assert points.shape[2] == 3, \
'points dimension should be 3, ' \
f'but got unexpected shape {points.shape[2]}'
batch_size, num_points, _ = points.shape
num_boxes = boxes.shape[1]
point_indices = points.new_zeros((batch_size, num_boxes, num_points),
dtype=torch.int)
for b in range(batch_size):
ext_module.points_in_boxes_cpu_forward(boxes[b].float().contiguous(),
points[b].float().contiguous(),
point_indices[b])
point_indices = point_indices.transpose(1, 2)
return point_indices
def points_in_boxes_all(points, boxes):
"""Find all boxes in which each point is (CUDA).
Args:
points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR/DEPTH coordinate
boxes (torch.Tensor): [B, T, 7],
num_valid_boxes <= T, [x, y, z, x_size, y_size, z_size, rz],
(x, y, z) is the bottom center.
Returns:
box_idxs_of_pts (torch.Tensor): (B, M, T), default background = 0.
"""
assert boxes.shape[0] == points.shape[0], \
'Points and boxes should have the same batch size, ' \
        f'but got {points.shape[0]} and {boxes.shape[0]}'
assert boxes.shape[2] == 7, \
'boxes dimension should be 7, ' \
f'but got unexpected shape {boxes.shape[2]}'
assert points.shape[2] == 3, \
'points dimension should be 3, ' \
f'but got unexpected shape {points.shape[2]}'
batch_size, num_points, _ = points.shape
num_boxes = boxes.shape[1]
box_idxs_of_pts = points.new_zeros((batch_size, num_points, num_boxes),
dtype=torch.int).fill_(0)
    # Same device-handling logic as in points_in_boxes_part above.
points_device = points.get_device()
assert points_device == boxes.get_device(), \
'Points and boxes should be put on the same device'
if torch.cuda.current_device() != points_device:
torch.cuda.set_device(points_device)
ext_module.points_in_boxes_all_forward(boxes.contiguous(),
points.contiguous(),
box_idxs_of_pts)
return box_idxs_of_pts
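# For intuition, a pure-PyTorch sketch of the underlying point-in-box test for
# a single yaw-rotated box (the helper below is illustrative only and assumes
# the box format documented above: (x, y, z, x_size, y_size, z_size, rz) with
# (x, y, z) at the bottom center): the point is rotated into the box frame and
# compared against the half extents.
def _point_in_rotated_box(point, box):
    shift = point[:2] - box[:2]
    cosa, sina = torch.cos(-box[6]), torch.sin(-box[6])
    local_x = shift[0] * cosa - shift[1] * sina
    local_y = shift[0] * sina + shift[1] * cosa
    in_xy = (local_x.abs() <= box[3] / 2) & (local_y.abs() <= box[4] / 2)
    in_z = (point[2] >= box[2]) & (point[2] <= box[2] + box[5])
    return bool(in_xy & in_z)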
from typing import List
import torch
from torch import nn as nn
from annotator.uniformer.mmcv.runner import force_fp32
from .furthest_point_sample import (furthest_point_sample,
furthest_point_sample_with_dist)
def calc_square_dist(point_feat_a, point_feat_b, norm=True):
"""Calculating square distance between a and b.
Args:
point_feat_a (Tensor): (B, N, C) Feature vector of each point.
point_feat_b (Tensor): (B, M, C) Feature vector of each point.
        norm (bool, optional): Whether to normalize the distance.
Default: True.
Returns:
Tensor: (B, N, M) Distance between each pair points.
"""
num_channel = point_feat_a.shape[-1]
# [bs, n, 1]
a_square = torch.sum(point_feat_a.unsqueeze(dim=2).pow(2), dim=-1)
# [bs, 1, m]
b_square = torch.sum(point_feat_b.unsqueeze(dim=1).pow(2), dim=-1)
corr_matrix = torch.matmul(point_feat_a, point_feat_b.transpose(1, 2))
dist = a_square + b_square - 2 * corr_matrix
if norm:
dist = torch.sqrt(dist) / num_channel
return dist
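# Quick illustrative check (the helper name is made up): with norm=False the
# function above should agree with the squared Euclidean distance computed by
# torch.cdist, up to floating point error.
def _check_calc_square_dist():
    a = torch.randn(2, 5, 3)
    b = torch.randn(2, 7, 3)
    ours = calc_square_dist(a, b, norm=False)
    ref = torch.cdist(a, b) ** 2
    return torch.allclose(ours, ref, atol=1e-4)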
def get_sampler_cls(sampler_type):
"""Get the type and mode of points sampler.
Args:
sampler_type (str): The type of points sampler.
            The valid values are "D-FPS", "F-FPS", or "FS".
Returns:
class: Points sampler type.
"""
sampler_mappings = {
'D-FPS': DFPSSampler,
'F-FPS': FFPSSampler,
'FS': FSSampler,
}
try:
return sampler_mappings[sampler_type]
except KeyError:
raise KeyError(
            f'Supported `sampler_type` are {sampler_mappings.keys()}, '
            f'but got {sampler_type}')
class PointsSampler(nn.Module):
"""Points sampling.
Args:
num_point (list[int]): Number of sample points.
fps_mod_list (list[str], optional): Type of FPS method, valid mod
['F-FPS', 'D-FPS', 'FS'], Default: ['D-FPS'].
F-FPS: using feature distances for FPS.
D-FPS: using Euclidean distances of points for FPS.
FS: using F-FPS and D-FPS simultaneously.
fps_sample_range_list (list[int], optional):
Range of points to apply FPS. Default: [-1].
"""
def __init__(self,
num_point: List[int],
fps_mod_list: List[str] = ['D-FPS'],
fps_sample_range_list: List[int] = [-1]):
super().__init__()
        # FPS would be applied to different fps_mod in the list,
        # so the length of num_point should be equal to those of
        # fps_mod_list and fps_sample_range_list.
assert len(num_point) == len(fps_mod_list) == len(
fps_sample_range_list)
self.num_point = num_point
self.fps_sample_range_list = fps_sample_range_list
self.samplers = nn.ModuleList()
for fps_mod in fps_mod_list:
self.samplers.append(get_sampler_cls(fps_mod)())
self.fp16_enabled = False
@force_fp32()
def forward(self, points_xyz, features):
"""
Args:
points_xyz (Tensor): (B, N, 3) xyz coordinates of the features.
features (Tensor): (B, C, N) Descriptors of the features.
Returns:
Tensor: (B, npoint, sample_num) Indices of sampled points.
"""
indices = []
last_fps_end_index = 0
for fps_sample_range, sampler, npoint in zip(
self.fps_sample_range_list, self.samplers, self.num_point):
assert fps_sample_range < points_xyz.shape[1]
if fps_sample_range == -1:
sample_points_xyz = points_xyz[:, last_fps_end_index:]
if features is not None:
sample_features = features[:, :, last_fps_end_index:]
else:
sample_features = None
else:
sample_points_xyz = \
points_xyz[:, last_fps_end_index:fps_sample_range]
if features is not None:
sample_features = features[:, :, last_fps_end_index:
fps_sample_range]
else:
sample_features = None
fps_idx = sampler(sample_points_xyz.contiguous(), sample_features,
npoint)
indices.append(fps_idx + last_fps_end_index)
last_fps_end_index += fps_sample_range
indices = torch.cat(indices, dim=1)
return indices
class DFPSSampler(nn.Module):
"""Using Euclidean distances of points for FPS."""
def __init__(self):
super().__init__()
def forward(self, points, features, npoint):
"""Sampling points with D-FPS."""
fps_idx = furthest_point_sample(points.contiguous(), npoint)
return fps_idx
class FFPSSampler(nn.Module):
"""Using feature distances for FPS."""
def __init__(self):
super().__init__()
def forward(self, points, features, npoint):
"""Sampling points with F-FPS."""
assert features is not None, \
'feature input to FFPS_Sampler should not be None'
features_for_fps = torch.cat([points, features.transpose(1, 2)], dim=2)
features_dist = calc_square_dist(
features_for_fps, features_for_fps, norm=False)
fps_idx = furthest_point_sample_with_dist(features_dist, npoint)
return fps_idx
class FSSampler(nn.Module):
"""Using F-FPS and D-FPS simultaneously."""
def __init__(self):
super().__init__()
def forward(self, points, features, npoint):
"""Sampling points with FS_Sampling."""
assert features is not None, \
'feature input to FS_Sampler should not be None'
ffps_sampler = FFPSSampler()
dfps_sampler = DFPSSampler()
fps_idx_ffps = ffps_sampler(points, features, npoint)
fps_idx_dfps = dfps_sampler(points, features, npoint)
fps_idx = torch.cat([fps_idx_ffps, fps_idx_dfps], dim=1)
return fps_idx
# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext',
['psamask_forward', 'psamask_backward'])
class PSAMaskFunction(Function):
@staticmethod
def symbolic(g, input, psa_type, mask_size):
return g.op(
'mmcv::MMCVPSAMask',
input,
psa_type_i=psa_type,
mask_size_i=mask_size)
@staticmethod
def forward(ctx, input, psa_type, mask_size):
ctx.psa_type = psa_type
ctx.mask_size = _pair(mask_size)
ctx.save_for_backward(input)
h_mask, w_mask = ctx.mask_size
batch_size, channels, h_feature, w_feature = input.size()
assert channels == h_mask * w_mask
output = input.new_zeros(
(batch_size, h_feature * w_feature, h_feature, w_feature))
ext_module.psamask_forward(
input,
output,
psa_type=psa_type,
num_=batch_size,
h_feature=h_feature,
w_feature=w_feature,
h_mask=h_mask,
w_mask=w_mask,
half_h_mask=(h_mask - 1) // 2,
half_w_mask=(w_mask - 1) // 2)
return output
@staticmethod
def backward(ctx, grad_output):
input = ctx.saved_tensors[0]
psa_type = ctx.psa_type
h_mask, w_mask = ctx.mask_size
batch_size, channels, h_feature, w_feature = input.size()
grad_input = grad_output.new_zeros(
(batch_size, channels, h_feature, w_feature))
ext_module.psamask_backward(
grad_output,
grad_input,
psa_type=psa_type,
num_=batch_size,
h_feature=h_feature,
w_feature=w_feature,
h_mask=h_mask,
w_mask=w_mask,
half_h_mask=(h_mask - 1) // 2,
half_w_mask=(w_mask - 1) // 2)
return grad_input, None, None, None
psa_mask = PSAMaskFunction.apply
class PSAMask(nn.Module):
def __init__(self, psa_type, mask_size=None):
super(PSAMask, self).__init__()
assert psa_type in ['collect', 'distribute']
if psa_type == 'collect':
psa_type_enum = 0
else:
psa_type_enum = 1
self.psa_type_enum = psa_type_enum
self.mask_size = mask_size
self.psa_type = psa_type
def forward(self, input):
return psa_mask(input, self.psa_type_enum, self.mask_size)
def __repr__(self):
s = self.__class__.__name__
s += f'(psa_type={self.psa_type}, '
s += f'mask_size={self.mask_size})'
return s
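# Minimal usage sketch for PSAMask (illustrative values; requires the compiled
# `_ext` extension): the input channel number must equal mask_h * mask_w.
def _demo_psa_mask():
    layer = PSAMask('collect', mask_size=(7, 7))
    x = torch.randn(1, 49, 16, 16)
    # move `layer` and `x` to CUDA if the op is only compiled for GPU
    return layer(x).shape  # (1, 16 * 16, 16, 16)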
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from ..utils import deprecated_api_warning, ext_loader
ext_module = ext_loader.load_ext('_ext',
['roi_align_forward', 'roi_align_backward'])
class RoIAlignFunction(Function):
@staticmethod
def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio,
pool_mode, aligned):
from ..onnx import is_custom_op_loaded
has_custom_op = is_custom_op_loaded()
if has_custom_op:
return g.op(
'mmcv::MMCVRoiAlign',
input,
rois,
output_height_i=output_size[0],
output_width_i=output_size[1],
spatial_scale_f=spatial_scale,
sampling_ratio_i=sampling_ratio,
mode_s=pool_mode,
aligned_i=aligned)
else:
from torch.onnx.symbolic_opset9 import sub, squeeze
from torch.onnx.symbolic_helper import _slice_helper
from torch.onnx import TensorProtoDataType
# batch_indices = rois[:, 0].long()
batch_indices = _slice_helper(
g, rois, axes=[1], starts=[0], ends=[1])
batch_indices = squeeze(g, batch_indices, 1)
batch_indices = g.op(
'Cast', batch_indices, to_i=TensorProtoDataType.INT64)
# rois = rois[:, 1:]
rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5])
if aligned:
# rois -= 0.5/spatial_scale
aligned_offset = g.op(
'Constant',
value_t=torch.tensor([0.5 / spatial_scale],
dtype=torch.float32))
rois = sub(g, rois, aligned_offset)
# roi align
return g.op(
'RoiAlign',
input,
rois,
batch_indices,
output_height_i=output_size[0],
output_width_i=output_size[1],
spatial_scale_f=spatial_scale,
sampling_ratio_i=max(0, sampling_ratio),
mode_s=pool_mode)
@staticmethod
def forward(ctx,
input,
rois,
output_size,
spatial_scale=1.0,
sampling_ratio=0,
pool_mode='avg',
aligned=True):
ctx.output_size = _pair(output_size)
ctx.spatial_scale = spatial_scale
ctx.sampling_ratio = sampling_ratio
assert pool_mode in ('max', 'avg')
ctx.pool_mode = 0 if pool_mode == 'max' else 1
ctx.aligned = aligned
ctx.input_shape = input.size()
assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'
output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
ctx.output_size[1])
output = input.new_zeros(output_shape)
if ctx.pool_mode == 0:
argmax_y = input.new_zeros(output_shape)
argmax_x = input.new_zeros(output_shape)
else:
argmax_y = input.new_zeros(0)
argmax_x = input.new_zeros(0)
ext_module.roi_align_forward(
input,
rois,
output,
argmax_y,
argmax_x,
aligned_height=ctx.output_size[0],
aligned_width=ctx.output_size[1],
spatial_scale=ctx.spatial_scale,
sampling_ratio=ctx.sampling_ratio,
pool_mode=ctx.pool_mode,
aligned=ctx.aligned)
ctx.save_for_backward(rois, argmax_y, argmax_x)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
rois, argmax_y, argmax_x = ctx.saved_tensors
grad_input = grad_output.new_zeros(ctx.input_shape)
# complex head architecture may cause grad_output uncontiguous.
grad_output = grad_output.contiguous()
ext_module.roi_align_backward(
grad_output,
rois,
argmax_y,
argmax_x,
grad_input,
aligned_height=ctx.output_size[0],
aligned_width=ctx.output_size[1],
spatial_scale=ctx.spatial_scale,
sampling_ratio=ctx.sampling_ratio,
pool_mode=ctx.pool_mode,
aligned=ctx.aligned)
return grad_input, None, None, None, None, None, None
roi_align = RoIAlignFunction.apply
class RoIAlign(nn.Module):
"""RoI align pooling layer.
Args:
output_size (tuple): h, w
spatial_scale (float): scale the input boxes by this number
        sampling_ratio (int): number of input samples to take for each
output sample. 0 to take samples densely for current models.
pool_mode (str, 'avg' or 'max'): pooling mode in each bin.
aligned (bool): if False, use the legacy implementation in
MMDetection. If True, align the results more perfectly.
use_torchvision (bool): whether to use roi_align from torchvision.
Note:
The implementation of RoIAlign when aligned=True is modified from
https://github.com/facebookresearch/detectron2/
The meaning of aligned=True:
Given a continuous coordinate c, its two neighboring pixel
indices (in our pixel model) are computed by floor(c - 0.5) and
ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete
indices [0] and [1] (which are sampled from the underlying signal
at continuous coordinates 0.5 and 1.5). But the original roi_align
(aligned=False) does not subtract the 0.5 when computing
neighboring pixel indices and therefore it uses pixels with a
slightly incorrect alignment (relative to our pixel model) when
performing bilinear interpolation.
With `aligned=True`,
we first appropriately scale the ROI and then shift it by -0.5
prior to calling roi_align. This produces the correct neighbors;
The difference does not make a difference to the model's
performance if ROIAlign is used together with conv layers.
"""
@deprecated_api_warning(
{
'out_size': 'output_size',
'sample_num': 'sampling_ratio'
},
cls_name='RoIAlign')
def __init__(self,
output_size,
spatial_scale=1.0,
sampling_ratio=0,
pool_mode='avg',
aligned=True,
use_torchvision=False):
super(RoIAlign, self).__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
self.sampling_ratio = int(sampling_ratio)
self.pool_mode = pool_mode
self.aligned = aligned
self.use_torchvision = use_torchvision
def forward(self, input, rois):
"""
Args:
input: NCHW images
rois: Bx5 boxes. First column is the index into N.\
The other 4 columns are xyxy.
"""
if self.use_torchvision:
from torchvision.ops import roi_align as tv_roi_align
if 'aligned' in tv_roi_align.__code__.co_varnames:
return tv_roi_align(input, rois, self.output_size,
self.spatial_scale, self.sampling_ratio,
self.aligned)
else:
if self.aligned:
rois -= rois.new_tensor([0.] +
[0.5 / self.spatial_scale] * 4)
return tv_roi_align(input, rois, self.output_size,
self.spatial_scale, self.sampling_ratio)
else:
return roi_align(input, rois, self.output_size, self.spatial_scale,
self.sampling_ratio, self.pool_mode, self.aligned)
def __repr__(self):
s = self.__class__.__name__
s += f'(output_size={self.output_size}, '
s += f'spatial_scale={self.spatial_scale}, '
s += f'sampling_ratio={self.sampling_ratio}, '
s += f'pool_mode={self.pool_mode}, '
s += f'aligned={self.aligned}, '
s += f'use_torchvision={self.use_torchvision})'
return s
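# Minimal usage sketch (illustrative values): with use_torchvision=True the
# layer dispatches to torchvision.ops.roi_align, avoiding the compiled mmcv
# extension; rois follow the (batch_idx, x1, y1, x2, y2) convention.
def _demo_roi_align():
    layer = RoIAlign(output_size=7, spatial_scale=1.0, sampling_ratio=2,
                     use_torchvision=True)
    feats = torch.randn(1, 8, 32, 32)
    rois = torch.tensor([[0., 4., 4., 20., 20.]])
    return layer(feats, rois).shape  # torch.Size([1, 8, 7, 7])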