Unverified Commit c0f5492e authored by zhuyuanhao, committed by GitHub

add ext ops, support parrots (#310)



* add ext ops, support parrots

* fix lint

* fix lint

* update op from mmdetection

* support non-pytorch env

* fix import bug

* test not import mmcv.op

* rename mmcv.op to mmcv.ops

* fix compile warning

* 1. fix syncbn warning in pytorch 1.5
2. support only cpu compile
3. add point_sample from mmdet

* fix text bug

* update docstrings

* fix line endings

* minor updates

* remove non_local from ops

* bug fix for nonlocal2d

* rename ops_ext to _ext and _ext to _flow_warp_ext

* update the doc

* try clang-format github action

* fix github action

* add ops to api.rst

* fix cpp format

* fix clang format issues

* remove .clang-format
Co-authored-by: Kai Chen <chenkaidev@gmail.com>
parent a7bf7701
#ifndef SYNC_BN_KERNEL_CUH
#define SYNC_BN_KERNEL_CUH
template <typename T>
__global__ void sync_bn_forward_mean_cuda_kernel(const T *input, float *mean,
int num, int channels,
int spatial) {
__shared__ float buffer[THREADS_PER_BLOCK];
int tid = threadIdx.x;
int c = blockIdx.x;
buffer[tid] = 0;
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index = (i / spatial) * channels * spatial + c * spatial + i % spatial;
buffer[tid] += input[index];
}
__syncthreads();
for (int s = blockDim.x / 2; s > 0; s >>= 1) {
if (tid < s) {
buffer[tid] += buffer[tid + s];
}
__syncthreads();
}
int total = num * spatial;
if (tid == 0) {
mean[c] = buffer[0] / total;
}
}
template <>
__global__ void sync_bn_forward_mean_cuda_kernel(const phalf *input,
float *mean, int num,
int channels, int spatial) {
__shared__ float buffer[THREADS_PER_BLOCK];
int tid = threadIdx.x;
int c = blockIdx.x;
buffer[tid] = 0;
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index = (i / spatial) * channels * spatial + c * spatial + i % spatial;
buffer[tid] += static_cast<float>(input[index]);
}
__syncthreads();
for (int s = blockDim.x / 2; s > 0; s >>= 1) {
if (tid < s) {
buffer[tid] += buffer[tid + s];
}
__syncthreads();
}
int total = num * spatial;
if (tid == 0) {
mean[c] = buffer[0] / total;
}
}
template <typename T>
__global__ void sync_bn_forward_var_cuda_kernel(const T *input,
const float *mean, float *var,
int num, int channels,
int spatial) {
__shared__ float buffer[THREADS_PER_BLOCK];
int tid = threadIdx.x;
int c = blockIdx.x;
buffer[tid] = 0;
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index = (i / spatial) * channels * spatial + c * spatial + i % spatial;
float td = input[index] - mean[c];
buffer[tid] += td * td;
}
__syncthreads();
for (int s = blockDim.x / 2; s > 0; s >>= 1) {
if (tid < s) {
buffer[tid] += buffer[tid + s];
}
__syncthreads();
}
int total = num * spatial;
if (tid == 0) {
var[c] = buffer[0] / total;
}
}
template <>
__global__ void sync_bn_forward_var_cuda_kernel(const phalf *input,
const float *mean, float *var,
int num, int channels,
int spatial) {
__shared__ float buffer[THREADS_PER_BLOCK];
int tid = threadIdx.x;
int c = blockIdx.x;
buffer[tid] = 0;
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index = (i / spatial) * channels * spatial + c * spatial + i % spatial;
float td = static_cast<float>(input[index]) - mean[c];
buffer[tid] += td * td;
}
__syncthreads();
for (int s = blockDim.x / 2; s > 0; s >>= 1) {
if (tid < s) {
buffer[tid] += buffer[tid + s];
}
__syncthreads();
}
int total = num * spatial;
if (tid == 0) {
var[c] = buffer[0] / total;
}
}
template <typename T>
__global__ void sync_bn_forward_output_cuda_kernel(
const T *input, const float *mean, const float *var, float *running_mean,
float *running_var, const float *weight, const float *bias, float *norm,
float *std, T *output, int num, int channels, int spatial, float eps,
float momentum, int group_size) {
int tid = threadIdx.x;
int c = blockIdx.x;
float mean_value = mean[c];
float std_value = sqrt(var[c] + eps);
if (weight != nullptr) {
float weight_value = weight[c];
float bias_value = bias[c];
if (norm != nullptr) {
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index =
(i / spatial) * channels * spatial + c * spatial + i % spatial;
norm[index] = (input[index] - mean_value) / std_value;
output[index] = norm[index] * weight_value + bias_value;
}
} else {
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index =
(i / spatial) * channels * spatial + c * spatial + i % spatial;
output[index] =
(input[index] - mean_value) / std_value * weight_value + bias_value;
}
}
} else {
if (norm != nullptr) {
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index =
(i / spatial) * channels * spatial + c * spatial + i % spatial;
output[index] = norm[index] = (input[index] - mean_value) / std_value;
}
} else {
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index =
(i / spatial) * channels * spatial + c * spatial + i % spatial;
output[index] = (input[index] - mean_value) / std_value;
}
}
}
if (tid == 0) {
if (std != nullptr) std[c] = std_value;
if (running_mean != nullptr) {
running_mean[c] =
momentum * mean_value + (1 - momentum) * running_mean[c];
int count = num * spatial * group_size;
float var_unbias = count > 1 ? var[c] * count / (count - 1) : var[c];
running_var[c] = momentum * var_unbias + (1 - momentum) * running_var[c];
}
}
}
template <>
__global__ void sync_bn_forward_output_cuda_kernel(
const phalf *input, const float *mean, const float *var,
float *running_mean, float *running_var, const float *weight,
const float *bias, float *norm, float *std, phalf *output, int num,
int channels, int spatial, float eps, float momentum, int group_size) {
int tid = threadIdx.x;
int c = blockIdx.x;
float mean_value = mean[c];
float std_value = sqrt(var[c] + eps);
if (weight != nullptr) {
float weight_value = weight[c];
float bias_value = bias[c];
if (norm != nullptr) {
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index =
(i / spatial) * channels * spatial + c * spatial + i % spatial;
norm[index] =
(static_cast<float>(input[index]) - mean_value) / std_value;
output[index] =
static_cast<phalf>(norm[index] * weight_value + bias_value);
}
} else {
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index =
(i / spatial) * channels * spatial + c * spatial + i % spatial;
output[index] =
static_cast<phalf>((static_cast<float>(input[index]) - mean_value) /
std_value * weight_value +
bias_value);
}
}
} else {
if (norm != nullptr) {
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index =
(i / spatial) * channels * spatial + c * spatial + i % spatial;
norm[index] =
(static_cast<float>(input[index]) - mean_value) / std_value;
output[index] = static_cast<phalf>(norm[index]);
}
} else {
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index =
(i / spatial) * channels * spatial + c * spatial + i % spatial;
output[index] = static_cast<phalf>(
(static_cast<float>(input[index]) - mean_value) / std_value);
}
}
}
if (tid == 0) {
if (std != nullptr) std[c] = std_value;
if (running_mean != nullptr) {
running_mean[c] =
momentum * mean_value + (1 - momentum) * running_mean[c];
int count = num * spatial * group_size;
float var_unbias = count > 1 ? var[c] * count / (count - 1) : var[c];
running_var[c] = momentum * var_unbias + (1 - momentum) * running_var[c];
}
}
}
template <typename T>
__global__ void sync_bn_backward_param_cuda_kernel(const T *grad_output,
const float *norm,
float *grad_weight,
float *grad_bias, int num,
int channels, int spatial) {
__shared__ float buffer1[THREADS_PER_BLOCK];
__shared__ float buffer2[THREADS_PER_BLOCK];
int tid = threadIdx.x;
int c = blockIdx.x;
buffer1[tid] = buffer2[tid] = 0;
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index = (i / spatial) * channels * spatial + c * spatial + i % spatial;
buffer1[tid] += grad_output[index] * norm[index];
buffer2[tid] += grad_output[index];
}
__syncthreads();
for (int s = blockDim.x / 2; s > 0; s >>= 1) {
if (tid < s) {
buffer1[tid] += buffer1[tid + s];
buffer2[tid] += buffer2[tid + s];
}
__syncthreads();
}
if (tid == 0) {
grad_weight[c] = buffer1[0];
grad_bias[c] = buffer2[0];
}
}
template <>
__global__ void sync_bn_backward_param_cuda_kernel(const phalf *grad_output,
const float *norm,
float *grad_weight,
float *grad_bias, int num,
int channels, int spatial) {
__shared__ float buffer1[THREADS_PER_BLOCK];
__shared__ float buffer2[THREADS_PER_BLOCK];
int tid = threadIdx.x;
int c = blockIdx.x;
buffer1[tid] = buffer2[tid] = 0;
for (int i = tid; i < num * spatial; i += blockDim.x) {
int index = (i / spatial) * channels * spatial + c * spatial + i % spatial;
buffer1[tid] += static_cast<float>(grad_output[index]) * norm[index];
buffer2[tid] += static_cast<float>(grad_output[index]);
}
__syncthreads();
for (int s = blockDim.x / 2; s > 0; s >>= 1) {
if (tid < s) {
buffer1[tid] += buffer1[tid + s];
buffer2[tid] += buffer2[tid + s];
}
__syncthreads();
}
if (tid == 0) {
grad_weight[c] = buffer1[0];
grad_bias[c] = buffer2[0];
}
}
template <typename T>
__global__ void sync_bn_backward_data_cuda_kernel(
int output_size, const T *grad_output, const float *weight,
const float *grad_weight, const float *grad_bias, const float *norm,
const float *std, T *grad_input, int num, int channels, int spatial) {
int factor = num * spatial;
CUDA_1D_KERNEL_LOOP(index, output_size) {
int c = (index / spatial) % channels;
grad_input[index] =
weight[c] *
(grad_output[index] -
(grad_weight[c] * norm[index] + grad_bias[c]) / factor) /
std[c];
}
}
template <>
__global__ void sync_bn_backward_data_cuda_kernel(
int output_size, const phalf *grad_output, const float *weight,
const float *grad_weight, const float *grad_bias, const float *norm,
const float *std, phalf *grad_input, int num, int channels, int spatial) {
int factor = num * spatial;
CUDA_1D_KERNEL_LOOP(index, output_size) {
int c = (index / spatial) % channels;
grad_input[index] = static_cast<phalf>(
weight[c] *
(static_cast<float>(grad_output[index]) -
(grad_weight[c] * norm[index] + grad_bias[c]) / factor) /
std[c]);
}
}
#endif // SYNC_BN_KERNEL_CUH
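For reference (not part of this diff): the kernels above reduce over the batch and spatial dimensions per channel, normalize with the biased variance, and update the running statistics with the unbiased variance. A minimal NumPy sketch of the same math for a single process, assuming float32 inputs and no half-precision path:

```python
import numpy as np

def sync_bn_forward_reference(x, running_mean, running_var, weight, bias,
                              eps=1e-5, momentum=0.1, group_size=1):
    # x: (N, C, *spatial); statistics are reduced over N and the spatial dims,
    # mirroring sync_bn_forward_mean/var/output_cuda_kernel for one process.
    n, c = x.shape[:2]
    spatial = int(np.prod(x.shape[2:]))
    x2d = x.reshape(n, c, spatial)
    mean = x2d.mean(axis=(0, 2))                                 # per-channel mean
    var = ((x2d - mean[None, :, None]) ** 2).mean(axis=(0, 2))   # biased variance
    std = np.sqrt(var + eps)
    norm = (x2d - mean[None, :, None]) / std[None, :, None]
    out = norm * weight[None, :, None] + bias[None, :, None]
    # running stats use the unbiased variance, as in the output kernel
    count = n * spatial * group_size
    var_unbias = var * count / (count - 1) if count > 1 else var
    new_running_mean = momentum * mean + (1 - momentum) * running_mean
    new_running_var = momentum * var_unbias + (1 - momentum) * running_var
    return out.reshape(x.shape), new_running_mean, new_running_var
```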
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single
from ..cnn import CONV_LAYERS
from ..utils import ext_loader, print_log
ext_module = ext_loader.load_ext('_ext', [
'deform_conv_forward', 'deform_conv_backward_input',
'deform_conv_backward_parameters'
])
class DeformConv2dFunction(Function):
@staticmethod
def symbolic(g, input, offset, weight, stride, padding, dilation, groups,
deform_groups, bias, im2col_step):
return g.op(
'MMCVDeformConv2d',
input,
offset,
weight,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
deform_groups=deform_groups,
bias=bias,
im2col_step=im2col_step)
@staticmethod
def forward(ctx,
input,
offset,
weight,
stride=1,
padding=0,
dilation=1,
groups=1,
deform_groups=1,
bias=False,
im2col_step=32):
if input is not None and input.dim() != 4:
raise ValueError(
                f'Expected 4D tensor as input, got {input.dim()}D tensor '
                'instead.')
assert bias is False, 'Only support bias is False.'
ctx.stride = _pair(stride)
ctx.padding = _pair(padding)
ctx.dilation = _pair(dilation)
ctx.groups = groups
ctx.deform_groups = deform_groups
ctx.im2col_step = im2col_step
ctx.save_for_backward(input, offset, weight)
output = input.new_empty(
DeformConv2dFunction._output_size(ctx, input, weight))
ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones
cur_im2col_step = min(ctx.im2col_step, input.size(0))
assert (input.size(0) %
cur_im2col_step) == 0, 'im2col step must divide batchsize'
ext_module.deform_conv_forward(
input,
weight,
offset,
output,
ctx.bufs_[0],
ctx.bufs_[1],
kW=weight.size(3),
kH=weight.size(2),
dW=ctx.stride[1],
dH=ctx.stride[0],
padW=ctx.padding[1],
padH=ctx.padding[0],
dilationW=ctx.dilation[1],
dilationH=ctx.dilation[0],
group=ctx.groups,
deformable_group=ctx.deform_groups,
im2col_step=cur_im2col_step)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, offset, weight = ctx.saved_tensors
grad_input = grad_offset = grad_weight = None
cur_im2col_step = min(ctx.im2col_step, input.size(0))
assert (input.size(0) %
cur_im2col_step) == 0, 'im2col step must divide batchsize'
grad_output = grad_output.contiguous()
if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
grad_input = torch.zeros_like(input)
grad_offset = torch.zeros_like(offset)
ext_module.deform_conv_backward_input(
input,
offset,
grad_output,
grad_input,
grad_offset,
weight,
ctx.bufs_[0],
kW=weight.size(3),
kH=weight.size(2),
dW=ctx.stride[1],
dH=ctx.stride[0],
padW=ctx.padding[1],
padH=ctx.padding[0],
dilationW=ctx.dilation[1],
dilationH=ctx.dilation[0],
group=ctx.groups,
deformable_group=ctx.deform_groups,
im2col_step=cur_im2col_step)
if ctx.needs_input_grad[2]:
grad_weight = torch.zeros_like(weight)
ext_module.deform_conv_backward_parameters(
input,
offset,
grad_output,
grad_weight,
ctx.bufs_[0],
ctx.bufs_[1],
kW=weight.size(3),
kH=weight.size(2),
dW=ctx.stride[1],
dH=ctx.stride[0],
padW=ctx.padding[1],
padH=ctx.padding[0],
dilationW=ctx.dilation[1],
dilationH=ctx.dilation[0],
group=ctx.groups,
deformable_group=ctx.deform_groups,
scale=1,
im2col_step=cur_im2col_step)
return grad_input, grad_offset, grad_weight, \
None, None, None, None, None, None, None
@staticmethod
def _output_size(ctx, input, weight):
channels = weight.size(0)
output_size = (input.size(0), channels)
for d in range(input.dim() - 2):
in_size = input.size(d + 2)
pad = ctx.padding[d]
kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
stride_ = ctx.stride[d]
output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
if not all(map(lambda s: s > 0, output_size)):
raise ValueError(
'convolution input is too small (output would be ' +
'x'.join(map(str, output_size)) + ')')
return output_size
deform_conv2d = DeformConv2dFunction.apply
class DeformConv2d(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
deform_groups=1,
bias=False):
super(DeformConv2d, self).__init__()
        assert in_channels % groups == 0, \
            f'in_channels {in_channels} is not divisible by groups {groups}'
        assert out_channels % groups == 0, \
            f'out_channels {out_channels} is not divisible by groups ' \
            f'{groups}'
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
self.stride = _pair(stride)
self.padding = _pair(padding)
self.dilation = _pair(dilation)
self.groups = groups
self.deform_groups = deform_groups
# enable compatibility with nn.Conv2d
self.transposed = False
self.output_padding = _single(0)
# only weight, no bias
self.weight = nn.Parameter(
torch.Tensor(out_channels, in_channels // self.groups,
*self.kernel_size))
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
def forward(self, x, offset):
        # Pad the input when it is smaller than the kernel, to avoid an
        # assert error in deform_conv_cuda.cpp:128
input_pad = (x.size(2) < self.kernel_size[0]) or (x.size(3) <
self.kernel_size[1])
if input_pad:
pad_h = max(self.kernel_size[0] - x.size(2), 0)
pad_w = max(self.kernel_size[1] - x.size(3), 0)
x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous()
offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant', 0)
offset = offset.contiguous()
out = deform_conv2d(x, offset, self.weight, self.stride, self.padding,
self.dilation, self.groups, self.deform_groups)
if input_pad:
out = out[:, :, :out.size(2) - pad_h, :out.size(3) -
pad_w].contiguous()
return out
@CONV_LAYERS.register_module('DCN')
class DeformConv2dPack(DeformConv2d):
"""A Deformable Conv Encapsulation that acts as normal Conv layers.
The offset tensor is like `[y0, x0, y1, x1, y2, x2, ..., y8, x8]`.
The spatial arrangement is like:
```
(x0, y0) (x1, y1) (x2, y2)
(x3, y3) (x4, y4) (x5, y5)
(x6, y6) (x7, y7) (x8, y8)
```
Args:
in_channels (int): Same as nn.Conv2d.
out_channels (int): Same as nn.Conv2d.
kernel_size (int or tuple[int]): Same as nn.Conv2d.
stride (int or tuple[int]): Same as nn.Conv2d.
padding (int or tuple[int]): Same as nn.Conv2d.
dilation (int or tuple[int]): Same as nn.Conv2d.
groups (int): Same as nn.Conv2d.
bias (bool or str): If specified as `auto`, it will be decided by the
norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
False.
"""
_version = 2
def __init__(self, *args, **kwargs):
super(DeformConv2dPack, self).__init__(*args, **kwargs)
self.conv_offset = nn.Conv2d(
self.in_channels,
self.deform_groups * 2 * self.kernel_size[0] * self.kernel_size[1],
kernel_size=self.kernel_size,
stride=_pair(self.stride),
padding=_pair(self.padding),
dilation=_pair(self.dilation),
bias=True)
self.init_offset()
def init_offset(self):
self.conv_offset.weight.data.zero_()
self.conv_offset.bias.data.zero_()
def forward(self, x):
offset = self.conv_offset(x)
return deform_conv2d(x, offset, self.weight, self.stride, self.padding,
self.dilation, self.groups, self.deform_groups)
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
missing_keys, unexpected_keys, error_msgs):
version = local_metadata.get('version', None)
if version is None or version < 2:
# the key is different in early versions
# In version < 2, DeformConvPack loads previous benchmark models.
if (prefix + 'conv_offset.weight' not in state_dict
and prefix[:-1] + '_offset.weight' in state_dict):
state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
prefix[:-1] + '_offset.weight')
if (prefix + 'conv_offset.bias' not in state_dict
and prefix[:-1] + '_offset.bias' in state_dict):
state_dict[prefix +
'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
'_offset.bias')
if version is not None and version > 1:
print_log(
f'DeformConv2dPack {prefix.rstrip(".")} is upgraded to '
'version 2.',
logger='root')
super()._load_from_state_dict(state_dict, prefix, local_metadata,
strict, missing_keys, unexpected_keys,
error_msgs)
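For reference (not part of this diff): a minimal usage sketch of `DeformConv2dPack`, which predicts its own offsets with the internal `conv_offset` layer and is registered as 'DCN' in `CONV_LAYERS`. It assumes the compiled `_ext` extension, a CUDA device, and that the class is importable from `mmcv.ops` (the module renamed from `mmcv.op` in this PR):

```python
import torch
from mmcv.ops import DeformConv2dPack  # assumed import path

conv = DeformConv2dPack(16, 32, kernel_size=3, padding=1, deform_groups=1).cuda()
x = torch.randn(2, 16, 28, 28, device='cuda')
out = conv(x)            # offsets are predicted internally by conv_offset
print(out.shape)         # torch.Size([2, 32, 28, 28])
```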
from torch import nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext', ['deform_roi_pool_forward', 'deform_roi_pool_backward'])
class DeformRoIPoolFunction(Function):
@staticmethod
def symbolic(g, input, rois, offset, output_size, spatial_scale,
sampling_ratio, gamma):
return g.op(
'MMCVDeformRoIPool',
input,
rois,
offset,
pooled_height=output_size[0],
pooled_width=output_size[1],
spatial_scale=spatial_scale,
sampling_ratio=sampling_ratio,
gamma=gamma)
@staticmethod
def forward(ctx,
input,
rois,
offset,
output_size,
spatial_scale=1.0,
sampling_ratio=0,
gamma=0.1):
if offset is None:
offset = input.new_zeros(0)
ctx.output_size = _pair(output_size)
ctx.spatial_scale = float(spatial_scale)
ctx.sampling_ratio = int(sampling_ratio)
ctx.gamma = float(gamma)
assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'
output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
ctx.output_size[1])
output = input.new_zeros(output_shape)
ext_module.deform_roi_pool_forward(
input,
rois,
offset,
output,
pooled_height=ctx.output_size[0],
pooled_width=ctx.output_size[1],
spatial_scale=ctx.spatial_scale,
sampling_ratio=ctx.sampling_ratio,
gamma=ctx.gamma)
ctx.save_for_backward(input, rois, offset)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, rois, offset = ctx.saved_tensors
grad_input = grad_output.new_zeros(input.shape)
grad_offset = grad_output.new_zeros(offset.shape)
ext_module.deform_roi_pool_backward(
grad_output,
input,
rois,
offset,
grad_input,
grad_offset,
pooled_height=ctx.output_size[0],
pooled_width=ctx.output_size[1],
spatial_scale=ctx.spatial_scale,
sampling_ratio=ctx.sampling_ratio,
gamma=ctx.gamma)
if grad_offset.numel() == 0:
grad_offset = None
return grad_input, None, grad_offset, None, None, None, None
deform_roi_pool = DeformRoIPoolFunction.apply
class DeformRoIPool(nn.Module):
def __init__(self,
output_size,
spatial_scale=1.0,
sampling_ratio=0,
gamma=0.1):
super(DeformRoIPool, self).__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
self.sampling_ratio = int(sampling_ratio)
self.gamma = float(gamma)
def forward(self, input, rois, offset=None):
return deform_roi_pool(input, rois, offset, self.output_size,
self.spatial_scale, self.sampling_ratio,
self.gamma)
class DeformRoIPoolPack(DeformRoIPool):
def __init__(self,
output_size,
output_channels,
deform_fc_channels=1024,
spatial_scale=1.0,
sampling_ratio=0,
gamma=0.1):
super(DeformRoIPoolPack, self).__init__(output_size, spatial_scale,
sampling_ratio, gamma)
self.output_channels = output_channels
self.deform_fc_channels = deform_fc_channels
self.offset_fc = nn.Sequential(
nn.Linear(
self.output_size[0] * self.output_size[1] *
self.output_channels, self.deform_fc_channels),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_channels, self.deform_fc_channels),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_channels,
self.output_size[0] * self.output_size[1] * 2))
self.offset_fc[-1].weight.data.zero_()
self.offset_fc[-1].bias.data.zero_()
def forward(self, input, rois):
assert input.size(1) == self.output_channels
x = deform_roi_pool(input, rois, None, self.output_size,
self.spatial_scale, self.sampling_ratio,
self.gamma)
rois_num = rois.size(0)
offset = self.offset_fc(x.view(rois_num, -1))
offset = offset.view(rois_num, 2, self.output_size[0],
self.output_size[1])
return deform_roi_pool(input, rois, offset, self.output_size,
self.spatial_scale, self.sampling_ratio,
self.gamma)
class ModulatedDeformRoIPoolPack(DeformRoIPool):
def __init__(self,
output_size,
output_channels,
deform_fc_channels=1024,
spatial_scale=1.0,
sampling_ratio=0,
gamma=0.1):
super(ModulatedDeformRoIPoolPack,
self).__init__(output_size, spatial_scale, sampling_ratio, gamma)
self.output_channels = output_channels
self.deform_fc_channels = deform_fc_channels
self.offset_fc = nn.Sequential(
nn.Linear(
self.output_size[0] * self.output_size[1] *
self.output_channels, self.deform_fc_channels),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_channels, self.deform_fc_channels),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_channels,
self.output_size[0] * self.output_size[1] * 2))
self.offset_fc[-1].weight.data.zero_()
self.offset_fc[-1].bias.data.zero_()
self.mask_fc = nn.Sequential(
nn.Linear(
self.output_size[0] * self.output_size[1] *
self.output_channels, self.deform_fc_channels),
nn.ReLU(inplace=True),
nn.Linear(self.deform_fc_channels,
self.output_size[0] * self.output_size[1] * 1),
nn.Sigmoid())
self.mask_fc[2].weight.data.zero_()
self.mask_fc[2].bias.data.zero_()
def forward(self, input, rois):
assert input.size(1) == self.output_channels
x = deform_roi_pool(input, rois, None, self.output_size,
self.spatial_scale, self.sampling_ratio,
self.gamma)
rois_num = rois.size(0)
offset = self.offset_fc(x.view(rois_num, -1))
offset = offset.view(rois_num, 2, self.output_size[0],
self.output_size[1])
mask = self.mask_fc(x.view(rois_num, -1))
mask = mask.view(rois_num, 1, self.output_size[0], self.output_size[1])
d = deform_roi_pool(input, rois, offset, self.output_size,
self.spatial_scale, self.sampling_ratio,
self.gamma)
return d * mask
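For reference (not part of this diff): a usage sketch of `DeformRoIPoolPack`, which first pools without offsets, predicts offsets from that result with `offset_fc`, then pools again. Assumes the compiled `_ext` extension, a CUDA device, and the `mmcv.ops` import path:

```python
import torch
from mmcv.ops import DeformRoIPoolPack  # assumed import path

pool = DeformRoIPoolPack(output_size=7, output_channels=16,
                         spatial_scale=1.0 / 16).cuda()
feat = torch.randn(1, 16, 50, 50, device='cuda')
# rois are (batch_idx, x1, y1, x2, y2) in the input image coordinate frame
rois = torch.tensor([[0., 16., 16., 240., 240.]], device='cuda')
out = pool(feat, rois)
print(out.shape)         # torch.Size([1, 16, 7, 7])
```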
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', [
'sigmoid_focal_loss_forward', 'sigmoid_focal_loss_backward',
'softmax_focal_loss_forward', 'softmax_focal_loss_backward'
])
class SigmoidFocalLossFunction(Function):
@staticmethod
def symbolic(g, input, target, gamma, alpha, weight, reduction):
return g.op(
'MMCVSigmoidFocalLoss',
input,
target,
gamma=gamma,
alpha=alpha,
weight=weight,
reduction=reduction)
@staticmethod
def forward(ctx,
input,
target,
gamma=2.0,
alpha=0.25,
weight=None,
reduction='mean'):
assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
assert input.dim() == 2
assert target.dim() == 1
assert input.size(0) == target.size(0)
if weight is None:
weight = input.new_empty(0)
else:
assert weight.dim() == 1
assert input.size(1) == weight.size(0)
ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
assert reduction in ctx.reduction_dict.keys()
ctx.gamma = float(gamma)
ctx.alpha = float(alpha)
ctx.reduction = ctx.reduction_dict[reduction]
output = input.new_zeros(input.size())
ext_module.sigmoid_focal_loss_forward(
input, target, weight, output, gamma=ctx.gamma, alpha=ctx.alpha)
if ctx.reduction == ctx.reduction_dict['mean']:
output = output.sum() / input.size(0)
elif ctx.reduction == ctx.reduction_dict['sum']:
output = output.sum()
ctx.save_for_backward(input, target, weight)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, target, weight = ctx.saved_tensors
grad_input = input.new_zeros(input.size())
ext_module.sigmoid_focal_loss_backward(
input,
target,
weight,
grad_input,
gamma=ctx.gamma,
alpha=ctx.alpha)
grad_input *= grad_output
if ctx.reduction == ctx.reduction_dict['mean']:
grad_input /= input.size(0)
return grad_input, None, None, None, None, None
sigmoid_focal_loss = SigmoidFocalLossFunction.apply
class SigmoidFocalLoss(nn.Module):
def __init__(self, gamma, alpha, weight=None, reduction='mean'):
super(SigmoidFocalLoss, self).__init__()
self.gamma = gamma
self.alpha = alpha
self.register_buffer('weight', weight)
self.reduction = reduction
def forward(self, input, target):
return sigmoid_focal_loss(input, target, self.gamma, self.alpha,
self.weight, self.reduction)
def __repr__(self):
s = self.__class__.__name__
s += f'(gamma={self.gamma}, '
s += f'alpha={self.alpha}, '
s += f'reduction={self.reduction})'
return s
class SoftmaxFocalLossFunction(Function):
@staticmethod
def symbolic(g, input, target, gamma, alpha, weight, reduction):
return g.op(
'MMCVSoftmaxFocalLoss',
input,
target,
gamma=gamma,
alpha=alpha,
weight=weight,
reduction=reduction)
@staticmethod
def forward(ctx,
input,
target,
gamma=2.0,
alpha=0.25,
weight=None,
reduction='mean'):
assert isinstance(target, (torch.LongTensor, torch.cuda.LongTensor))
assert input.dim() == 2
assert target.dim() == 1
assert input.size(0) == target.size(0)
if weight is None:
weight = input.new_empty(0)
else:
assert weight.dim() == 1
assert input.size(1) == weight.size(0)
ctx.reduction_dict = {'none': 0, 'mean': 1, 'sum': 2}
assert reduction in ctx.reduction_dict.keys()
ctx.gamma = float(gamma)
ctx.alpha = float(alpha)
ctx.reduction = ctx.reduction_dict[reduction]
channel_stats, _ = torch.max(input, dim=1)
input_softmax = input - channel_stats.unsqueeze(1).expand_as(input)
input_softmax.exp_()
channel_stats = input_softmax.sum(dim=1)
input_softmax /= channel_stats.unsqueeze(1).expand_as(input)
output = input.new_zeros(input.size(0))
ext_module.softmax_focal_loss_forward(
input_softmax,
target,
weight,
output,
gamma=ctx.gamma,
alpha=ctx.alpha)
if ctx.reduction == ctx.reduction_dict['mean']:
output = output.sum() / input.size(0)
elif ctx.reduction == ctx.reduction_dict['sum']:
output = output.sum()
ctx.save_for_backward(input_softmax, target, weight)
return output
@staticmethod
def backward(ctx, grad_output):
input_softmax, target, weight = ctx.saved_tensors
buff = input_softmax.new_zeros(input_softmax.size(0))
grad_input = input_softmax.new_zeros(input_softmax.size())
ext_module.softmax_focal_loss_backward(
input_softmax,
target,
weight,
buff,
grad_input,
gamma=ctx.gamma,
alpha=ctx.alpha)
grad_input *= grad_output
if ctx.reduction == ctx.reduction_dict['mean']:
grad_input /= input_softmax.size(0)
return grad_input, None, None, None, None, None
softmax_focal_loss = SoftmaxFocalLossFunction.apply
class SoftmaxFocalLoss(nn.Module):
def __init__(self, gamma, alpha, weight=None, reduction='mean'):
super(SoftmaxFocalLoss, self).__init__()
self.gamma = gamma
self.alpha = alpha
self.register_buffer('weight', weight)
self.reduction = reduction
def forward(self, input, target):
return softmax_focal_loss(input, target, self.gamma, self.alpha,
self.weight, self.reduction)
def __repr__(self):
s = self.__class__.__name__
s += f'(gamma={self.gamma}, '
s += f'alpha={self.alpha}, '
s += f'reduction={self.reduction})'
return s
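For reference (not part of this diff): a usage sketch of `SigmoidFocalLoss`. The extension op expects 2D logits and 1D integer class targets on the same CUDA device, and the chosen reduction is applied afterwards. Assumes the `mmcv.ops` import path:

```python
import torch
from mmcv.ops import SigmoidFocalLoss  # assumed import path

logits = torch.randn(8, 4, device='cuda', requires_grad=True)  # (num_samples, num_classes)
labels = torch.randint(0, 4, (8, ), device='cuda')             # LongTensor class indices
criterion = SigmoidFocalLoss(gamma=2.0, alpha=0.25)
loss = criterion(logits, labels)   # scalar, since reduction defaults to 'mean'
loss.backward()
```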
import math
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..cnn import kaiming_init
class GeneralizedAttention(nn.Module):
"""GeneralizedAttention module.
See 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks'
    (https://arxiv.org/abs/1904.05873) for details.
Args:
in_channels (int): Channels of the input feature map.
spatial_range (int): The spatial range.
-1 indicates no spatial range constraint.
num_heads (int): The head number of empirical_attention module.
position_embedding_dim (int): The position embedding dimension.
position_magnitude (int): A multiplier acting on coord difference.
kv_stride (int): The feature stride acting on key/value feature map.
q_stride (int): The feature stride acting on query feature map.
attention_type (str): A binary indicator string for indicating which
items in generalized empirical_attention module are used.
'1000' indicates 'query and key content' (appr - appr) item,
'0100' indicates 'query content and relative position'
(appr - position) item,
'0010' indicates 'key content only' (bias - appr) item,
'0001' indicates 'relative position only' (bias - position) item.
"""
def __init__(self,
in_channels,
spatial_range=-1,
num_heads=9,
position_embedding_dim=-1,
position_magnitude=1,
kv_stride=2,
q_stride=1,
attention_type='1111'):
super(GeneralizedAttention, self).__init__()
# hard range means local range for non-local operation
self.position_embedding_dim = (
position_embedding_dim
if position_embedding_dim > 0 else in_channels)
self.position_magnitude = position_magnitude
self.num_heads = num_heads
self.in_channels = in_channels
self.spatial_range = spatial_range
self.kv_stride = kv_stride
self.q_stride = q_stride
self.attention_type = [bool(int(_)) for _ in attention_type]
self.qk_embed_dim = in_channels // num_heads
out_c = self.qk_embed_dim * num_heads
if self.attention_type[0] or self.attention_type[1]:
self.query_conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_c,
kernel_size=1,
bias=False)
self.query_conv.kaiming_init = True
if self.attention_type[0] or self.attention_type[2]:
self.key_conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_c,
kernel_size=1,
bias=False)
self.key_conv.kaiming_init = True
self.v_dim = in_channels // num_heads
self.value_conv = nn.Conv2d(
in_channels=in_channels,
out_channels=self.v_dim * num_heads,
kernel_size=1,
bias=False)
self.value_conv.kaiming_init = True
if self.attention_type[1] or self.attention_type[3]:
self.appr_geom_fc_x = nn.Linear(
self.position_embedding_dim // 2, out_c, bias=False)
self.appr_geom_fc_x.kaiming_init = True
self.appr_geom_fc_y = nn.Linear(
self.position_embedding_dim // 2, out_c, bias=False)
self.appr_geom_fc_y.kaiming_init = True
if self.attention_type[2]:
stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
appr_bias_value = -2 * stdv * torch.rand(out_c) + stdv
self.appr_bias = nn.Parameter(appr_bias_value)
if self.attention_type[3]:
stdv = 1.0 / math.sqrt(self.qk_embed_dim * 2)
geom_bias_value = -2 * stdv * torch.rand(out_c) + stdv
self.geom_bias = nn.Parameter(geom_bias_value)
self.proj_conv = nn.Conv2d(
in_channels=self.v_dim * num_heads,
out_channels=in_channels,
kernel_size=1,
bias=True)
self.proj_conv.kaiming_init = True
self.gamma = nn.Parameter(torch.zeros(1))
if self.spatial_range >= 0:
            # only works when the non-local module is placed after a 3x3 conv
if in_channels == 256:
max_len = 84
elif in_channels == 512:
max_len = 42
max_len_kv = int((max_len - 1.0) / self.kv_stride + 1)
local_constraint_map = np.ones(
                (max_len, max_len, max_len_kv, max_len_kv), dtype=int)
for iy in range(max_len):
for ix in range(max_len):
local_constraint_map[
iy, ix,
max((iy - self.spatial_range) //
self.kv_stride, 0):min((iy + self.spatial_range +
1) // self.kv_stride +
1, max_len),
max((ix - self.spatial_range) //
self.kv_stride, 0):min((ix + self.spatial_range +
1) // self.kv_stride +
1, max_len)] = 0
self.local_constraint_map = nn.Parameter(
torch.from_numpy(local_constraint_map).byte(),
requires_grad=False)
if self.q_stride > 1:
self.q_downsample = nn.AvgPool2d(
kernel_size=1, stride=self.q_stride)
else:
self.q_downsample = None
if self.kv_stride > 1:
self.kv_downsample = nn.AvgPool2d(
kernel_size=1, stride=self.kv_stride)
else:
self.kv_downsample = None
self.init_weights()
def get_position_embedding(self,
h,
w,
h_kv,
w_kv,
q_stride,
kv_stride,
device,
feat_dim,
wave_length=1000):
h_idxs = torch.linspace(0, h - 1, h).cuda(device)
h_idxs = h_idxs.view((h, 1)) * q_stride
w_idxs = torch.linspace(0, w - 1, w).cuda(device)
w_idxs = w_idxs.view((w, 1)) * q_stride
h_kv_idxs = torch.linspace(0, h_kv - 1, h_kv).cuda(device)
h_kv_idxs = h_kv_idxs.view((h_kv, 1)) * kv_stride
w_kv_idxs = torch.linspace(0, w_kv - 1, w_kv).cuda(device)
w_kv_idxs = w_kv_idxs.view((w_kv, 1)) * kv_stride
# (h, h_kv, 1)
h_diff = h_idxs.unsqueeze(1) - h_kv_idxs.unsqueeze(0)
h_diff *= self.position_magnitude
# (w, w_kv, 1)
w_diff = w_idxs.unsqueeze(1) - w_kv_idxs.unsqueeze(0)
w_diff *= self.position_magnitude
feat_range = torch.arange(0, feat_dim / 4).cuda(device)
dim_mat = torch.Tensor([wave_length]).cuda(device)
dim_mat = dim_mat**((4. / feat_dim) * feat_range)
dim_mat = dim_mat.view((1, 1, -1))
embedding_x = torch.cat(
((w_diff / dim_mat).sin(), (w_diff / dim_mat).cos()), dim=2)
embedding_y = torch.cat(
((h_diff / dim_mat).sin(), (h_diff / dim_mat).cos()), dim=2)
return embedding_x, embedding_y
def forward(self, x_input):
num_heads = self.num_heads
# use empirical_attention
if self.q_downsample is not None:
x_q = self.q_downsample(x_input)
else:
x_q = x_input
n, _, h, w = x_q.shape
if self.kv_downsample is not None:
x_kv = self.kv_downsample(x_input)
else:
x_kv = x_input
_, _, h_kv, w_kv = x_kv.shape
if self.attention_type[0] or self.attention_type[1]:
proj_query = self.query_conv(x_q).view(
(n, num_heads, self.qk_embed_dim, h * w))
proj_query = proj_query.permute(0, 1, 3, 2)
if self.attention_type[0] or self.attention_type[2]:
proj_key = self.key_conv(x_kv).view(
(n, num_heads, self.qk_embed_dim, h_kv * w_kv))
if self.attention_type[1] or self.attention_type[3]:
position_embed_x, position_embed_y = self.get_position_embedding(
h, w, h_kv, w_kv, self.q_stride, self.kv_stride,
x_input.device, self.position_embedding_dim)
# (n, num_heads, w, w_kv, dim)
position_feat_x = self.appr_geom_fc_x(position_embed_x).\
view(1, w, w_kv, num_heads, self.qk_embed_dim).\
permute(0, 3, 1, 2, 4).\
repeat(n, 1, 1, 1, 1)
# (n, num_heads, h, h_kv, dim)
position_feat_y = self.appr_geom_fc_y(position_embed_y).\
view(1, h, h_kv, num_heads, self.qk_embed_dim).\
permute(0, 3, 1, 2, 4).\
repeat(n, 1, 1, 1, 1)
position_feat_x /= math.sqrt(2)
position_feat_y /= math.sqrt(2)
# accelerate for saliency only
if (np.sum(self.attention_type) == 1) and self.attention_type[2]:
appr_bias = self.appr_bias.\
view(1, num_heads, 1, self.qk_embed_dim).\
repeat(n, 1, 1, 1)
energy = torch.matmul(appr_bias, proj_key).\
view(n, num_heads, 1, h_kv * w_kv)
h = 1
w = 1
else:
# (n, num_heads, h*w, h_kv*w_kv), query before key, 540mb for
if not self.attention_type[0]:
energy = torch.zeros(
n,
num_heads,
h,
w,
h_kv,
w_kv,
dtype=x_input.dtype,
device=x_input.device)
# attention_type[0]: appr - appr
# attention_type[1]: appr - position
# attention_type[2]: bias - appr
# attention_type[3]: bias - position
if self.attention_type[0] or self.attention_type[2]:
if self.attention_type[0] and self.attention_type[2]:
appr_bias = self.appr_bias.\
view(1, num_heads, 1, self.qk_embed_dim)
energy = torch.matmul(proj_query + appr_bias, proj_key).\
view(n, num_heads, h, w, h_kv, w_kv)
elif self.attention_type[0]:
energy = torch.matmul(proj_query, proj_key).\
view(n, num_heads, h, w, h_kv, w_kv)
elif self.attention_type[2]:
appr_bias = self.appr_bias.\
view(1, num_heads, 1, self.qk_embed_dim).\
repeat(n, 1, 1, 1)
energy += torch.matmul(appr_bias, proj_key).\
view(n, num_heads, 1, 1, h_kv, w_kv)
if self.attention_type[1] or self.attention_type[3]:
if self.attention_type[1] and self.attention_type[3]:
geom_bias = self.geom_bias.\
view(1, num_heads, 1, self.qk_embed_dim)
proj_query_reshape = (proj_query + geom_bias).\
view(n, num_heads, h, w, self.qk_embed_dim)
energy_x = torch.matmul(
proj_query_reshape.permute(0, 1, 3, 2, 4),
position_feat_x.permute(0, 1, 2, 4, 3))
energy_x = energy_x.\
permute(0, 1, 3, 2, 4).unsqueeze(4)
energy_y = torch.matmul(
proj_query_reshape,
position_feat_y.permute(0, 1, 2, 4, 3))
energy_y = energy_y.unsqueeze(5)
energy += energy_x + energy_y
elif self.attention_type[1]:
proj_query_reshape = proj_query.\
view(n, num_heads, h, w, self.qk_embed_dim)
proj_query_reshape = proj_query_reshape.\
permute(0, 1, 3, 2, 4)
position_feat_x_reshape = position_feat_x.\
permute(0, 1, 2, 4, 3)
position_feat_y_reshape = position_feat_y.\
permute(0, 1, 2, 4, 3)
energy_x = torch.matmul(proj_query_reshape,
position_feat_x_reshape)
energy_x = energy_x.permute(0, 1, 3, 2, 4).unsqueeze(4)
energy_y = torch.matmul(proj_query_reshape,
position_feat_y_reshape)
energy_y = energy_y.unsqueeze(5)
energy += energy_x + energy_y
elif self.attention_type[3]:
geom_bias = self.geom_bias.\
view(1, num_heads, self.qk_embed_dim, 1).\
repeat(n, 1, 1, 1)
position_feat_x_reshape = position_feat_x.\
view(n, num_heads, w*w_kv, self.qk_embed_dim)
position_feat_y_reshape = position_feat_y.\
view(n, num_heads, h * h_kv, self.qk_embed_dim)
energy_x = torch.matmul(position_feat_x_reshape, geom_bias)
energy_x = energy_x.view(n, num_heads, 1, w, 1, w_kv)
energy_y = torch.matmul(position_feat_y_reshape, geom_bias)
energy_y = energy_y.view(n, num_heads, h, 1, h_kv, 1)
energy += energy_x + energy_y
energy = energy.view(n, num_heads, h * w, h_kv * w_kv)
if self.spatial_range >= 0:
cur_local_constraint_map = \
self.local_constraint_map[:h, :w, :h_kv, :w_kv].\
contiguous().\
view(1, 1, h*w, h_kv*w_kv)
energy = energy.masked_fill_(cur_local_constraint_map,
float('-inf'))
attention = F.softmax(energy, 3)
proj_value = self.value_conv(x_kv)
proj_value_reshape = proj_value.\
view((n, num_heads, self.v_dim, h_kv * w_kv)).\
permute(0, 1, 3, 2)
out = torch.matmul(attention, proj_value_reshape).\
permute(0, 1, 3, 2).\
contiguous().\
view(n, self.v_dim * self.num_heads, h, w)
out = self.proj_conv(out)
out = self.gamma * out + x_input
return out
def init_weights(self):
for m in self.modules():
if hasattr(m, 'kaiming_init') and m.kaiming_init:
kaiming_init(
m,
mode='fan_in',
nonlinearity='leaky_relu',
bias=0,
distribution='uniform',
a=1)
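For reference (not part of this diff): a usage sketch of `GeneralizedAttention`. `num_heads` must divide `in_channels`, and the module adds a gamma-gated residual so the output keeps the input shape. Assumes a CUDA device (the position embedding is built with `.cuda()`) and the `mmcv.ops` import path:

```python
import torch
from mmcv.ops import GeneralizedAttention  # assumed import path

# '1111' enables all four terms: appr-appr, appr-position, bias-appr, bias-position
attn = GeneralizedAttention(in_channels=16, num_heads=8, kv_stride=2,
                            attention_type='1111').cuda()
x = torch.randn(2, 16, 20, 20, device='cuda')
out = attn(x)            # gated residual output, same shape as the input
print(out.shape)         # torch.Size([2, 16, 20, 20])
```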
import torch
if torch.__version__ == 'parrots':
import parrots
def get_compiler_version():
return 'GCC ' + parrots.version.compiler
def get_compiling_cuda_version():
return parrots.version.cuda
else:
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext', ['get_compiler_version', 'get_compiling_cuda_version'])
def get_compiler_version():
return ext_module.get_compiler_version()
def get_compiling_cuda_version():
return ext_module.get_compiling_cuda_version()
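For reference (not part of this diff): these helpers report how the extension was built, whichever backend (PyTorch or parrots) is in use. A minimal usage sketch, assuming the `mmcv.ops` import path:

```python
from mmcv.ops import get_compiler_version, get_compiling_cuda_version  # assumed import path

print(get_compiler_version())        # e.g. 'GCC 7.3'
print(get_compiling_cuda_version())  # e.g. '10.1'
```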
import math
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
ext_module = ext_loader.load_ext(
'_ext', ['masked_im2col_forward', 'masked_col2im_forward'])
class MaskedConv2dFunction(Function):
@staticmethod
def symbolic(g, features, mask, weight, bias, padding, stride):
return g.op(
'MMCVMaskedConv2d',
features,
mask,
weight,
bias,
padding=padding,
stride=stride)
@staticmethod
def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
assert mask.dim() == 3 and mask.size(0) == 1
assert features.dim() == 4 and features.size(0) == 1
assert features.size()[2:] == mask.size()[1:]
pad_h, pad_w = _pair(padding)
stride_h, stride_w = _pair(stride)
if stride_h != 1 or stride_w != 1:
raise ValueError(
                'Only stride 1 is supported in masked_conv2d currently.')
out_channel, in_channel, kernel_h, kernel_w = weight.size()
batch_size = features.size(0)
out_h = int(
math.floor((features.size(2) + 2 * pad_h -
(kernel_h - 1) - 1) / stride_h + 1))
out_w = int(
math.floor((features.size(3) + 2 * pad_w -
                        (kernel_w - 1) - 1) / stride_w + 1))
mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False)
output = features.new_zeros(batch_size, out_channel, out_h, out_w)
if mask_inds.numel() > 0:
mask_h_idx = mask_inds[:, 0].contiguous()
mask_w_idx = mask_inds[:, 1].contiguous()
data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
mask_inds.size(0))
ext_module.masked_im2col_forward(
features,
mask_h_idx,
mask_w_idx,
data_col,
kernel_h=kernel_h,
kernel_w=kernel_w,
pad_h=pad_h,
pad_w=pad_w)
            masked_output = torch.addmm(bias[:, None],
                                        weight.view(out_channel, -1), data_col)
ext_module.masked_col2im_forward(
masked_output,
mask_h_idx,
mask_w_idx,
output,
height=out_h,
width=out_w,
channels=out_channel)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
return (None, ) * 5
masked_conv2d = MaskedConv2dFunction.apply
class MaskedConv2d(nn.Conv2d):
"""A MaskedConv2d which inherits the official Conv2d.
    The masked forward does not implement the backward function and
    currently only supports a stride of 1.
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
bias=True):
super(MaskedConv2d,
self).__init__(in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, bias)
def forward(self, input, mask=None):
if mask is None: # fallback to the normal Conv2d
return super(MaskedConv2d, self).forward(input)
else:
return masked_conv2d(input, mask, self.weight, self.bias,
self.padding)
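For reference (not part of this diff): a usage sketch of `MaskedConv2d`, which evaluates the convolution only at positions where the mask is positive and falls back to the ordinary `nn.Conv2d` forward when no mask is given. Assumes the compiled `_ext` extension, a CUDA device, and the `mmcv.ops` import path:

```python
import torch
from mmcv.ops import MaskedConv2d  # assumed import path

conv = MaskedConv2d(16, 32, kernel_size=3, padding=1).cuda()
x = torch.randn(1, 16, 40, 40, device='cuda')            # batch size must be 1 in masked mode
mask = (torch.rand(1, 40, 40, device='cuda') > 0.9).float()
out_masked = conv(x, mask)   # computed only at masked positions, zero elsewhere
out_full = conv(x)           # mask=None: ordinary Conv2d forward
```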
from abc import abstractmethod
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..cnn import ConvModule
class BaseMergeCell(nn.Module):
"""The basic class for cells used in NAS-FPN and NAS-FCOS.
    BaseMergeCell takes 2 inputs. After applying convolution
    on them, they are resized to the target size. Then,
    they go through binary_op, which depends on the type of cell.
    If with_out_conv is True, the output will go through
    another convolution layer.
Args:
        fused_channels (int): number of input channels of the out_conv layer.
        out_channels (int): number of output channels of the out_conv layer.
        with_out_conv (bool): Whether to use the out_conv layer.
out_conv_cfg (dict): Config dict for convolution layer, which should
contain "groups", "kernel_size", "padding", "bias" to build
out_conv layer.
out_norm_cfg (dict): Config dict for normalization layer in out_conv.
out_conv_order (tuple): The order of conv/norm/activation layers in
out_conv.
with_input1_conv (bool): Whether to use convolution on input1.
with_input2_conv (bool): Whether to use convolution on input2.
input_conv_cfg (dict): Config dict for building input1_conv layer and
input2_conv layer, which is expected to contain the type of
convolution.
Default: None, which means using conv2d.
input_norm_cfg (dict): Config dict for normalization layer in
input1_conv and input2_conv layer. Default: None.
upsample_mode (str): Interpolation method used to resize the output
of input1_conv and input2_conv to target size. Currently, we
support ['nearest', 'bilinear']. Default: 'nearest'.
"""
def __init__(self,
fused_channels=256,
out_channels=256,
with_out_conv=True,
out_conv_cfg=dict(
groups=1, kernel_size=3, padding=1, bias=True),
out_norm_cfg=None,
out_conv_order=('act', 'conv', 'norm'),
with_input1_conv=False,
with_input2_conv=False,
input_conv_cfg=None,
input_norm_cfg=None,
upsample_mode='nearest'):
super(BaseMergeCell, self).__init__()
assert upsample_mode in ['nearest', 'bilinear']
self.with_out_conv = with_out_conv
self.with_input1_conv = with_input1_conv
self.with_input2_conv = with_input2_conv
self.upsample_mode = upsample_mode
if self.with_out_conv:
self.out_conv = ConvModule(
fused_channels,
out_channels,
**out_conv_cfg,
norm_cfg=out_norm_cfg,
order=out_conv_order)
self.input1_conv = self._build_input_conv(
out_channels, input_conv_cfg,
input_norm_cfg) if with_input1_conv else nn.Sequential()
self.input2_conv = self._build_input_conv(
out_channels, input_conv_cfg,
input_norm_cfg) if with_input2_conv else nn.Sequential()
def _build_input_conv(self, channel, conv_cfg, norm_cfg):
return ConvModule(
channel,
channel,
3,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
bias=True)
@abstractmethod
def _binary_op(self, x1, x2):
pass
def _resize(self, x, size):
if x.shape[-2:] == size:
return x
elif x.shape[-2:] < size:
return F.interpolate(x, size=size, mode=self.upsample_mode)
else:
assert x.shape[-2] % size[-2] == 0 and x.shape[-1] % size[-1] == 0
kernel_size = x.shape[-1] // size[-1]
x = F.max_pool2d(x, kernel_size=kernel_size, stride=kernel_size)
return x
def forward(self, x1, x2, out_size=None):
assert x1.shape[:2] == x2.shape[:2]
assert out_size is None or len(out_size) == 2
if out_size is None: # resize to larger one
out_size = max(x1.size()[2:], x2.size()[2:])
x1 = self.input1_conv(x1)
x2 = self.input2_conv(x2)
x1 = self._resize(x1, out_size)
x2 = self._resize(x2, out_size)
x = self._binary_op(x1, x2)
if self.with_out_conv:
x = self.out_conv(x)
return x
class SumCell(BaseMergeCell):
def __init__(self, in_channels, out_channels, **kwargs):
super(SumCell, self).__init__(in_channels, out_channels, **kwargs)
def _binary_op(self, x1, x2):
return x1 + x2
class ConcatCell(BaseMergeCell):
def __init__(self, in_channels, out_channels, **kwargs):
super(ConcatCell, self).__init__(in_channels * 2, out_channels,
**kwargs)
def _binary_op(self, x1, x2):
ret = torch.cat([x1, x2], dim=1)
return ret
class GlobalPoolingCell(BaseMergeCell):
def __init__(self, in_channels=None, out_channels=None, **kwargs):
super().__init__(in_channels, out_channels, **kwargs)
self.global_pool = nn.AdaptiveAvgPool2d((1, 1))
def _binary_op(self, x1, x2):
x2_att = self.global_pool(x2).sigmoid()
return x2 + x2_att * x1
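For reference (not part of this diff): a usage sketch of the merge cells. Both inputs are resized to the larger spatial size before the binary op; `SumCell` adds them, while `GlobalPoolingCell` gates one input with the other's global-pooled sigmoid attention. Runs on CPU and assumes the classes are importable from `mmcv.ops`:

```python
import torch
from mmcv.ops import GlobalPoolingCell, SumCell  # assumed import path

cell = SumCell(in_channels=16, out_channels=16)
x1 = torch.randn(2, 16, 32, 32)
x2 = torch.randn(2, 16, 16, 16)
out = cell(x1, x2)           # x2 is upsampled to 32x32, summed with x1, then conv'ed
print(out.shape)             # torch.Size([2, 16, 32, 32])

gp = GlobalPoolingCell(with_out_conv=False)
out2 = gp(x1, x1)            # global-pooling sigmoid attention gates the first input
```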
import math
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single
from ..cnn import CONV_LAYERS
from ..utils import ext_loader, print_log
ext_module = ext_loader.load_ext(
'_ext',
['modulated_deform_conv_forward', 'modulated_deform_conv_backward'])
class ModulatedDeformConv2dFunction(Function):
@staticmethod
def symbolic(g, input, offset, mask, weight, bias, stride, padding,
dilation, groups, deform_groups):
return g.op(
'MMCVModulatedDeformConv2d',
input,
offset,
mask,
weight,
bias,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
deform_groups=deform_groups)
@staticmethod
def forward(ctx,
input,
offset,
mask,
weight,
bias=None,
stride=1,
padding=0,
dilation=1,
groups=1,
deform_groups=1):
if input is not None and input.dim() != 4:
raise ValueError(
                f'Expected 4D tensor as input, got {input.dim()}D tensor '
                'instead.')
ctx.stride = _pair(stride)
ctx.padding = _pair(padding)
ctx.dilation = _pair(dilation)
ctx.groups = groups
ctx.deform_groups = deform_groups
ctx.with_bias = bias is not None
if not ctx.with_bias:
bias = input.new_empty(0) # fake tensor
if weight.requires_grad or mask.requires_grad or offset.requires_grad \
or input.requires_grad:
ctx.save_for_backward(input, offset, mask, weight, bias)
output = input.new_empty(
ModulatedDeformConv2dFunction._output_size(ctx, input, weight))
ctx._bufs = [input.new_empty(0), input.new_empty(0)]
ext_module.modulated_deform_conv_forward(
input,
weight,
bias,
ctx._bufs[0],
offset,
mask,
output,
ctx._bufs[1],
kernel_h=weight.size(2),
kernel_w=weight.size(3),
stride_h=ctx.stride[0],
stride_w=ctx.stride[1],
pad_h=ctx.padding[0],
pad_w=ctx.padding[1],
dilation_h=ctx.dilation[0],
dilation_w=ctx.dilation[1],
group=ctx.groups,
deformable_group=ctx.deform_groups,
with_bias=ctx.with_bias)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, offset, mask, weight, bias = ctx.saved_tensors
grad_input = torch.zeros_like(input)
grad_offset = torch.zeros_like(offset)
grad_mask = torch.zeros_like(mask)
grad_weight = torch.zeros_like(weight)
grad_bias = torch.zeros_like(bias)
grad_output = grad_output.contiguous()
ext_module.modulated_deform_conv_backward(
input,
weight,
bias,
ctx._bufs[0],
offset,
mask,
ctx._bufs[1],
grad_input,
grad_weight,
grad_bias,
grad_offset,
grad_mask,
grad_output,
kernel_h=weight.size(2),
kernel_w=weight.size(3),
stride_h=ctx.stride[0],
stride_w=ctx.stride[1],
pad_h=ctx.padding[0],
pad_w=ctx.padding[1],
dilation_h=ctx.dilation[0],
dilation_w=ctx.dilation[1],
group=ctx.groups,
deformable_group=ctx.deform_groups,
with_bias=ctx.with_bias)
if not ctx.with_bias:
grad_bias = None
return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
None, None, None, None, None)
@staticmethod
def _output_size(ctx, input, weight):
channels = weight.size(0)
output_size = (input.size(0), channels)
for d in range(input.dim() - 2):
in_size = input.size(d + 2)
pad = ctx.padding[d]
kernel = ctx.dilation[d] * (weight.size(d + 2) - 1) + 1
stride_ = ctx.stride[d]
output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1, )
if not all(map(lambda s: s > 0, output_size)):
raise ValueError(
'convolution input is too small (output would be ' +
'x'.join(map(str, output_size)) + ')')
return output_size
modulated_deform_conv2d = ModulatedDeformConv2dFunction.apply
class ModulatedDeformConv2d(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
deform_groups=1,
bias=True):
super(ModulatedDeformConv2d, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
self.stride = _pair(stride)
self.padding = _pair(padding)
self.dilation = _pair(dilation)
self.groups = groups
self.deform_groups = deform_groups
# enable compatibility with nn.Conv2d
self.transposed = False
self.output_padding = _single(0)
self.weight = nn.Parameter(
torch.Tensor(out_channels, in_channels // groups,
*self.kernel_size))
if bias:
self.bias = nn.Parameter(torch.Tensor(out_channels))
else:
self.register_parameter('bias', None)
self.init_weights()
def init_weights(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.zero_()
def forward(self, x, offset, mask):
return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
self.stride, self.padding,
self.dilation, self.groups,
self.deform_groups)
@CONV_LAYERS.register_module('DCNv2')
class ModulatedDeformConv2dPack(ModulatedDeformConv2d):
"""A ModulatedDeformable Conv Encapsulation that acts as normal Conv
layers.
Args:
in_channels (int): Same as nn.Conv2d.
out_channels (int): Same as nn.Conv2d.
kernel_size (int or tuple[int]): Same as nn.Conv2d.
stride (int): Same as nn.Conv2d, while tuple is not supported.
padding (int): Same as nn.Conv2d, while tuple is not supported.
dilation (int): Same as nn.Conv2d, while tuple is not supported.
groups (int): Same as nn.Conv2d.
bias (bool or str): If specified as `auto`, it will be decided by the
norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
False.
"""
_version = 2
def __init__(self, *args, **kwargs):
super(ModulatedDeformConv2dPack, self).__init__(*args, **kwargs)
self.conv_offset = nn.Conv2d(
self.in_channels,
self.deform_groups * 3 * self.kernel_size[0] * self.kernel_size[1],
kernel_size=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
bias=True)
self.init_weights()
def init_weights(self):
super(ModulatedDeformConv2dPack, self).init_weights()
if hasattr(self, 'conv_offset'):
self.conv_offset.weight.data.zero_()
self.conv_offset.bias.data.zero_()
def forward(self, x):
out = self.conv_offset(x)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
return modulated_deform_conv2d(x, offset, mask, self.weight, self.bias,
self.stride, self.padding,
self.dilation, self.groups,
self.deform_groups)
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
missing_keys, unexpected_keys, error_msgs):
version = local_metadata.get('version', None)
if version is None or version < 2:
# the key is different in early versions
# In version < 2, ModulatedDeformConvPack
# loads previous benchmark models.
if (prefix + 'conv_offset.weight' not in state_dict
and prefix[:-1] + '_offset.weight' in state_dict):
state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
prefix[:-1] + '_offset.weight')
if (prefix + 'conv_offset.bias' not in state_dict
and prefix[:-1] + '_offset.bias' in state_dict):
state_dict[prefix +
'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
'_offset.bias')
if version is not None and version > 1:
print_log(
f'ModulatedDeformConvPack {prefix.rstrip(".")} is upgraded to '
'version 2.',
logger='root')
super()._load_from_state_dict(state_dict, prefix, local_metadata,
strict, missing_keys, unexpected_keys,
error_msgs)
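For reference (not part of this diff): a usage sketch of `ModulatedDeformConv2dPack` (registered as 'DCNv2'), whose `conv_offset` layer predicts both the offsets and a sigmoid-gated modulation mask. Assumes the compiled `_ext` extension, a CUDA device, and the `mmcv.ops` import path:

```python
import torch
from mmcv.ops import ModulatedDeformConv2dPack  # assumed import path

conv = ModulatedDeformConv2dPack(16, 32, kernel_size=3, padding=1,
                                 deform_groups=1).cuda()
x = torch.randn(2, 16, 28, 28, device='cuda')
out = conv(x)            # offsets and modulation mask are predicted internally
print(out.shape)         # torch.Size([2, 32, 28, 28])
```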
import numpy as np
import torch
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', ['nms', 'softnms', 'nms_match'])
def nms(boxes, scores, iou_threshold, offset=0):
"""Dispatch to either CPU or GPU NMS implementations.
    The input can be either a torch tensor or a numpy array. GPU NMS will be
    used if the input is a GPU tensor; otherwise CPU NMS will be used.
    The returned type will always be the same as the inputs.
Arguments:
boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
scores (torch.Tensor or np.ndarray): scores in shape (N, ).
iou_threshold (float): IoU threshold for NMS.
offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
Returns:
        tuple: kept dets (boxes and scores) and indices, which always have the \
            same data type as the input.
Example:
>>> boxes = np.array([[49.1, 32.4, 51.0, 35.9],
>>> [49.3, 32.9, 51.0, 35.3],
>>> [49.2, 31.8, 51.0, 35.4],
>>> [35.1, 11.5, 39.1, 15.7],
>>> [35.6, 11.8, 39.3, 14.2],
>>> [35.3, 11.5, 39.9, 14.5],
>>> [35.2, 11.7, 39.7, 15.7]], dtype=np.float32)
>>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.5, 0.4, 0.3],\
dtype=np.float32)
>>> iou_threshold = 0.6
>>> dets, inds = nms(boxes, scores, iou_threshold)
>>> assert len(inds) == len(dets) == 3
"""
assert isinstance(boxes, (torch.Tensor, np.ndarray))
assert isinstance(scores, (torch.Tensor, np.ndarray))
is_numpy = False
if isinstance(boxes, np.ndarray):
is_numpy = True
boxes = torch.from_numpy(boxes)
if isinstance(scores, np.ndarray):
scores = torch.from_numpy(scores)
assert boxes.size(1) == 4
assert boxes.size(0) == scores.size(0)
assert offset in (0, 1)
if torch.__version__ == 'parrots':
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1 + offset) * (y2 - y1 + offset)
_, order = scores.sort(0, descending=True)
if boxes.device == 'cpu':
indata_list = [boxes, order, areas]
indata_dict = {
'iou_threshold': float(iou_threshold),
'offset': int(offset)
}
select = ext_module.nms(*indata_list, **indata_dict).byte()
else:
boxes_sorted = boxes.index_select(0, order)
indata_list = [boxes_sorted, order, areas]
indata_dict = {
'iou_threshold': float(iou_threshold),
'offset': int(offset)
}
select = ext_module.nms(*indata_list, **indata_dict)
inds = order.masked_select(select)
else:
inds = ext_module.nms(
boxes,
scores,
iou_threshold=float(iou_threshold),
offset=int(offset))
dets = torch.cat((boxes[inds], scores[inds].reshape(-1, 1)), dim=1)
if is_numpy:
dets = dets.cpu().numpy()
inds = inds.cpu().numpy()
return dets, inds
def soft_nms(boxes,
scores,
iou_threshold=0.3,
sigma=0.5,
min_score=1e-3,
method='linear',
offset=0):
"""Dispatch to only CPU Soft NMS implementations.
The input can be either a torch tensor or numpy array.
The returned type will always be the same as inputs.
Arguments:
boxes (torch.Tensor or np.ndarray): boxes in shape (N, 4).
scores (torch.Tensor or np.ndarray): scores in shape (N, ).
iou_threshold (float): IoU threshold for NMS.
        sigma (float): hyperparameter for the gaussian method
        min_score (float): score filter threshold
        method (str): one of 'naive', 'linear' or 'gaussian'
offset (int, 0 or 1): boxes' width or height is (x2 - x1 + offset).
Returns:
        tuple: kept dets (boxes and scores) and indices, which always have the \
            same data type as the input.
Example:
>>> boxes = np.array([[4., 3., 5., 3.],
>>> [4., 3., 5., 4.],
>>> [3., 1., 3., 1.],
>>> [3., 1., 3., 1.],
>>> [3., 1., 3., 1.],
>>> [3., 1., 3., 1.]], dtype=np.float32)
>>> scores = np.array([0.9, 0.9, 0.5, 0.5, 0.4, 0.0], dtype=np.float32)
>>> iou_threshold = 0.6
>>> dets, inds = soft_nms(boxes, scores, iou_threshold, sigma=0.5)
>>> assert len(inds) == len(dets) == 5
"""
assert isinstance(boxes, (torch.Tensor, np.ndarray))
assert isinstance(scores, (torch.Tensor, np.ndarray))
is_numpy = False
if isinstance(boxes, np.ndarray):
is_numpy = True
boxes = torch.from_numpy(boxes)
if isinstance(scores, np.ndarray):
scores = torch.from_numpy(scores)
assert boxes.size(1) == 4
assert boxes.size(0) == scores.size(0)
assert offset in (0, 1)
method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2}
assert method in method_dict.keys()
if torch.__version__ == 'parrots':
x1 = boxes[:, 0]
y1 = boxes[:, 1]
x2 = boxes[:, 2]
y2 = boxes[:, 3]
areas = (x2 - x1 + offset) * (y2 - y1 + offset)
indata_list = [boxes.cpu(), scores.cpu(), areas.cpu()]
indata_dict = {
'iou_threshold': float(iou_threshold),
'sigma': float(sigma),
'min_score': min_score,
'method': method_dict[method],
'offset': int(offset)
}
dets, inds, num_out = ext_module.softnms(*indata_list, **indata_dict)
inds = inds[:num_out]
else:
dets = boxes.new_empty((boxes.size(0), 5), device='cpu')
inds = ext_module.softnms(
boxes.cpu(),
scores.cpu(),
dets.cpu(),
iou_threshold=float(iou_threshold),
sigma=float(sigma),
min_score=float(min_score),
method=method_dict[method],
offset=int(offset))
dets = dets[:inds.size(0)]
if is_numpy:
dets = dets.cpu().numpy()
inds = inds.cpu().numpy()
return dets, inds
else:
return dets.to(device=boxes.device), inds.to(device=boxes.device)
def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False):
"""Performs non-maximum suppression in a batched fashion.
Modified from https://github.com/pytorch/vision/blob
/505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39.
In order to perform NMS independently per class, we add an offset to all
the boxes. The offset is dependent only on the class idx, and is large
enough so that boxes from different classes do not overlap.
Arguments:
boxes (torch.Tensor): boxes in shape (N, 4).
scores (torch.Tensor): scores in shape (N, ).
        idxs (torch.Tensor): each index value corresponds to a bbox cluster,
            and NMS will not be applied between elements of different idxs,
            shape (N, ).
nms_cfg (dict): specify nms type and other parameters like iou_thr.
class_agnostic (bool): if true, nms is class agnostic,
i.e. IoU thresholding happens over all boxes,
regardless of the predicted class
Returns:
        tuple: kept dets and indices.
"""
nms_cfg_ = nms_cfg.copy()
class_agnostic = nms_cfg_.pop('class_agnostic', class_agnostic)
if class_agnostic:
boxes_for_nms = boxes
else:
max_coordinate = boxes.max()
offsets = idxs.to(boxes) * (max_coordinate + 1)
boxes_for_nms = boxes + offsets[:, None]
nms_type = nms_cfg_.pop('type', 'nms')
nms_op = eval(nms_type)
dets, keep = nms_op(boxes_for_nms, scores, **nms_cfg_)
boxes = boxes[keep]
scores = dets[:, -1]
return torch.cat([boxes, scores[:, None]], -1), keep
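# A minimal illustrative usage of batched_nms (editor's sketch, not part of
# the original file). It assumes the compiled `_ext` module is available and
# uses random boxes, scores and class indices purely for illustration; the
# per-class offset trick described in the docstring keeps boxes of different
# classes from suppressing each other.
# >>> boxes = torch.rand(100, 4) * 100
# >>> boxes[:, 2:] += boxes[:, :2]  # ensure x2 >= x1 and y2 >= y1
# >>> scores = torch.rand(100)
# >>> idxs = torch.randint(0, 3, (100,))  # class index of each box
# >>> nms_cfg = dict(type='nms', iou_threshold=0.5)
# >>> dets, keep = batched_nms(boxes, scores, idxs, nms_cfg)
# >>> assert dets.size(1) == 5 and keep.dim() == 1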
def nms_match(dets, iou_threshold):
"""Matched dets into different groups by NMS.
NMS match is Similar to NMS but when a bbox is suppressed, nms match will
record the indice of supporessed bbox and form a group with the indice of
kept bbox. In each group, indice is sorted as score order.
Arguments:
dets (torch.Tensor | np.ndarray): Det boxes with scores, shape (N, 5).
        iou_threshold (float): IoU threshold for NMS.
Returns:
        List[Tensor | ndarray]: Each element of the outer list corresponds to
            a matched group, and the inner Tensor/ndarray contains the
            indices of that group in score order.
"""
if dets.shape[0] == 0:
matched = []
else:
        assert dets.shape[-1] == 5, 'input dets.shape should be (N, 5), ' \
            f'but got {dets.shape}'
if isinstance(dets, torch.Tensor):
dets_t = dets.detach().cpu()
else:
dets_t = torch.from_numpy(dets)
matched = ext_module.nms_match(dets_t, float(iou_threshold))
if isinstance(dets, torch.Tensor):
return [dets.new_tensor(m, dtype=torch.long) for m in matched]
else:
return [np.array(m, dtype=np.int) for m in matched]
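# A small illustrative call to nms_match (editor's sketch; the toy detections
# below are assumptions for illustration and require the compiled `_ext`
# module). Each returned group starts with the index of a kept box followed
# by the indices of the boxes it suppressed, sorted by score.
# >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
# >>>                  [49.3, 32.9, 51.0, 35.3, 0.8],
# >>>                  [35.1, 11.5, 39.1, 15.7, 0.5]], dtype=np.float32)
# >>> groups = nms_match(dets, 0.6)
# >>> assert sum(len(g) for g in groups) == len(dets)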
from ..cnn import ConvModule, NonLocal2d
from .context_block import ContextBlock
from .generalized_attention import GeneralizedAttention
plugin_cfg = {
# format: layer_type: (abbreviation, module)
'ContextBlock': ('context_block', ContextBlock),
'GeneralizedAttention': ('gen_attention_block', GeneralizedAttention),
'NonLocal2d': ('nonlocal_block', NonLocal2d),
'ConvModule': ('conv_block', ConvModule),
}
def build_plugin_layer(cfg, postfix='', **kwargs):
"""Build plugin layer.
Args:
        cfg (dict): cfg should contain:
            type (str): identify plugin layer type.
            layer args: args needed to instantiate a plugin layer.
        postfix (int, str): appended into the plugin abbreviation to
            create a named layer.
Returns:
name (str): abbreviation + postfix
layer (nn.Module): created plugin layer
"""
assert isinstance(cfg, dict) and 'type' in cfg
cfg_ = cfg.copy()
layer_type = cfg_.pop('type')
if layer_type not in plugin_cfg:
raise KeyError(f'Unrecognized plugin type {layer_type}')
else:
abbr, plugin_layer = plugin_cfg[layer_type]
assert isinstance(postfix, (int, str))
name = abbr + str(postfix)
layer = plugin_layer(**kwargs, **cfg_)
return name, layer
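# A minimal illustrative usage of build_plugin_layer (editor's sketch; the
# ContextBlock arguments below are assumptions for illustration only):
# >>> cfg = dict(type='ContextBlock', in_channels=16, ratio=1. / 4)
# >>> name, layer = build_plugin_layer(cfg, postfix=1)
# >>> assert name == 'context_block1'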
# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.utils import _pair
def normalize(grid):
"""Normalize input grid from [-1, 1] to [0, 1]
Args:
grid (Tensor): The grid to be normalize, range [-1, 1].
Returns:
Tensor: Normalized grid, range [0, 1].
"""
return (grid + 1.0) / 2.0
def denormalize(grid):
"""Denormalize input grid from range [0, 1] to [-1, 1]
Args:
grid (Tensor): The grid to be denormalize, range [0, 1].
Returns:
Tensor: Denormalized grid, range [-1, 1].
"""
return grid * 2.0 - 1.0
def generate_grid(num_grid, size, device):
"""Generate regular square grid of points in [0, 1] x [0, 1] coordinate
space.
Args:
num_grid (int): The number of grids to sample, one for each region.
size (tuple(int, int)): The side size of the regular grid.
device (torch.device): Desired device of returned tensor.
Returns:
(torch.Tensor): A tensor of shape (num_grid, size[0]*size[1], 2) that
contains coordinates for the regular grids.
"""
affine_trans = torch.tensor([[[1., 0., 0.], [0., 1., 0.]]], device=device)
grid = F.affine_grid(
affine_trans, torch.Size((1, 1, *size)), align_corners=False)
grid = normalize(grid)
return grid.view(1, -1, 2).expand(num_grid, -1, -1)
def rel_roi_point_to_abs_img_point(rois, rel_roi_points):
"""Convert roi based relative point coordinates to image based absolute
point coordinates.
Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
        rel_roi_points (Tensor): Point coordinates inside RoI, relative to
            the RoI location, range (0, 1), shape (N, P, 2)
Returns:
Tensor: Image based absolute point coordinates, shape (N, P, 2)
"""
with torch.no_grad():
assert rel_roi_points.size(0) == rois.size(0)
assert rois.dim() == 2
assert rel_roi_points.dim() == 3
assert rel_roi_points.size(2) == 2
# remove batch idx
if rois.size(1) == 5:
rois = rois[:, 1:]
abs_img_points = rel_roi_points.clone()
abs_img_points[:, :, 0] = abs_img_points[:, :, 0] * (
rois[:, None, 2] - rois[:, None, 0])
abs_img_points[:, :, 1] = abs_img_points[:, :, 1] * (
rois[:, None, 3] - rois[:, None, 1])
abs_img_points[:, :, 0] += rois[:, None, 0]
abs_img_points[:, :, 1] += rois[:, None, 1]
return abs_img_points
def abs_img_point_to_rel_img_point(abs_img_points,
img_shape,
spatial_scale=1.):
"""Convert image based absolute point coordinates to image based relative
coordinates for sampling.
Args:
abs_img_points (Tensor): Image based absolute point coordinates,
shape (N, P, 2)
img_shape (tuple): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1.
Returns:
Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2)
"""
assert isinstance(img_shape, tuple) and len(img_shape) == 2
h, w = img_shape
scale = torch.tensor([w, h],
dtype=torch.float,
device=abs_img_points.device)
scale = scale.view(1, 1, 2)
rel_img_points = abs_img_points / scale * spatial_scale
return rel_img_points
def rel_roi_point_to_rel_img_point(rois,
rel_roi_points,
img_shape,
spatial_scale=1.):
"""Convert roi based relative point coordinates to image based absolute
point coordinates.
Args:
rois (Tensor): RoIs or BBoxes, shape (N, 4) or (N, 5)
rel_roi_points (Tensor): Point coordinates inside RoI, relative to
RoI, location, range (0, 1), shape (N, P, 2)
img_shape (tuple): (height, width) of image or feature map.
spatial_scale (float): Scale points by this factor. Default: 1.
Returns:
Tensor: Image based relative point coordinates for sampling,
shape (N, P, 2)
"""
abs_img_point = rel_roi_point_to_abs_img_point(rois, rel_roi_points)
rel_img_point = abs_img_point_to_rel_img_point(abs_img_point, img_shape,
spatial_scale)
return rel_img_point
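# A worked example of the coordinate helpers above (editor's sketch; numbers
# are for illustration only). A point at relative (0.5, 0.5) inside an RoI
# with (x1, y1, x2, y2) = (10, 10, 30, 50) maps to the absolute image point
# (20, 30); dividing x by the image width (200) and y by the height (100)
# then gives the sampling coordinate (0.1, 0.3).
# >>> rois = torch.tensor([[10., 10., 30., 50.]])
# >>> rel_roi_points = torch.tensor([[[0.5, 0.5]]])
# >>> pts = rel_roi_point_to_rel_img_point(rois, rel_roi_points, (100, 200))
# >>> assert torch.allclose(pts, torch.tensor([[[0.1, 0.3]]]))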
def point_sample(input, points, align_corners=False, **kwargs):
"""A wrapper around :function:`grid_sample` to support 3D point_coords
tensors Unlike :function:`torch.nn.functional.grid_sample` it assumes
point_coords to lie inside [0, 1] x [0, 1] square.
Args:
input (Tensor): Feature map, shape (N, C, H, W).
points (Tensor): Image based absolute point coordinates (normalized),
range [0, 1] x [0, 1], shape (N, P, 2) or (N, Hgrid, Wgrid, 2).
align_corners (bool): Whether align_corners. Default: False
Returns:
Tensor: Features of `point` on `input`, shape (N, C, P) or
(N, C, Hgrid, Wgrid).
"""
add_dim = False
if points.dim() == 3:
add_dim = True
points = points.unsqueeze(2)
output = F.grid_sample(
input, denormalize(points), align_corners=align_corners, **kwargs)
if add_dim:
output = output.squeeze(3)
return output
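# A minimal illustrative usage of point_sample (editor's sketch; shapes are
# assumptions for illustration only): sample C-dimensional features at P
# normalized points per feature map.
# >>> feats = torch.rand(2, 8, 16, 16)   # (N, C, H, W)
# >>> points = torch.rand(2, 32, 2)      # (N, P, 2), coordinates in [0, 1]
# >>> sampled = point_sample(feats, points)
# >>> assert sampled.shape == (2, 8, 32)  # (N, C, P)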
class SimpleRoIAlign(nn.Module):
def __init__(self, out_size, spatial_scale, aligned=True):
"""Simple RoI align in PointRend, faster than standard RoIAlign.
Args:
out_size (tuple[int]): h, w
spatial_scale (float): scale the input boxes by this number
            aligned (bool): if False, use the legacy implementation in
                MMDetection; align_corners=True will be used in
                F.grid_sample. If True, align the results more perfectly.
"""
super(SimpleRoIAlign, self).__init__()
self.out_size = _pair(out_size)
self.spatial_scale = float(spatial_scale)
# to be consistent with other RoI ops
self.use_torchvision = False
self.aligned = aligned
def forward(self, features, rois):
num_imgs = features.size(0)
num_rois = rois.size(0)
rel_roi_points = generate_grid(
num_rois, self.out_size, device=rois.device)
point_feats = []
for batch_ind in range(num_imgs):
# unravel batch dim
feat = features[batch_ind].unsqueeze(0)
inds = (rois[:, 0].long() == batch_ind)
if inds.any():
rel_img_points = rel_roi_point_to_rel_img_point(
rois[inds], rel_roi_points[inds], feat.shape[2:],
self.spatial_scale).unsqueeze(0)
point_feat = point_sample(
feat, rel_img_points, align_corners=not self.aligned)
point_feat = point_feat.squeeze(0).transpose(0, 1)
point_feats.append(point_feat)
channels = features.size(1)
roi_feats = torch.cat(point_feats, dim=0)
roi_feats = roi_feats.reshape(num_rois, channels, *self.out_size)
return roi_feats
def __repr__(self):
format_str = self.__class__.__name__
        format_str += '(out_size={}, spatial_scale={})'.format(
            self.out_size, self.spatial_scale)
return format_str
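# A minimal illustrative usage of SimpleRoIAlign (editor's sketch; tensor
# values are assumptions for illustration only). RoIs follow the
# (batch_idx, x1, y1, x2, y2) convention used by the other RoI ops here.
# >>> feats = torch.rand(1, 4, 32, 32)
# >>> rois = torch.tensor([[0., 2., 2., 20., 20.]])
# >>> simple_roi_align = SimpleRoIAlign(out_size=7, spatial_scale=1.0)
# >>> out = simple_roi_align(feats, rois)
# >>> assert out.shape == (1, 4, 7, 7)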
# Modified from https://github.com/hszhao/semseg/blob/master/lib/psa
from torch import nn
from torch.autograd import Function
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext',
['psamask_forward', 'psamask_backward'])
class PSAMaskFunction(Function):
@staticmethod
def symbolic(g, input, psa_type, mask_size):
return g.op(
'MMCVPSAMask', input, psa_type=psa_type, mask_size=mask_size)
@staticmethod
def forward(ctx, input, psa_type, mask_size):
ctx.psa_type = psa_type
ctx.mask_size = _pair(mask_size)
ctx.save_for_backward(input)
h_mask, w_mask = ctx.mask_size
batch_size, channels, h_feature, w_feature = input.size()
assert channels == h_mask * w_mask
output = input.new_zeros(
(batch_size, h_feature * w_feature, h_feature, w_feature))
ext_module.psamask_forward(
input,
output,
psa_type=psa_type,
num_=batch_size,
h_feature=h_feature,
w_feature=w_feature,
h_mask=h_mask,
w_mask=w_mask,
half_h_mask=(h_mask - 1) // 2,
half_w_mask=(w_mask - 1) // 2)
return output
@staticmethod
def backward(ctx, grad_output):
input = ctx.saved_tensors[0]
psa_type = ctx.psa_type
h_mask, w_mask = ctx.mask_size
batch_size, channels, h_feature, w_feature = input.size()
grad_input = grad_output.new_zeros(
(batch_size, channels, h_feature, w_feature))
ext_module.psamask_backward(
grad_output,
grad_input,
psa_type=psa_type,
num_=batch_size,
h_feature=h_feature,
w_feature=w_feature,
h_mask=h_mask,
w_mask=w_mask,
half_h_mask=(h_mask - 1) // 2,
half_w_mask=(w_mask - 1) // 2)
return grad_input, None, None, None
psa_mask = PSAMaskFunction.apply
class PSAMask(nn.Module):
def __init__(self, psa_type, mask_size=None):
super(PSAMask, self).__init__()
assert psa_type in ['collect', 'distribute']
if psa_type == 'collect':
psa_type_enum = 0
else:
psa_type_enum = 1
self.psa_type_enum = psa_type_enum
self.mask_size = mask_size
self.psa_type = psa_type
def forward(self, input):
return psa_mask(input, self.psa_type_enum, self.mask_size)
def __repr__(self):
s = self.__class__.__name__
s += f'(psa_type={self.psa_type}, '
s += f'mask_size={self.mask_size})'
return s
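# A minimal illustrative usage of PSAMask (editor's sketch; assumes the
# compiled `_ext` module and a CUDA device; shapes are for illustration
# only). The input channel count must equal mask_h * mask_w.
# >>> psa = PSAMask('collect', mask_size=(9, 9))
# >>> x = torch.rand(2, 81, 16, 16).cuda()
# >>> out = psa(x)
# >>> assert out.shape == (2, 16 * 16, 16, 16)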
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext',
['roi_align_forward', 'roi_align_backward'])
class RoIAlignFunction(Function):
@staticmethod
def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio,
pool_mode, aligned):
return g.op(
'MMCVRoIAlign',
input,
rois,
aligned_height=output_size[0],
aligned_weight=output_size[1],
spatial_scale=spatial_scale,
sampling_ratio=sampling_ratio,
pool_mode=pool_mode,
aligned=aligned)
@staticmethod
def forward(ctx,
input,
rois,
output_size,
spatial_scale=1.0,
sampling_ratio=0,
pool_mode='avg',
aligned=True):
ctx.output_size = _pair(output_size)
ctx.spatial_scale = spatial_scale
ctx.sampling_ratio = sampling_ratio
assert pool_mode in ('max', 'avg')
ctx.pool_mode = 0 if pool_mode == 'max' else 1
ctx.aligned = aligned
ctx.input_shape = input.size()
assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'
output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
ctx.output_size[1])
output = input.new_zeros(output_shape)
if ctx.pool_mode == 0:
argmax_y = input.new_zeros(output_shape)
argmax_x = input.new_zeros(output_shape)
else:
argmax_y = input.new_zeros(0)
argmax_x = input.new_zeros(0)
ext_module.roi_align_forward(
input,
rois,
output,
argmax_y,
argmax_x,
aligned_height=ctx.output_size[0],
aligned_width=ctx.output_size[1],
spatial_scale=ctx.spatial_scale,
sampling_ratio=ctx.sampling_ratio,
pool_mode=ctx.pool_mode,
aligned=ctx.aligned)
ctx.save_for_backward(rois, argmax_y, argmax_x)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
rois, argmax_y, argmax_x = ctx.saved_tensors
grad_input = grad_output.new_zeros(ctx.input_shape)
ext_module.roi_align_backward(
grad_output,
rois,
argmax_y,
argmax_x,
grad_input,
aligned_height=ctx.output_size[0],
aligned_width=ctx.output_size[1],
spatial_scale=ctx.spatial_scale,
sampling_ratio=ctx.sampling_ratio,
pool_mode=ctx.pool_mode,
aligned=ctx.aligned)
return grad_input, None, None, None, None, None, None
roi_align = RoIAlignFunction.apply
class RoIAlign(nn.Module):
"""RoI align pooling layer.
Args:
output_size (tuple): h, w
spatial_scale (float): scale the input boxes by this number
        sampling_ratio (int): number of input samples to take for each
            output sample. 0 to take samples densely.
pool_mode (str, 'avg' or 'max'): pooling mode in each bin.
aligned (bool): if False, use the legacy implementation in
MMDetection. If True, align the results more perfectly.
Note:
The implementation of RoIAlign when aligned=True is modified from
https://github.com/facebookresearch/detectron2/
The meaning of aligned=True:
Given a continuous coordinate c, its two neighboring pixel
indices (in our pixel model) are computed by floor(c - 0.5) and
ceil(c - 0.5). For example, c=1.3 has pixel neighbors with discrete
indices [0] and [1] (which are sampled from the underlying signal
at continuous coordinates 0.5 and 1.5). But the original roi_align
(aligned=False) does not subtract the 0.5 when computing
neighboring pixel indices and therefore it uses pixels with a
slightly incorrect alignment (relative to our pixel model) when
performing bilinear interpolation.
With `aligned=True`,
we first appropriately scale the ROI and then shift it by -0.5
        prior to calling roi_align. This produces the correct neighbors;
        the difference does not affect the model's performance when
        RoIAlign is used together with conv layers.
"""
def __init__(self,
output_size,
spatial_scale=1.0,
sampling_ratio=0,
pool_mode='avg',
aligned=True):
super(RoIAlign, self).__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
self.sampling_ratio = int(sampling_ratio)
self.pool_mode = pool_mode
self.aligned = aligned
def forward(self, input, rois):
"""
Args:
input: NCHW images
rois: Bx5 boxes. First column is the index into N.\
The other 4 columns are xyxy.
"""
return roi_align(input, rois, self.output_size, self.spatial_scale,
self.sampling_ratio, self.pool_mode, self.aligned)
def __repr__(self):
s = self.__class__.__name__
s += f'(output_size={self.output_size}, '
s += f'spatial_scale={self.spatial_scale}, '
s += f'sampling_ratio={self.sampling_ratio}, '
s += f'pool_mode={self.pool_mode}, '
s += f'aligned={self.aligned})'
return s
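# A minimal illustrative usage of RoIAlign (editor's sketch; assumes the
# compiled `_ext` module and a CUDA device; tensor values are for
# illustration only).
# >>> feats = torch.rand(1, 256, 32, 32).cuda()
# >>> rois = torch.tensor([[0., 4., 4., 20., 20.]]).cuda()
# >>> layer = RoIAlign(output_size=7, spatial_scale=1. / 4, sampling_ratio=2)
# >>> out = layer(feats, rois)
# >>> assert out.shape == (1, 256, 7, 7)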
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext',
['roi_pool_forward', 'roi_pool_backward'])
class RoIPoolFunction(Function):
@staticmethod
def symbolic(g, input, rois, output_size, spatial_scale):
return g.op(
'MMCVRoIPool',
input,
rois,
pooled_height=output_size[0],
pooled_width=output_size[1],
spatial_scale=spatial_scale)
@staticmethod
def forward(ctx, input, rois, output_size, spatial_scale=1.0):
ctx.output_size = _pair(output_size)
ctx.spatial_scale = spatial_scale
ctx.input_shape = input.size()
assert rois.size(1) == 5, 'RoI must be (idx, x1, y1, x2, y2)!'
output_shape = (rois.size(0), input.size(1), ctx.output_size[0],
ctx.output_size[1])
output = input.new_zeros(output_shape)
argmax = input.new_zeros(output_shape, dtype=torch.int)
ext_module.roi_pool_forward(
input,
rois,
output,
argmax,
pooled_height=ctx.output_size[0],
pooled_width=ctx.output_size[1],
spatial_scale=ctx.spatial_scale)
ctx.save_for_backward(rois, argmax)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
rois, argmax = ctx.saved_tensors
grad_input = grad_output.new_zeros(ctx.input_shape)
ext_module.roi_pool_backward(
grad_output,
rois,
argmax,
grad_input,
pooled_height=ctx.output_size[0],
pooled_width=ctx.output_size[1],
spatial_scale=ctx.spatial_scale)
return grad_input, None, None, None
roi_pool = RoIPoolFunction.apply
class RoIPool(nn.Module):
def __init__(self, output_size, spatial_scale=1.0):
super(RoIPool, self).__init__()
self.output_size = _pair(output_size)
self.spatial_scale = float(spatial_scale)
def forward(self, input, rois):
return roi_pool(input, rois, self.output_size, self.spatial_scale)
def __repr__(self):
s = self.__class__.__name__
s += f'(output_size={self.output_size}, '
s += f'spatial_scale={self.spatial_scale})'
return s
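# A minimal illustrative usage of RoIPool (editor's sketch; assumes the
# compiled `_ext` module and a CUDA device; tensor values are for
# illustration only).
# >>> feats = torch.rand(1, 64, 32, 32).cuda()
# >>> rois = torch.tensor([[0., 0., 0., 16., 16.]]).cuda()
# >>> pool = RoIPool(output_size=(7, 7), spatial_scale=1.0)
# >>> out = pool(feats, rois)
# >>> assert out.shape == (1, 64, 7, 7)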
import torch
import torch.distributed as dist
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.module import Module
from torch.nn.parameter import Parameter
from ..utils import ext_loader
ext_module = ext_loader.load_ext('_ext', [
'sync_bn_forward_mean', 'sync_bn_forward_var', 'sync_bn_forward_output',
'sync_bn_backward_param', 'sync_bn_backward_data'
])
class SyncBatchNormFunction(Function):
@staticmethod
def symbolic(g, input, running_mean, running_var, weight, bias, momentum,
eps, group, group_size):
return g.op(
'MMCVSyncBatchNorm',
input,
running_mean,
running_var,
weight,
bias,
momentum=momentum,
eps=eps,
group=group,
group_size=group_size)
@staticmethod
def forward(self, input, running_mean, running_var, weight, bias, momentum,
eps, group, group_size):
self.momentum = momentum
self.eps = eps
self.group = group
self.group_size = group_size
assert isinstance(
input, (torch.HalfTensor, torch.FloatTensor,
torch.cuda.HalfTensor, torch.cuda.FloatTensor)), \
            f'only support Half or Float Tensor, but got {input.type()}'
output = torch.empty_like(input)
input3d = input.view(input.size(0), input.size(1), -1)
output3d = output.view_as(input3d)
mean = torch.empty(
input3d.size(1), dtype=torch.float, device=input3d.device)
var = torch.empty(
input3d.size(1), dtype=torch.float, device=input3d.device)
if input3d.requires_grad or weight.requires_grad or bias.requires_grad:
norm = torch.empty_like(
input3d, dtype=torch.float, device=input3d.device)
std = torch.empty(
input3d.size(1), dtype=torch.float, device=input3d.device)
else:
norm = torch.empty(0, dtype=torch.float, device=input3d.device)
std = torch.empty(0, dtype=torch.float, device=input3d.device)
ext_module.sync_bn_forward_mean(input3d, mean)
if self.group_size > 1:
dist.all_reduce(mean, group=self.group)
mean /= self.group_size
ext_module.sync_bn_forward_var(input3d, mean, var)
if self.group_size > 1:
dist.all_reduce(var, group=self.group)
var /= self.group_size
ext_module.sync_bn_forward_output(
input3d,
mean,
var,
running_mean,
running_var,
weight,
bias,
norm,
std,
output3d,
eps=self.eps,
momentum=self.momentum,
group_size=self.group_size)
self.save_for_backward(norm, std, weight)
return output
@staticmethod
@once_differentiable
def backward(self, grad_output):
norm, std, weight = self.saved_tensors
grad_weight = torch.empty_like(weight)
grad_bias = torch.empty_like(weight)
grad_input = torch.empty_like(grad_output)
grad_output3d = grad_output.view(
grad_output.size(0), grad_output.size(1), -1)
grad_input3d = grad_input.view_as(grad_output3d)
ext_module.sync_bn_backward_param(grad_output3d, norm, grad_weight,
grad_bias)
# all reduce
if self.group_size > 1:
dist.all_reduce(grad_weight, group=self.group)
dist.all_reduce(grad_bias, group=self.group)
grad_weight /= self.group_size
grad_bias /= self.group_size
ext_module.sync_bn_backward_data(grad_output3d, weight, grad_weight,
grad_bias, norm, std, grad_input3d)
return grad_input, None, None, grad_weight, grad_bias, \
None, None, None, None
class SyncBatchNorm(Module):
def __init__(self,
num_features,
eps=1e-5,
momentum=0.1,
affine=True,
track_running_stats=True,
group=dist.group.WORLD):
super(SyncBatchNorm, self).__init__()
self.num_features = num_features
self.eps = eps
self.momentum = momentum
self.affine = affine
self.track_running_stats = track_running_stats
self.group = group
self.group_size = dist.get_world_size(group)
if self.affine:
self.weight = Parameter(torch.Tensor(num_features))
self.bias = Parameter(torch.Tensor(num_features))
else:
self.register_parameter('weight', None)
self.register_parameter('bias', None)
if self.track_running_stats:
self.register_buffer('running_mean', torch.zeros(num_features))
self.register_buffer('running_var', torch.ones(num_features))
self.register_buffer('num_batches_tracked',
torch.tensor(0, dtype=torch.long))
else:
self.register_buffer('running_mean', None)
self.register_buffer('running_var', None)
self.register_buffer('num_batches_tracked', None)
self.reset_parameters()
def reset_running_stats(self):
if self.track_running_stats:
self.running_mean.zero_()
self.running_var.fill_(1)
self.num_batches_tracked.zero_()
def reset_parameters(self):
self.reset_running_stats()
if self.affine:
            self.weight.data.uniform_()  # PyTorch uses ones_() by default
self.bias.data.zero_()
def forward(self, input):
if input.dim() < 2:
raise ValueError(
f'expected at least 2D input, got {input.dim()}D input')
if self.momentum is None:
exponential_average_factor = 0.0
else:
exponential_average_factor = self.momentum
if self.training and self.track_running_stats:
if self.num_batches_tracked is not None:
self.num_batches_tracked += 1
if self.momentum is None: # use cumulative moving average
exponential_average_factor = 1.0 / float(
self.num_batches_tracked)
else: # use exponential moving average
exponential_average_factor = self.momentum
if self.training or not self.track_running_stats:
return SyncBatchNormFunction.apply(input, self.running_mean,
self.running_var, self.weight,
self.bias,
exponential_average_factor,
self.eps, self.group,
self.group_size)
else:
return F.batch_norm(input, self.running_mean, self.running_var,
self.weight, self.bias, False,
exponential_average_factor, self.eps)
def __repr__(self):
s = self.__class__.__name__
s += f'({self.num_features}, '
s += f'eps={self.eps}, '
s += f'momentum={self.momentum}, '
s += f'affine={self.affine}, '
s += f'track_running_stats={self.track_running_stats}, '
s += f'group_size={self.group_size})'
return s
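# A minimal illustrative usage of SyncBatchNorm (editor's sketch; assumes the
# compiled `_ext` module, a CUDA device and an already initialized default
# process group, e.g. via torch.distributed.init_process_group; values are
# for illustration only).
# >>> sync_bn = SyncBatchNorm(num_features=64).cuda()
# >>> x = torch.rand(8, 64, 32, 32).cuda()
# >>> y = sync_bn(x)
# >>> assert y.shape == x.shape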
"""Modified from https://github.com/facebookresearch/detectron2/blob/master.
/detectron2/layers/wrappers.py Wrap some nn modules to support empty
tensor input. Currently, these wrappers are mainly used in mask heads
like fcn_mask_head and maskiou_heads since mask heads are trained on
only positive RoIs.
"""
import math
import torch
import torch.nn as nn
from torch.nn.modules.utils import _pair
from ..cnn import CONV_LAYERS
class NewEmptyTensorOp(torch.autograd.Function):
@staticmethod
def forward(ctx, x, new_shape):
ctx.shape = x.shape
return x.new_empty(new_shape)
@staticmethod
def backward(ctx, grad):
shape = ctx.shape
return NewEmptyTensorOp.apply(grad, shape), None
@CONV_LAYERS.register_module('Conv', force=True)
class Conv2d(nn.Conv2d):
def forward(self, x):
if x.numel() == 0 and torch.__version__ <= '1.4':
out_shape = [x.shape[0], self.out_channels]
for i, k, p, s, d in zip(x.shape[-2:], self.kernel_size,
self.padding, self.stride, self.dilation):
o = (i + 2 * p - (d * (k - 1) + 1)) // s + 1
out_shape.append(o)
empty = NewEmptyTensorOp.apply(x, out_shape)
if self.training:
# produce dummy gradient to avoid DDP warning.
dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
return empty + dummy
else:
return empty
return super().forward(x)
class ConvTranspose2d(nn.ConvTranspose2d):
def forward(self, x):
if x.numel() == 0 and torch.__version__ <= '1.4.0':
out_shape = [x.shape[0], self.out_channels]
for i, k, p, s, d, op in zip(x.shape[-2:], self.kernel_size,
self.padding, self.stride,
self.dilation, self.output_padding):
out_shape.append((i - 1) * s - 2 * p + (d * (k - 1) + 1) + op)
empty = NewEmptyTensorOp.apply(x, out_shape)
if self.training:
# produce dummy gradient to avoid DDP warning.
dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
return empty + dummy
else:
return empty
return super(ConvTranspose2d, self).forward(x)
class MaxPool2d(nn.MaxPool2d):
def forward(self, x):
if x.numel() == 0 and torch.__version__ <= '1.4':
out_shape = list(x.shape[:2])
for i, k, p, s, d in zip(x.shape[-2:], _pair(self.kernel_size),
_pair(self.padding), _pair(self.stride),
_pair(self.dilation)):
o = (i + 2 * p - (d * (k - 1) + 1)) / s + 1
o = math.ceil(o) if self.ceil_mode else math.floor(o)
out_shape.append(o)
empty = NewEmptyTensorOp.apply(x, out_shape)
return empty
return super().forward(x)
class Linear(torch.nn.Linear):
def forward(self, x):
if x.numel() == 0:
out_shape = [x.shape[0], self.out_features]
empty = NewEmptyTensorOp.apply(x, out_shape)
if self.training:
# produce dummy gradient to avoid DDP warning.
dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0
return empty + dummy
else:
return empty
return super().forward(x)
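# A minimal illustration of the empty-tensor wrappers above (editor's sketch;
# shapes are assumptions for illustration only): when a mask head receives no
# positive RoIs, the wrapped layers still return correctly shaped empty
# outputs instead of raising an error.
# >>> fc = Linear(256, 4)
# >>> assert fc(torch.empty(0, 256)).shape == (0, 4)
# >>> conv = Conv2d(3, 8, kernel_size=3, padding=1)
# >>> assert conv(torch.empty(0, 3, 32, 32)).shape == (0, 8, 32, 32)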
import importlib
import os
from collections import namedtuple
import torch
if torch.__version__ != 'parrots':
def load_ext(name, funcs):
ext = importlib.import_module('mmcv.' + name)
for fun in funcs:
            assert hasattr(ext, fun), f'{fun} is missing in module {name}'
return ext
else:
from parrots import extension
def load_ext(name, funcs):
ExtModule = namedtuple('ExtModule', funcs)
ext_list = []
lib_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
for fun in funcs:
if fun in ['nms', 'softnms']:
ext_list.append(extension.load(fun, name, lib_dir=lib_root).op)
else:
ext_list.append(
extension.load(fun, name, lib_dir=lib_root).op_)
return ExtModule(*ext_list)
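# A minimal illustrative usage of load_ext (editor's sketch; assumes the
# corresponding extension has been compiled). Both the PyTorch and the
# parrots branches return an object whose attributes are the requested
# extension functions, so callers such as the ops above stay backend
# agnostic.
# >>> ext = load_ext('_ext', ['nms', 'softnms'])
# >>> assert all(hasattr(ext, f) for f in ['nms', 'softnms'])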
# Copyright (c) Open-MMLab. All rights reserved.
__version__ = '1.0rc0'
# Copyright (c) Open-MMLab. All rights reserved.
import numpy as np
from mmcv._flow_warp_ext import flow_warp_c
from mmcv.arraymisc import dequantize, quantize
from mmcv.image import imread, imwrite
from mmcv.utils import is_str
......