Unverified commit 66fa7039, authored by Yizhou Wang and committed by GitHub

Merge pull request #37 from yizhou-wang/v1.1-mnet-tdc

V1.1: add MNet TDC modules
parents 7c3fd6f9 d0140132
from .deform_conv_2d import DeformConv2D, DeformConvPack2D
from .deform_conv_2d import ModulatedDeformConv2D, ModulatedDeformConvPack2D
from .deform_pool_2d import DeformRoIPooling2D, DeformRoIPoolingPack2D
from .deform_pool_2d import ModulatedDeformRoIPoolingPack2D
from .deform_conv_3d import DeformConv3D, DeformConvPack3D
from .deform_conv_3d import ModulatedDeformConv3D, ModulatedDeformConvPack3D
# from .deform_pool_3d import DeformRoIPooling3D, DeformRoIPoolingPack3D
# from .deform_pool_3d import ModulatedDeformRoIPoolingPack3D
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair, _single
# from mmdet.utils import print_log
from . import deform_conv_2d_cuda
class DeformConvFunction2D(Function):
@staticmethod
def forward(ctx,
input,
offset,
weight,
stride=1,
padding=0,
dilation=1,
groups=1,
deformable_groups=1,
im2col_step=64):
if input is not None and input.dim() != 4:
raise ValueError(
'Expected 4D tensor as input, got {}D tensor instead.'.format(
input.dim()))
ctx.stride = _pair(stride)
ctx.padding = _pair(padding)
ctx.dilation = _pair(dilation)
ctx.groups = groups
ctx.deformable_groups = deformable_groups
ctx.im2col_step = im2col_step
ctx.save_for_backward(input, offset, weight)
output = input.new_empty(
DeformConvFunction2D._output_size(input, weight, ctx.padding,
ctx.dilation, ctx.stride))
ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones
if not input.is_cuda:
raise NotImplementedError
else:
cur_im2col_step = min(ctx.im2col_step, input.shape[0])
assert (input.shape[0] %
cur_im2col_step) == 0, 'im2col step must divide batchsize'
deform_conv_2d_cuda.deform_conv_forward_cuda(
input, weight, offset, output, ctx.bufs_[0], ctx.bufs_[1],
weight.size(3), weight.size(2), ctx.stride[1], ctx.stride[0],
ctx.padding[1], ctx.padding[0], ctx.dilation[1],
ctx.dilation[0], ctx.groups, ctx.deformable_groups,
cur_im2col_step)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, offset, weight = ctx.saved_tensors
grad_input = grad_offset = grad_weight = None
if not grad_output.is_cuda:
raise NotImplementedError
else:
cur_im2col_step = min(ctx.im2col_step, input.shape[0])
assert (input.shape[0] %
cur_im2col_step) == 0, 'im2col step must divide batchsize'
if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
grad_input = torch.zeros_like(input)
grad_offset = torch.zeros_like(offset)
deform_conv_2d_cuda.deform_conv_backward_input_cuda(
input, offset, grad_output, grad_input,
grad_offset, weight, ctx.bufs_[0], weight.size(3),
weight.size(2), ctx.stride[1], ctx.stride[0],
ctx.padding[1], ctx.padding[0], ctx.dilation[1],
ctx.dilation[0], ctx.groups, ctx.deformable_groups,
cur_im2col_step)
if ctx.needs_input_grad[2]:
grad_weight = torch.zeros_like(weight)
deform_conv_2d_cuda.deform_conv_backward_parameters_cuda(
input, offset, grad_output,
grad_weight, ctx.bufs_[0], ctx.bufs_[1], weight.size(3),
weight.size(2), ctx.stride[1], ctx.stride[0],
ctx.padding[1], ctx.padding[0], ctx.dilation[1],
ctx.dilation[0], ctx.groups, ctx.deformable_groups, 1,
cur_im2col_step)
return (grad_input, grad_offset, grad_weight, None, None, None, None,
None)
@staticmethod
def _output_size(input, weight, padding, dilation, stride):
channels = weight.size(0)
output_size = (input.size(0), channels)
for d in range(input.dim() - 2):
in_size = input.size(d + 2)
pad = padding[d]
kernel = dilation[d] * (weight.size(d + 2) - 1) + 1
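# effective extent of the dilated kernel, e.g. k=3 with dilation=2 spans 5 inputs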
stride_ = stride[d]
output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,)
if not all(map(lambda s: s > 0, output_size)):
raise ValueError(
'convolution input is too small (output would be {})'.format(
'x'.join(map(str, output_size))))
return output_size
class ModulatedDeformConvFunction2D(Function):
@staticmethod
def forward(ctx,
input,
offset,
mask,
weight,
bias=None,
stride=1,
padding=0,
dilation=1,
groups=1,
deformable_groups=1):
ctx.stride = stride
ctx.padding = padding
ctx.dilation = dilation
ctx.groups = groups
ctx.deformable_groups = deformable_groups
ctx.with_bias = bias is not None
if not ctx.with_bias:
bias = input.new_empty(1) # fake tensor
if not input.is_cuda:
raise NotImplementedError
if weight.requires_grad or mask.requires_grad or offset.requires_grad \
or input.requires_grad:
ctx.save_for_backward(input, offset, mask, weight, bias)
output = input.new_empty(
ModulatedDeformConvFunction2D._infer_shape(ctx, input, weight))
ctx._bufs = [input.new_empty(0), input.new_empty(0)]
deform_conv_2d_cuda.modulated_deform_conv_cuda_forward(
input, weight, bias, ctx._bufs[0], offset, mask, output,
ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride,
ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,
ctx.groups, ctx.deformable_groups, ctx.with_bias)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
if not grad_output.is_cuda:
raise NotImplementedError
input, offset, mask, weight, bias = ctx.saved_tensors
grad_input = torch.zeros_like(input)
grad_offset = torch.zeros_like(offset)
grad_mask = torch.zeros_like(mask)
grad_weight = torch.zeros_like(weight)
grad_bias = torch.zeros_like(bias)
deform_conv_2d_cuda.modulated_deform_conv_cuda_backward(
input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1],
grad_input, grad_weight, grad_bias, grad_offset, grad_mask,
grad_output, weight.shape[2], weight.shape[3], ctx.stride,
ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,
ctx.groups, ctx.deformable_groups, ctx.with_bias)
if not ctx.with_bias:
grad_bias = None
return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
None, None, None, None, None)
@staticmethod
def _infer_shape(ctx, input, weight):
n = input.size(0)
channels_out = weight.size(0)
height, width = input.shape[2:4]
kernel_h, kernel_w = weight.shape[2:4]
height_out = (height + 2 * ctx.padding -
(ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1
width_out = (width + 2 * ctx.padding -
(ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1
return n, channels_out, height_out, width_out
deform_conv = DeformConvFunction2D.apply
modulated_deform_conv = ModulatedDeformConvFunction2D.apply
class DeformConv2D(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
deformable_groups=1,
bias=False):
super(DeformConv2D, self).__init__()
assert not bias
assert in_channels % groups == 0, \
'in_channels {} is not divisible by groups {}'.format(
in_channels, groups)
assert out_channels % groups == 0, \
'out_channels {} is not divisible by groups {}'.format(
out_channels, groups)
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
self.stride = _pair(stride)
self.padding = _pair(padding)
self.dilation = _pair(dilation)
self.groups = groups
self.deformable_groups = deformable_groups
# enable compatibility with nn.Conv2d
self.transposed = False
self.output_padding = _single(0)
self.weight = nn.Parameter(
torch.Tensor(out_channels, in_channels // self.groups,
*self.kernel_size))
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
def forward(self, x, offset):
# Pad the input when it is smaller than the kernel, to avoid the
# assert error in deform_conv_cuda.cpp:128
input_pad = (
x.size(2) < self.kernel_size[0] or x.size(3) < self.kernel_size[1])
if input_pad:
pad_h = max(self.kernel_size[0] - x.size(2), 0)
pad_w = max(self.kernel_size[1] - x.size(3), 0)
x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous()
offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant',
0).contiguous()
out = deform_conv(x, offset, self.weight, self.stride, self.padding,
self.dilation, self.groups, self.deformable_groups)
if input_pad:
out = out[:, :, :out.size(2) - pad_h, :out.size(3) -
pad_w].contiguous()
return out
class DeformConvPack2D(DeformConv2D):
"""A Deformable Conv Encapsulation that acts as normal Conv layers.
Args:
in_channels (int): Same as nn.Conv2d.
out_channels (int): Same as nn.Conv2d.
kernel_size (int or tuple[int]): Same as nn.Conv2d.
stride (int or tuple[int]): Same as nn.Conv2d.
padding (int or tuple[int]): Same as nn.Conv2d.
dilation (int or tuple[int]): Same as nn.Conv2d.
groups (int): Same as nn.Conv2d.
bias (bool or str): If specified as `auto`, it will be decided by the
norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
False.
"""
_version = 2
def __init__(self, *args, **kwargs):
super(DeformConvPack2D, self).__init__(*args, **kwargs)
self.conv_offset = nn.Conv2d(
self.in_channels,
self.deformable_groups * 2 * self.kernel_size[0] *
self.kernel_size[1],
kernel_size=self.kernel_size,
stride=_pair(self.stride),
padding=_pair(self.padding),
dilation=_pair(self.dilation),
bias=True)
self.init_offset()
def init_offset(self):
self.conv_offset.weight.data.zero_()
self.conv_offset.bias.data.zero_()
def forward(self, x):
offset = self.conv_offset(x)
return deform_conv(x, offset, self.weight, self.stride, self.padding,
self.dilation, self.groups, self.deformable_groups)
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
missing_keys, unexpected_keys, error_msgs):
version = local_metadata.get('version', None)
if version is None or version < 2:
# the key is different in early versions
# In version < 2, DeformConvPack loads previous benchmark models.
if (prefix + 'conv_offset.weight' not in state_dict
and prefix[:-1] + '_offset.weight' in state_dict):
state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
prefix[:-1] + '_offset.weight')
if (prefix + 'conv_offset.bias' not in state_dict
and prefix[:-1] + '_offset.bias' in state_dict):
state_dict[prefix +
'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
'_offset.bias')
if version is not None and version > 1:
# print_log (from mmdet) is not imported in this file; use print instead
print('DeformConvPack {} is upgraded to version 2.'.format(
prefix.rstrip('.')))
super()._load_from_state_dict(state_dict, prefix, local_metadata,
strict, missing_keys, unexpected_keys,
error_msgs)
class ModulatedDeformConv2D(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
deformable_groups=1,
bias=True):
super(ModulatedDeformConv2D, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
self.stride = stride
self.padding = padding
self.dilation = dilation
self.groups = groups
self.deformable_groups = deformable_groups
self.with_bias = bias
# enable compatibility with nn.Conv2d
self.transposed = False
self.output_padding = _single(0)
self.weight = nn.Parameter(
torch.Tensor(out_channels, in_channels // groups,
*self.kernel_size))
if bias:
self.bias = nn.Parameter(torch.Tensor(out_channels))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.zero_()
def forward(self, x, offset, mask):
return modulated_deform_conv(x, offset, mask, self.weight, self.bias,
self.stride, self.padding, self.dilation,
self.groups, self.deformable_groups)
class ModulatedDeformConvPack2D(ModulatedDeformConv2D):
"""A ModulatedDeformable Conv Encapsulation that acts as normal Conv layers.
Args:
in_channels (int): Same as nn.Conv2d.
out_channels (int): Same as nn.Conv2d.
kernel_size (int or tuple[int]): Same as nn.Conv2d.
stride (int or tuple[int]): Same as nn.Conv2d.
padding (int or tuple[int]): Same as nn.Conv2d.
dilation (int or tuple[int]): Same as nn.Conv2d.
groups (int): Same as nn.Conv2d.
bias (bool or str): If specified as `auto`, it will be decided by the
norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
False.
"""
_version = 2
def __init__(self, *args, **kwargs):
super(ModulatedDeformConvPack2D, self).__init__(*args, **kwargs)
self.conv_offset = nn.Conv2d(
self.in_channels,
self.deformable_groups * 3 * self.kernel_size[0] *
self.kernel_size[1],
kernel_size=self.kernel_size,
stride=_pair(self.stride),
padding=_pair(self.padding),
dilation=_pair(self.dilation),
bias=True)
self.init_offset()
def init_offset(self):
self.conv_offset.weight.data.zero_()
self.conv_offset.bias.data.zero_()
def forward(self, x):
out = self.conv_offset(x)
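# conv_offset outputs deformable_groups * 3 * kh * kw channels: two
# thirds are offset components and the last third is mask logits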
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
return modulated_deform_conv(x, offset, mask, self.weight, self.bias,
self.stride, self.padding, self.dilation,
self.groups, self.deformable_groups)
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
missing_keys, unexpected_keys, error_msgs):
version = local_metadata.get('version', None)
if version is None or version < 2:
# the key is different in early versions
# In version < 2, ModulatedDeformConvPack
# loads previous benchmark models.
if (prefix + 'conv_offset.weight' not in state_dict
and prefix[:-1] + '_offset.weight' in state_dict):
state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
prefix[:-1] + '_offset.weight')
if (prefix + 'conv_offset.bias' not in state_dict
and prefix[:-1] + '_offset.bias' in state_dict):
state_dict[prefix +
'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
'_offset.bias')
if version is not None and version > 1:
# print_log (from mmdet) is not imported in this file; use print instead
print('ModulatedDeformConvPack {} is upgraded to version 2.'.format(
prefix.rstrip('.')))
super()._load_from_state_dict(state_dict, prefix, local_metadata,
strict, missing_keys, unexpected_keys,
error_msgs)
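For orientation, here is a minimal usage sketch of the 2D modules above. It assumes the `deform_conv_2d_cuda` extension has been compiled and a CUDA device is available; the tensor shapes are illustrative only.

```python
# Usage sketch for the 2D deformable conv modules (assumes compiled CUDA ops).
import torch
from rodnet.ops.dcn import DeformConv2D, DeformConvPack2D, ModulatedDeformConvPack2D

x = torch.randn(2, 16, 32, 32, device='cuda')

# Pack variants predict their own offsets (and masks) internally,
# so they behave as drop-in replacements for nn.Conv2d.
dcn = DeformConvPack2D(16, 32, kernel_size=3, padding=1).cuda()
mdcn = ModulatedDeformConvPack2D(16, 32, kernel_size=3, padding=1).cuda()
y1 = dcn(x)   # -> (2, 32, 32, 32)
y2 = mdcn(x)  # -> (2, 32, 32, 32)

# The plain DeformConv2D expects an external offset map with
# deformable_groups * 2 * kh * kw channels; all-zero offsets reduce it
# to an ordinary convolution.
conv = DeformConv2D(16, 32, kernel_size=3, padding=1).cuda()
offset = torch.zeros(2, 2 * 3 * 3, 32, 32, device='cuda')
y3 = conv(x, offset)
```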
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _triple, _pair, _single
from . import deform_conv_3d_cuda
class DeformConvFunction3D(Function):
@staticmethod
def forward(ctx,
input,
offset,
weight,
stride=1,
padding=0,
dilation=1,
groups=1,
deformable_groups=1,
im2col_step=64):
if input is not None and input.dim() != 5:
raise ValueError(
'Expected 5D tensor as input, got {}D tensor instead.'.format(
input.dim()))
ctx.stride = _triple(stride)
ctx.padding = _triple(padding)
ctx.dilation = _triple(dilation)
ctx.groups = groups
ctx.deformable_groups = deformable_groups
ctx.im2col_step = im2col_step
ctx.save_for_backward(input, offset, weight)
output = input.new_empty(
DeformConvFunction3D._output_size(input, weight, ctx.padding,
ctx.dilation, ctx.stride))
ctx.bufs_ = [input.new_empty(0), input.new_empty(0)] # columns, ones
if not input.is_cuda:
raise NotImplementedError
else:
cur_im2col_step = min(ctx.im2col_step, input.shape[0])
assert (input.shape[0] %
cur_im2col_step) == 0, 'im2col step must divide batchsize'
deform_conv_3d_cuda.deform_conv_forward_cuda(
input, weight, offset, output, ctx.bufs_[0], ctx.bufs_[1],
weight.size(4), weight.size(3), weight.size(2),
ctx.stride[2], ctx.stride[1], ctx.stride[0],
ctx.padding[2], ctx.padding[1], ctx.padding[0],
ctx.dilation[2], ctx.dilation[1], ctx.dilation[0],
ctx.groups, ctx.deformable_groups,
cur_im2col_step)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
input, offset, weight = ctx.saved_tensors
grad_input = grad_offset = grad_weight = None
if not grad_output.is_cuda:
raise NotImplementedError
else:
cur_im2col_step = min(ctx.im2col_step, input.shape[0])
assert (input.shape[0] %
cur_im2col_step) == 0, 'im2col step must divide batchsize'
# needs_input_grad[0] and [1] correspond to input and offset; [2] to the kernel weights
if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
grad_input = torch.zeros_like(input)
grad_offset = torch.zeros_like(offset)
deform_conv_3d_cuda.deform_conv_backward_input_cuda(
input, offset, grad_output, grad_input,
grad_offset, weight, ctx.bufs_[0],
weight.size(4), weight.size(3), weight.size(2),
ctx.stride[2], ctx.stride[1], ctx.stride[0],
ctx.padding[2], ctx.padding[1], ctx.padding[0],
ctx.dilation[2], ctx.dilation[1], ctx.dilation[0],
ctx.groups, ctx.deformable_groups,
cur_im2col_step)
if ctx.needs_input_grad[2]:
grad_weight = torch.zeros_like(weight)
deform_conv_3d_cuda.deform_conv_backward_parameters_cuda(
input, offset, grad_output,
grad_weight, ctx.bufs_[0], ctx.bufs_[1],
weight.size(4), weight.size(3), weight.size(2),
ctx.stride[2], ctx.stride[1], ctx.stride[0],
ctx.padding[2], ctx.padding[1], ctx.padding[0],
ctx.dilation[2], ctx.dilation[1], ctx.dilation[0],
ctx.groups, ctx.deformable_groups, 1,
cur_im2col_step)
return (grad_input, grad_offset, grad_weight, None, None, None, None,
None)
@staticmethod
def _output_size(input, weight, padding, dilation, stride):
channels = weight.size(0)
output_size = (input.size(0), channels)
for d in range(input.dim() - 2):
in_size = input.size(d + 2)
pad = padding[d]
kernel = dilation[d] * (weight.size(d + 2) - 1) + 1
stride_ = stride[d]
output_size += ((in_size + (2 * pad) - kernel) // stride_ + 1,)
if not all(map(lambda s: s > 0, output_size)):
raise ValueError(
'convolution input is too small (output would be {})'.format(
'x'.join(map(str, output_size))))
return output_size
class ModulatedDeformConvFunction3D(Function):
@staticmethod
def forward(ctx,
input,
offset,
mask,
weight,
bias=None,
stride=1,
padding=0,
dilation=1,
groups=1,
deformable_groups=1):
ctx.stride = stride
ctx.padding = padding
ctx.dilation = dilation
ctx.groups = groups
ctx.deformable_groups = deformable_groups
ctx.with_bias = bias is not None
if not ctx.with_bias:
bias = input.new_empty(1) # fake tensor
if not input.is_cuda:
raise NotImplementedError
if weight.requires_grad or mask.requires_grad or offset.requires_grad \
or input.requires_grad:
ctx.save_for_backward(input, offset, mask, weight, bias)
output = input.new_empty(
ModulatedDeformConvFunction3D._infer_shape(ctx, input, weight))
ctx._bufs = [input.new_empty(0), input.new_empty(0)]
deform_conv_3d_cuda.modulated_deform_conv_cuda_forward(
input, weight, bias, ctx._bufs[0], offset, mask, output,
ctx._bufs[1], weight.shape[2], weight.shape[3], ctx.stride,
ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,
ctx.groups, ctx.deformable_groups, ctx.with_bias)
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
if not grad_output.is_cuda:
raise NotImplementedError
input, offset, mask, weight, bias = ctx.saved_tensors
grad_input = torch.zeros_like(input)
grad_offset = torch.zeros_like(offset)
grad_mask = torch.zeros_like(mask)
grad_weight = torch.zeros_like(weight)
grad_bias = torch.zeros_like(bias)
deform_conv_3d_cuda.modulated_deform_conv_cuda_backward(
input, weight, bias, ctx._bufs[0], offset, mask, ctx._bufs[1],
grad_input, grad_weight, grad_bias, grad_offset, grad_mask,
grad_output, weight.shape[2], weight.shape[3], ctx.stride,
ctx.stride, ctx.padding, ctx.padding, ctx.dilation, ctx.dilation,
ctx.groups, ctx.deformable_groups, ctx.with_bias)
if not ctx.with_bias:
grad_bias = None
return (grad_input, grad_offset, grad_mask, grad_weight, grad_bias,
None, None, None, None, None)
@staticmethod
def _infer_shape(ctx, input, weight):
n = input.size(0)
channels_out = weight.size(0)
height, width = input.shape[2:4]
kernel_h, kernel_w = weight.shape[2:4]
height_out = (height + 2 * ctx.padding -
(ctx.dilation * (kernel_h - 1) + 1)) // ctx.stride + 1
width_out = (width + 2 * ctx.padding -
(ctx.dilation * (kernel_w - 1) + 1)) // ctx.stride + 1
return n, channels_out, height_out, width_out
deform_conv = DeformConvFunction3D.apply
modulated_deform_conv = ModulatedDeformConvFunction3D.apply
class DeformConv3D(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
deformable_groups=1,
bias=False):
super(DeformConv3D, self).__init__()
assert not bias
assert in_channels % groups == 0, \
'in_channels {} is not divisible by groups {}'.format(
in_channels, groups)
assert out_channels % groups == 0, \
'out_channels {} is not divisible by groups {}'.format(
out_channels, groups)
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _triple(kernel_size)
self.stride = _triple(stride)
self.padding = _triple(padding)
self.dilation = _triple(dilation)
self.groups = groups
self.deformable_groups = deformable_groups
# enable compatibility with nn.Conv2d
self.transposed = False
self.output_padding = _single(0)
self.weight = nn.Parameter(
torch.Tensor(out_channels, in_channels // self.groups,
*self.kernel_size))
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
def forward(self, x, offset):
# Pad the input when its spatial size is smaller than the kernel,
# to avoid the assert error in deform_conv_cuda.cpp:128.
# TODO: the temporal dimension is not yet checked or padded here
input_pad = (
x.size(3) < self.kernel_size[1] or x.size(4) < self.kernel_size[2])
if input_pad:
pad_h = max(self.kernel_size[1] - x.size(3), 0)
pad_w = max(self.kernel_size[2] - x.size(4), 0)
x = F.pad(x, (0, pad_w, 0, pad_h), 'constant', 0).contiguous()
offset = F.pad(offset, (0, pad_w, 0, pad_h), 'constant',
0).contiguous()
out = deform_conv(x, offset, self.weight, self.stride, self.padding,
self.dilation, self.groups, self.deformable_groups)
if input_pad:
out = out[:, :, :, :out.size(3) - pad_h, :out.size(4) -
pad_w].contiguous()
return out
class DeformConvPack3D(DeformConv3D):
"""A Deformable Conv Encapsulation that acts as normal Conv layers.
Args:
in_channels (int): Same as nn.Conv2d.
out_channels (int): Same as nn.Conv2d.
kernel_size (int or tuple[int]): Same as nn.Conv2d.
stride (int or tuple[int]): Same as nn.Conv2d.
padding (int or tuple[int]): Same as nn.Conv2d.
dilation (int or tuple[int]): Same as nn.Conv2d.
groups (int): Same as nn.Conv2d.
bias (bool or str): If specified as `auto`, it will be decided by the
norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
False.
"""
_version = 2
def __init__(self, *args, **kwargs):
super(DeformConvPack3D, self).__init__(*args, **kwargs)
self.conv_offset = nn.Conv3d(
self.in_channels,
self.deformable_groups * 2 * self.kernel_size[0] *
self.kernel_size[1] * self.kernel_size[2],
kernel_size=self.kernel_size,
stride=_triple(self.stride),
padding=_triple(self.padding),
dilation=_triple(self.dilation),
bias=True)
self.init_offset()
def init_offset(self):
self.conv_offset.weight.data.zero_()
self.conv_offset.bias.data.zero_()
def forward(self, x):
offset = self.conv_offset(x)
return deform_conv(x, offset, self.weight, self.stride, self.padding,
self.dilation, self.groups, self.deformable_groups)
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
missing_keys, unexpected_keys, error_msgs):
version = local_metadata.get('version', None)
if version is None or version < 2:
# the key is different in early versions
# In version < 2, DeformConvPack loads previous benchmark models.
# TODO: check here
if (prefix + 'conv_offset.weight' not in state_dict
and prefix[:-1] + '_offset.weight' in state_dict):
state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
prefix[:-1] + '_offset.weight')
if (prefix + 'conv_offset.bias' not in state_dict
and prefix[:-1] + '_offset.bias' in state_dict):
state_dict[prefix +
'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
'_offset.bias')
super()._load_from_state_dict(state_dict, prefix, local_metadata,
strict, missing_keys, unexpected_keys,
error_msgs)
class ModulatedDeformConv3D(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
deformable_groups=1,
bias=True):
super(ModulatedDeformConv3D, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = _pair(kernel_size)
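# NOTE: the modulated 3D variant keeps the 2D-style (h, w) kernel and
# offset handling; see ModulatedDeformConvFunction3D._infer_shape above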
self.stride = stride
self.padding = padding
self.dilation = dilation
self.groups = groups
self.deformable_groups = deformable_groups
self.with_bias = bias
# enable compatibility with nn.Conv2d
self.transposed = False
self.output_padding = _single(0)
self.weight = nn.Parameter(
torch.Tensor(out_channels, in_channels // groups,
*self.kernel_size))
if bias:
self.bias = nn.Parameter(torch.Tensor(out_channels))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
n = self.in_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.zero_()
def forward(self, x, offset, mask):
return modulated_deform_conv(x, offset, mask, self.weight, self.bias,
self.stride, self.padding, self.dilation,
self.groups, self.deformable_groups)
class ModulatedDeformConvPack3D(ModulatedDeformConv3D):
"""A ModulatedDeformable Conv Encapsulation that acts as normal Conv layers.
Args:
in_channels (int): Same as nn.Conv2d.
out_channels (int): Same as nn.Conv2d.
kernel_size (int or tuple[int]): Same as nn.Conv2d.
stride (int or tuple[int]): Same as nn.Conv2d.
padding (int or tuple[int]): Same as nn.Conv2d.
dilation (int or tuple[int]): Same as nn.Conv2d.
groups (int): Same as nn.Conv2d.
bias (bool or str): If specified as `auto`, it will be decided by the
norm_cfg. Bias will be set as True if norm_cfg is None, otherwise
False.
"""
_version = 2
def __init__(self, *args, **kwargs):
super(ModulatedDeformConvPack3D, self).__init__(*args, **kwargs)
self.conv_offset = nn.Conv2d(
self.in_channels,
self.deformable_groups * 3 * self.kernel_size[0] *
self.kernel_size[1],
kernel_size=self.kernel_size,
stride=_pair(self.stride),
padding=_pair(self.padding),
dilation=_pair(self.dilation),
bias=True)
self.init_offset()
def init_offset(self):
self.conv_offset.weight.data.zero_()
self.conv_offset.bias.data.zero_()
def forward(self, x):
out = self.conv_offset(x)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
return modulated_deform_conv(x, offset, mask, self.weight, self.bias,
self.stride, self.padding, self.dilation,
self.groups, self.deformable_groups)
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
missing_keys, unexpected_keys, error_msgs):
version = local_metadata.get('version', None)
if version is None or version < 2:
# the key is different in early versions
# In version < 2, ModulatedDeformConvPack
# loads previous benchmark models.
if (prefix + 'conv_offset.weight' not in state_dict
and prefix[:-1] + '_offset.weight' in state_dict):
state_dict[prefix + 'conv_offset.weight'] = state_dict.pop(
prefix[:-1] + '_offset.weight')
if (prefix + 'conv_offset.bias' not in state_dict
and prefix[:-1] + '_offset.bias' in state_dict):
state_dict[prefix +
'conv_offset.bias'] = state_dict.pop(prefix[:-1] +
'_offset.bias')
if version is not None and version > 1:
# print_log (from mmdet) is not imported in this file; use print instead
print('ModulatedDeformConvPack {} is upgraded to version 2.'.format(
prefix.rstrip('.')))
super()._load_from_state_dict(state_dict, prefix, local_metadata,
strict, missing_keys, unexpected_keys,
error_msgs)
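A corresponding sketch for the 3D (temporal deformable) module. Note that, per the `__init__` above, `DeformConvPack3D` predicts `deformable_groups * 2 * kt * kh * kw` offset channels with an `nn.Conv3d`; the shapes below are assumptions.

```python
# Usage sketch for DeformConvPack3D (assumes compiled deform_conv_3d_cuda + GPU).
import torch
from rodnet.ops.dcn import DeformConvPack3D

x = torch.randn(2, 16, 8, 32, 32, device='cuda')  # (N, C, T, H, W)
dcn3d = DeformConvPack3D(16, 32, kernel_size=3, padding=1).cuda()
y = dcn3d(x)  # -> (2, 32, 8, 32, 32) with stride 1 and padding 1
```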
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from . import deform_pool_2d_cuda
class DeformRoIPoolingFunction2D(Function):
@staticmethod
def forward(ctx,
data,
rois,
offset,
spatial_scale,
out_size,
out_channels,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
# TODO: support non-square RoIs
out_h, out_w = _pair(out_size)
assert isinstance(out_h, int) and isinstance(out_w, int)
assert out_h == out_w
out_size = out_h # out_h and out_w must be equal
ctx.spatial_scale = spatial_scale
ctx.out_size = out_size
ctx.out_channels = out_channels
ctx.no_trans = no_trans
ctx.group_size = group_size
ctx.part_size = out_size if part_size is None else part_size
ctx.sample_per_part = sample_per_part
ctx.trans_std = trans_std
assert 0.0 <= ctx.trans_std <= 1.0
if not data.is_cuda:
raise NotImplementedError
n = rois.shape[0]
output = data.new_empty(n, out_channels, out_size, out_size)
output_count = data.new_empty(n, out_channels, out_size, out_size)
deform_pool_2d_cuda.deform_psroi_pooling_cuda_forward(
data, rois, offset, output, output_count, ctx.no_trans,
ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size,
ctx.part_size, ctx.sample_per_part, ctx.trans_std)
if data.requires_grad or rois.requires_grad or offset.requires_grad:
ctx.save_for_backward(data, rois, offset)
ctx.output_count = output_count
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
if not grad_output.is_cuda:
raise NotImplementedError
data, rois, offset = ctx.saved_tensors
output_count = ctx.output_count
grad_input = torch.zeros_like(data)
grad_rois = None
grad_offset = torch.zeros_like(offset)
deform_pool_2d_cuda.deform_psroi_pooling_cuda_backward(
grad_output, data, rois, offset, output_count, grad_input,
grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels,
ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part,
ctx.trans_std)
return (grad_input, grad_rois, grad_offset, None, None, None, None,
None, None, None, None)
deform_roi_pooling = DeformRoIPoolingFunction2D.apply
class DeformRoIPooling2D(nn.Module):
def __init__(self,
spatial_scale,
out_size,
out_channels,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
super(DeformRoIPooling2D, self).__init__()
self.spatial_scale = spatial_scale
self.out_size = _pair(out_size)
self.out_channels = out_channels
self.no_trans = no_trans
self.group_size = group_size
self.part_size = out_size if part_size is None else part_size
self.sample_per_part = sample_per_part
self.trans_std = trans_std
def forward(self, data, rois, offset):
if self.no_trans:
offset = data.new_empty(0)
return deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels,
self.no_trans, self.group_size,
self.part_size, self.sample_per_part,
self.trans_std)
class DeformRoIPoolingPack2D(DeformRoIPooling2D):
def __init__(self,
spatial_scale,
out_size,
out_channels,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0,
num_offset_fcs=3,
deform_fc_channels=1024):
super(DeformRoIPoolingPack2D,
self).__init__(spatial_scale, out_size, out_channels, no_trans,
group_size, part_size, sample_per_part, trans_std)
self.num_offset_fcs = num_offset_fcs
self.deform_fc_channels = deform_fc_channels
if not no_trans:
seq = []
ic = self.out_size[0] * self.out_size[1] * self.out_channels
for i in range(self.num_offset_fcs):
if i < self.num_offset_fcs - 1:
oc = self.deform_fc_channels
else:
oc = self.out_size[0] * self.out_size[1] * 2
seq.append(nn.Linear(ic, oc))
ic = oc
if i < self.num_offset_fcs - 1:
seq.append(nn.ReLU(inplace=True))
self.offset_fc = nn.Sequential(*seq)
self.offset_fc[-1].weight.data.zero_()
self.offset_fc[-1].bias.data.zero_()
def forward(self, data, rois):
assert data.size(1) == self.out_channels
n = rois.shape[0]
if n == 0:
return data.new_empty(n, self.out_channels, self.out_size[0],
self.out_size[1])
if self.no_trans:
offset = data.new_empty(0)
return deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels,
self.no_trans, self.group_size,
self.part_size, self.sample_per_part,
self.trans_std)
else:
offset = data.new_empty(0)
x = deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels, True,
self.group_size, self.part_size,
self.sample_per_part, self.trans_std)
offset = self.offset_fc(x.view(n, -1))
offset = offset.view(n, 2, self.out_size[0], self.out_size[1])
return deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels,
self.no_trans, self.group_size,
self.part_size, self.sample_per_part,
self.trans_std)
class ModulatedDeformRoIPoolingPack2D(DeformRoIPooling2D):
def __init__(self,
spatial_scale,
out_size,
out_channels,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0,
num_offset_fcs=3,
num_mask_fcs=2,
deform_fc_channels=1024):
super(ModulatedDeformRoIPoolingPack2D,
self).__init__(spatial_scale, out_size, out_channels, no_trans,
group_size, part_size, sample_per_part, trans_std)
self.num_offset_fcs = num_offset_fcs
self.num_mask_fcs = num_mask_fcs
self.deform_fc_channels = deform_fc_channels
if not no_trans:
offset_fc_seq = []
ic = self.out_size[0] * self.out_size[1] * self.out_channels
for i in range(self.num_offset_fcs):
if i < self.num_offset_fcs - 1:
oc = self.deform_fc_channels
else:
oc = self.out_size[0] * self.out_size[1] * 2
offset_fc_seq.append(nn.Linear(ic, oc))
ic = oc
if i < self.num_offset_fcs - 1:
offset_fc_seq.append(nn.ReLU(inplace=True))
self.offset_fc = nn.Sequential(*offset_fc_seq)
self.offset_fc[-1].weight.data.zero_()
self.offset_fc[-1].bias.data.zero_()
mask_fc_seq = []
ic = self.out_size[0] * self.out_size[1] * self.out_channels
for i in range(self.num_mask_fcs):
if i < self.num_mask_fcs - 1:
oc = self.deform_fc_channels
else:
oc = self.out_size[0] * self.out_size[1]
mask_fc_seq.append(nn.Linear(ic, oc))
ic = oc
if i < self.num_mask_fcs - 1:
mask_fc_seq.append(nn.ReLU(inplace=True))
else:
mask_fc_seq.append(nn.Sigmoid())
self.mask_fc = nn.Sequential(*mask_fc_seq)
self.mask_fc[-2].weight.data.zero_()
self.mask_fc[-2].bias.data.zero_()
def forward(self, data, rois):
assert data.size(1) == self.out_channels
n = rois.shape[0]
if n == 0:
return data.new_empty(n, self.out_channels, self.out_size[0],
self.out_size[1])
if self.no_trans:
offset = data.new_empty(0)
return deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels,
self.no_trans, self.group_size,
self.part_size, self.sample_per_part,
self.trans_std)
else:
offset = data.new_empty(0)
x = deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels, True,
self.group_size, self.part_size,
self.sample_per_part, self.trans_std)
offset = self.offset_fc(x.view(n, -1))
offset = offset.view(n, 2, self.out_size[0], self.out_size[1])
mask = self.mask_fc(x.view(n, -1))
mask = mask.view(n, 1, self.out_size[0], self.out_size[1])
return deform_roi_pooling(
data, rois, offset, self.spatial_scale, self.out_size,
self.out_channels, self.no_trans, self.group_size,
self.part_size, self.sample_per_part, self.trans_std) * mask
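A hedged usage sketch for the 2D deformable RoI pooling pack. The RoI format `(batch_idx, x1, y1, x2, y2)` and the `spatial_scale` value are assumptions following the usual detection convention; note that `forward` above asserts the input channel count equals `out_channels`.

```python
# Usage sketch (assumes compiled deform_pool_2d_cuda extension and a GPU).
import torch
from rodnet.ops.dcn import ModulatedDeformRoIPoolingPack2D

pool = ModulatedDeformRoIPoolingPack2D(
    spatial_scale=1.0 / 16,  # assumed feature-map stride
    out_size=7,
    out_channels=256,
    no_trans=False).cuda()

feat = torch.randn(1, 256, 50, 50, device='cuda')
# rois: (batch_idx, x1, y1, x2, y2) in input-image coordinates (assumed format)
rois = torch.tensor([[0., 0., 0., 320., 320.]], device='cuda')
out = pool(feat, rois)  # -> (1, 256, 7, 7)
```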
import torch
import torch.nn as nn
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.modules.utils import _pair
from . import deform_pool_3d_cuda
class DeformRoIPoolingFunction3D(Function):
@staticmethod
def forward(ctx,
data,
rois,
offset,
spatial_scale,
out_size,
out_channels,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
# TODO: support non-square RoIs
out_h, out_w = _pair(out_size)
assert isinstance(out_h, int) and isinstance(out_w, int)
assert out_h == out_w
out_size = out_h # out_h and out_w must be equal
ctx.spatial_scale = spatial_scale
ctx.out_size = out_size
ctx.out_channels = out_channels
ctx.no_trans = no_trans
ctx.group_size = group_size
ctx.part_size = out_size if part_size is None else part_size
ctx.sample_per_part = sample_per_part
ctx.trans_std = trans_std
assert 0.0 <= ctx.trans_std <= 1.0
if not data.is_cuda:
raise NotImplementedError
n = rois.shape[0]
output = data.new_empty(n, out_channels, out_size, out_size)
output_count = data.new_empty(n, out_channels, out_size, out_size)
deform_pool_3d_cuda.deform_psroi_pooling_cuda_forward(
data, rois, offset, output, output_count, ctx.no_trans,
ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size,
ctx.part_size, ctx.sample_per_part, ctx.trans_std)
if data.requires_grad or rois.requires_grad or offset.requires_grad:
ctx.save_for_backward(data, rois, offset)
ctx.output_count = output_count
return output
@staticmethod
@once_differentiable
def backward(ctx, grad_output):
if not grad_output.is_cuda:
raise NotImplementedError
data, rois, offset = ctx.saved_tensors
output_count = ctx.output_count
grad_input = torch.zeros_like(data)
grad_rois = None
grad_offset = torch.zeros_like(offset)
deform_pool_3d_cuda.deform_psroi_pooling_cuda_backward(
grad_output, data, rois, offset, output_count, grad_input,
grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels,
ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part,
ctx.trans_std)
return (grad_input, grad_rois, grad_offset, None, None, None, None,
None, None, None, None)
deform_roi_pooling = DeformRoIPoolingFunction3D.apply
class DeformRoIPooling3D(nn.Module):
def __init__(self,
spatial_scale,
out_size,
out_channels,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0):
super(DeformRoIPooling3D, self).__init__()
self.spatial_scale = spatial_scale
self.out_size = _pair(out_size)
self.out_channels = out_channels
self.no_trans = no_trans
self.group_size = group_size
self.part_size = out_size if part_size is None else part_size
self.sample_per_part = sample_per_part
self.trans_std = trans_std
def forward(self, data, rois, offset):
if self.no_trans:
offset = data.new_empty(0)
return deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels,
self.no_trans, self.group_size,
self.part_size, self.sample_per_part,
self.trans_std)
class DeformRoIPoolingPack3D(DeformRoIPooling3D):
def __init__(self,
spatial_scale,
out_size,
out_channels,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0,
num_offset_fcs=3,
deform_fc_channels=1024):
super(DeformRoIPoolingPack3D,
self).__init__(spatial_scale, out_size, out_channels, no_trans,
group_size, part_size, sample_per_part, trans_std)
self.num_offset_fcs = num_offset_fcs
self.deform_fc_channels = deform_fc_channels
if not no_trans:
seq = []
ic = self.out_size[0] * self.out_size[1] * self.out_channels
for i in range(self.num_offset_fcs):
if i < self.num_offset_fcs - 1:
oc = self.deform_fc_channels
else:
oc = self.out_size[0] * self.out_size[1] * 2
seq.append(nn.Linear(ic, oc))
ic = oc
if i < self.num_offset_fcs - 1:
seq.append(nn.ReLU(inplace=True))
self.offset_fc = nn.Sequential(*seq)
self.offset_fc[-1].weight.data.zero_()
self.offset_fc[-1].bias.data.zero_()
def forward(self, data, rois):
assert data.size(1) == self.out_channels
n = rois.shape[0]
if n == 0:
return data.new_empty(n, self.out_channels, self.out_size[0],
self.out_size[1])
if self.no_trans:
offset = data.new_empty(0)
return deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels,
self.no_trans, self.group_size,
self.part_size, self.sample_per_part,
self.trans_std)
else:
offset = data.new_empty(0)
x = deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels, True,
self.group_size, self.part_size,
self.sample_per_part, self.trans_std)
offset = self.offset_fc(x.view(n, -1))
offset = offset.view(n, 2, self.out_size[0], self.out_size[1])
return deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels,
self.no_trans, self.group_size,
self.part_size, self.sample_per_part,
self.trans_std)
class ModulatedDeformRoIPoolingPack3D(DeformRoIPooling3D):
def __init__(self,
spatial_scale,
out_size,
out_channels,
no_trans,
group_size=1,
part_size=None,
sample_per_part=4,
trans_std=.0,
num_offset_fcs=3,
num_mask_fcs=2,
deform_fc_channels=1024):
super(ModulatedDeformRoIPoolingPack3D,
self).__init__(spatial_scale, out_size, out_channels, no_trans,
group_size, part_size, sample_per_part, trans_std)
self.num_offset_fcs = num_offset_fcs
self.num_mask_fcs = num_mask_fcs
self.deform_fc_channels = deform_fc_channels
if not no_trans:
offset_fc_seq = []
ic = self.out_size[0] * self.out_size[1] * self.out_channels
for i in range(self.num_offset_fcs):
if i < self.num_offset_fcs - 1:
oc = self.deform_fc_channels
else:
oc = self.out_size[0] * self.out_size[1] * 2
offset_fc_seq.append(nn.Linear(ic, oc))
ic = oc
if i < self.num_offset_fcs - 1:
offset_fc_seq.append(nn.ReLU(inplace=True))
self.offset_fc = nn.Sequential(*offset_fc_seq)
self.offset_fc[-1].weight.data.zero_()
self.offset_fc[-1].bias.data.zero_()
mask_fc_seq = []
ic = self.out_size[0] * self.out_size[1] * self.out_channels
for i in range(self.num_mask_fcs):
if i < self.num_mask_fcs - 1:
oc = self.deform_fc_channels
else:
oc = self.out_size[0] * self.out_size[1]
mask_fc_seq.append(nn.Linear(ic, oc))
ic = oc
if i < self.num_mask_fcs - 1:
mask_fc_seq.append(nn.ReLU(inplace=True))
else:
mask_fc_seq.append(nn.Sigmoid())
self.mask_fc = nn.Sequential(*mask_fc_seq)
self.mask_fc[-2].weight.data.zero_()
self.mask_fc[-2].bias.data.zero_()
def forward(self, data, rois):
assert data.size(1) == self.out_channels
n = rois.shape[0]
if n == 0:
return data.new_empty(n, self.out_channels, self.out_size[0],
self.out_size[1])
if self.no_trans:
offset = data.new_empty(0)
return deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels,
self.no_trans, self.group_size,
self.part_size, self.sample_per_part,
self.trans_std)
else:
offset = data.new_empty(0)
x = deform_roi_pooling(data, rois, offset, self.spatial_scale,
self.out_size, self.out_channels, True,
self.group_size, self.part_size,
self.sample_per_part, self.trans_std)
offset = self.offset_fc(x.view(n, -1))
offset = offset.view(n, 2, self.out_size[0], self.out_size[1])
mask = self.mask_fc(x.view(n, -1))
mask = mask.view(n, 1, self.out_size[0], self.out_size[1])
return deform_roi_pooling(
data, rois, offset, self.spatial_scale, self.out_size,
self.out_channels, self.no_trans, self.group_size,
self.part_size, self.sample_per_part, self.trans_std) * mask
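The 3D pooling file mirrors its 2D counterpart almost line for line, and its exports are still commented out in `__init__.py` above. For reference, a sketch of the offset/mask FC heads that both Pack classes construct, using the default `num_offset_fcs=3`, `num_mask_fcs=2` and assumed sizes:

```python
# Equivalent shape of the offset/mask heads built in the Pack classes above
# (out_size=7, out_channels=256, deform_fc_channels=1024 are assumptions).
import torch.nn as nn

out_size, out_channels, fc = 7, 256, 1024
ic = out_size * out_size * out_channels  # flattened pooled feature: 12544

offset_fc = nn.Sequential(                   # num_offset_fcs = 3
    nn.Linear(ic, fc), nn.ReLU(inplace=True),
    nn.Linear(fc, fc), nn.ReLU(inplace=True),
    nn.Linear(fc, out_size * out_size * 2))  # 2 offset values per output bin

mask_fc = nn.Sequential(                     # num_mask_fcs = 2
    nn.Linear(ic, fc), nn.ReLU(inplace=True),
    nn.Linear(fc, out_size * out_size),      # 1 mask value per bin
    nn.Sigmoid())
```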
// modify from
// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
// based on
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
#include <torch/extension.h>
#include <ATen/DeviceGuard.h>
#include <cmath>
#include <vector>
void DeformablePSROIPoolForward(
const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
at::Tensor out, at::Tensor top_count, const int batch, const int channels,
const int height, const int width, const int num_bbox,
const int channels_trans, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std);
void DeformablePSROIPoolBackwardAcc(
const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,
at::Tensor trans_grad, const int batch, const int channels,
const int height, const int width, const int num_bbox,
const int channels_trans, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std);
void deform_psroi_pooling_cuda_forward(
at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
at::Tensor top_count, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std) {
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
at::DeviceGuard guard(input.device());
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0);
if (num_bbox != out.size(0))
AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
out.size(0), num_bbox);
DeformablePSROIPoolForward(
input, bbox, trans, out, top_count, batch, channels, height, width,
num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,
pooled_size, part_size, sample_per_part, trans_std);
}
void deform_psroi_pooling_cuda_backward(
at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
const int no_trans, const float spatial_scale, const int output_dim,
const int group_size, const int pooled_size, const int part_size,
const int sample_per_part, const float trans_std) {
AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
at::DeviceGuard guard(input.device());
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0);
if (num_bbox != out_grad.size(0))
AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
out_grad.size(0), num_bbox);
DeformablePSROIPoolBackwardAcc(
out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,
channels, height, width, num_bbox, channels_trans, no_trans,
spatial_scale, output_dim, group_size, pooled_size, part_size,
sample_per_part, trans_std);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward,
"deform psroi pooling forward(CUDA)");
m.def("deform_psroi_pooling_cuda_backward",
&deform_psroi_pooling_cuda_backward,
"deform psroi pooling backward(CUDA)");
}
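These bindings are normally built ahead of time by `setup.py` (shown later in this diff), but for quick experiments the same sources could be JIT-compiled; a sketch using `torch.utils.cpp_extension.load`, with the source paths assumed from the extension definitions below:

```python
# JIT-compile sketch (source paths are assumptions based on setup.py below).
from torch.utils.cpp_extension import load

deform_pool_2d_cuda = load(
    name='deform_pool_2d_cuda',
    sources=[
        'rodnet/ops/dcn/src/deform_pool_2d_cuda.cpp',
        'rodnet/ops/dcn/src/deform_pool_2d_cuda_kernel.cu',
    ],
    verbose=True)

# The module then exposes the two functions bound via PYBIND11_MODULE above:
#   deform_pool_2d_cuda.deform_psroi_pooling_cuda_forward(...)
#   deform_pool_2d_cuda.deform_psroi_pooling_cuda_backward(...)
```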
// modify from
// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
// based on
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
#include <torch/extension.h>
#include <ATen/DeviceGuard.h>
#include <cmath>
#include <vector>
void DeformablePSROIPoolForward(
const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
at::Tensor out, at::Tensor top_count, const int batch, const int channels,
const int height, const int width, const int num_bbox,
const int channels_trans, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std);
void DeformablePSROIPoolBackwardAcc(
const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,
at::Tensor trans_grad, const int batch, const int channels,
const int height, const int width, const int num_bbox,
const int channels_trans, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std);
void deform_psroi_pooling_cuda_forward(
at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
at::Tensor top_count, const int no_trans, const float spatial_scale,
const int output_dim, const int group_size, const int pooled_size,
const int part_size, const int sample_per_part, const float trans_std) {
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
at::DeviceGuard guard(input.device());
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0);
if (num_bbox != out.size(0))
AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
out.size(0), num_bbox);
DeformablePSROIPoolForward(
input, bbox, trans, out, top_count, batch, channels, height, width,
num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,
pooled_size, part_size, sample_per_part, trans_std);
}
void deform_psroi_pooling_cuda_backward(
at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
const int no_trans, const float spatial_scale, const int output_dim,
const int group_size, const int pooled_size, const int part_size,
const int sample_per_part, const float trans_std) {
AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
at::DeviceGuard guard(input.device());
const int batch = input.size(0);
const int channels = input.size(1);
const int height = input.size(2);
const int width = input.size(3);
const int channels_trans = no_trans ? 2 : trans.size(1);
const int num_bbox = bbox.size(0);
if (num_bbox != out_grad.size(0))
AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
out_grad.size(0), num_bbox);
DeformablePSROIPoolBackwardAcc(
out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,
channels, height, width, num_bbox, channels_trans, no_trans,
spatial_scale, output_dim, group_size, pooled_size, part_size,
sample_per_part, trans_std);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward,
"deform psroi pooling forward(CUDA)");
m.def("deform_psroi_pooling_cuda_backward",
&deform_psroi_pooling_cuda_backward,
"deform psroi pooling backward(CUDA)");
}
@@ -18,3 +18,13 @@ def load_configs_from_file(config_path):
if not name.startswith('__')
}
return cfg_dict
def update_config_dict(config_dict, args):
data_root_old = config_dict['dataset_cfg']['base_root']
config_dict['dataset_cfg']['base_root'] = args.data_root
config_dict['dataset_cfg']['data_root'] = config_dict['dataset_cfg']['data_root'].replace(data_root_old,
args.data_root)
config_dict['dataset_cfg']['anno_root'] = config_dict['dataset_cfg']['anno_root'].replace(data_root_old,
args.data_root)
return config_dict
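To illustrate, `update_config_dict` rewrites every path that starts with the old `base_root`; a small sketch with hypothetical values:

```python
# Hypothetical illustration of update_config_dict (values are made up).
from types import SimpleNamespace

config_dict = {'dataset_cfg': {
    'base_root': '/old/data',
    'data_root': '/old/data/sequences',
    'anno_root': '/old/data/annotations',
}}
args = SimpleNamespace(data_root='/new/data')

config_dict = update_config_dict(config_dict, args)
# dataset_cfg now reads:
#   base_root -> '/new/data'
#   data_root -> '/new/data/sequences'
#   anno_root -> '/new/data/annotations'
```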
@@ -6,6 +6,7 @@ from torch.utils.cpp_extension import BuildExtension, CUDAExtension
os.environ['CFLAGS'] = '-Wno-deprecated-declarations'  # suppress warnings in debug mode
def readme():
with open('README.md', encoding='utf-8') as f:
content = f.read()
@@ -44,7 +45,7 @@ def make_cuda_ext(name, module, sources):
if __name__ == '__main__':
setup(
name='rodnet',
version='1.1',
description='RODNet: Object Detection from Radar Data',
long_description=readme(),
long_description_content_type='text/markdown',
@@ -77,7 +78,39 @@ if __name__ == '__main__':
keywords='rodnet, object detection, radar, autonomous driving',
packages=find_packages(include=["rodnet.*"]),
package_data={'rodnet.ops': ['*/*.so']},
python_requires='>=3.6',
install_requires=get_requirements(),
ext_modules=[
make_cuda_ext(
name='deform_conv_2d_cuda',
module='rodnet.ops.dcn',
sources=[
'src/deform_conv_2d_cuda.cpp',
'src/deform_conv_2d_cuda_kernel.cu'
]),
make_cuda_ext(
name='deform_conv_3d_cuda',
module='rodnet.ops.dcn',
sources=[
'src/deform_conv_3d_cuda.cpp',
'src/deform_conv_3d_cuda_kernel.cu'
]),
make_cuda_ext(
name='deform_pool_2d_cuda',
module='rodnet.ops.dcn',
sources=[
'src/deform_pool_2d_cuda.cpp',
'src/deform_pool_2d_cuda_kernel.cu'
]),
make_cuda_ext(
name='deform_pool_3d_cuda',
module='rodnet.ops.dcn',
sources=[
'src/deform_pool_3d_cuda.cpp',
'src/deform_pool_3d_cuda_kernel.cu'
]),
],
cmdclass={'build_ext': BuildExtension},
zip_safe=False
)
import os
from setuptools import setup, find_packages
import torch
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
os.environ['CFLAGS'] = '-Wno-deprecated-declarations' # suppress warnings in debug mode
def readme():
with open('README.md', encoding='utf-8') as f:
content = f.read()
return content
def get_requirements(filename='requirements.txt'):
here = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(here, filename), 'r') as f:
requires = [line.replace('\n', '') for line in f.readlines()]
return requires
def make_cuda_ext(name, module, sources):
define_macros = []
if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
define_macros += [('WITH_CUDA', None)]
else:
raise EnvironmentError('CUDA is required to compile RODNet!')
return CUDAExtension(
name='{}.{}'.format(module, name),
sources=[os.path.join(*module.split('.'), p) for p in sources],
define_macros=define_macros,
extra_compile_args={
'cxx': [],
'nvcc': [
'-D__CUDA_NO_HALF_OPERATORS__',
'-D__CUDA_NO_HALF_CONVERSIONS__',
'-D__CUDA_NO_HALF2_OPERATORS__',
]
})
if __name__ == '__main__':
setup(
name='rodnet',
version='1.1',
description='RODNet: Object Detection from Radar Data',
long_description=readme(),
long_description_content_type='text/markdown',
url='https://github.com/yizhou-wang/RODNet',
author='Yizhou Wang',
author_email='ywang26@uw.edu',
classifiers=[
# How mature is this project? Common values are
# 3 - Alpha
# 4 - Beta
# 5 - Production/Stable
'Development Status :: 3 - Alpha',
# Indicate who your project is intended for
'Intended Audience :: Developers',
'Topic :: Software Development :: Build Tools',
# Pick your license as you wish
'License :: OSI Approved :: MIT License',
# Specify the Python versions you support here. In particular, ensure
# that you indicate whether you support Python 2, Python 3 or both.
# These classifiers are *not* checked by 'pip install'. See instead
# 'python_requires' below.
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
],
keywords='rodnet, object detection, radar, autonomous driving',
packages=find_packages(include=["rodnet.*"]),
# package_data={'rodnet.ops': ['*/*.so']},
python_requires='>=3.6',
install_requires=get_requirements(),
# ext_modules=[
# make_cuda_ext(
# name='deform_conv_2d_cuda',
# module='rodnet.ops.dcn',
# sources=[
# 'src/deform_conv_2d_cuda.cpp',
# 'src/deform_conv_2d_cuda_kernel.cu'
# ]),
# make_cuda_ext(
# name='deform_conv_3d_cuda',
# module='rodnet.ops.dcn',
# sources=[
# 'src/deform_conv_3d_cuda.cpp',
# 'src/deform_conv_3d_cuda_kernel.cu'
# ]),
# make_cuda_ext(
# name='deform_pool_2d_cuda',
# module='rodnet.ops.dcn',
# sources=[
# 'src/deform_pool_2d_cuda.cpp',
# 'src/deform_pool_2d_cuda_kernel.cu'
# ]),
# make_cuda_ext(
# name='deform_pool_3d_cuda',
# module='rodnet.ops.dcn',
# sources=[
# 'src/deform_pool_3d_cuda.cpp',
# 'src/deform_pool_3d_cuda_kernel.cu'
# ]),
# ],
# cmdclass={'build_ext': BuildExtension},
zip_safe=False
)
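With the `ext_modules` list enabled (as in the diff above; the copy directly above keeps it commented out), the ops compile through the standard PyTorch extension flow. A small pre-build environment check, with the shell commands left as comments:

```python
# Sanity check before compiling the CUDA ops (a sketch; adjust as needed).
import torch

assert torch.cuda.is_available(), 'CUDA is required to compile RODNet!'
print('torch CUDA version:', torch.version.cuda)
print('device:', torch.cuda.get_device_name(0))
# Then, from the repository root:
#   pip install -e .        # or: python setup.py develop
```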
@@ -11,7 +11,7 @@ from cruw.annotation.init_json import init_meta_json
from cruw.mapping import ra2idx
from rodnet.core.confidence_map import generate_confmap, normalize_confmap, add_noise_channel
from rodnet.utils.load_configs import load_configs_from_file, update_config_dict
from rodnet.utils.visualization import visualize_confmap
SPLITS_LIST = ['train', 'valid', 'test', 'demo']
@@ -20,7 +20,8 @@ SPLITS_LIST = ['train', 'valid', 'test', 'demo']
def parse_args():
parser = argparse.ArgumentParser(description='Prepare RODNet data.')
parser.add_argument('--config', type=str, dest='config', help='configuration file path')
parser.add_argument('--data_root', type=str,
help='directory to the dataset (will overwrite data_root in config file)')
parser.add_argument('--sensor_config', type=str, default='sensor_config_rod2021')
parser.add_argument('--split', type=str, dest='split', default='',
help='choose from train, valid, test, supertest')
@@ -220,6 +221,7 @@ if __name__ == "__main__":
dataset = CRUW(data_root=data_root, sensor_config_name=args.sensor_config)
config_dict = load_configs_from_file(args.config)
config_dict = update_config_dict(config_dict, args) # update configs by args
radar_configs = dataset.sensor_cfg.radar_cfg
if splits == None:
...