Commit b952e97b authored by chenych

First Commit.
from functools import partial

import numpy as np
import torch.nn as nn
from six.moves import map, zip

from .conv_module import ConvModule, bias_init_with_prob, normal_init
def multi_apply(func, *args, **kwargs):
    """Apply `func` across the argument lists in parallel and regroup the
    per-call results into per-field lists."""
    pfunc = partial(func, **kwargs) if kwargs else func
map_results = map(pfunc, *args)
return tuple(map(list, zip(*map_results)))
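# Usage sketch for multi_apply: the results of each call are transposed into
# per-field lists, e.g.:
# >>> def f(a, b):
# ...     return a + b, a * b
# >>> multi_apply(f, [1, 2], [3, 4])
# ([4, 6], [3, 8])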
class RetinaHead(nn.Module):
"""
An anchor-based head used in [1]_.
    The original head contains two subnetworks: the first classifies anchor
    boxes and the second regresses deltas for the anchors. In this version
    the regression branch is disabled, so only classification scores are
    produced.
References:
.. [1] https://arxiv.org/pdf/1708.02002.pdf
Example:
>>> import torch
>>> self = RetinaHead(11, 7)
>>> x = torch.rand(1, 7, 32, 32)
    >>> cls_score = self.forward_single(x)[0]
    >>> # One score map per class at every spatial location
    >>> assert cls_score.shape[1] == self.cls_out_channels
"""
def __init__(self,
num_classes,
in_channels,
feat_channels=64,
stacked_convs=4,
octave_base_scale=4,
scales_per_octave=3,
conv_cfg=None,
norm_cfg=None,
**kwargs):
super(RetinaHead, self).__init__()
self.in_channels = in_channels
self.num_classes = num_classes
self.feat_channels = feat_channels
self.stacked_convs = stacked_convs
self.octave_base_scale = octave_base_scale
self.scales_per_octave = scales_per_octave
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
        # note: octave_scales is computed here but not used by this head
        octave_scales = np.array(
            [2**(i / scales_per_octave) for i in range(scales_per_octave)])
self.cls_out_channels = num_classes
self._init_layers()
def _init_layers(self):
self.relu = nn.ReLU(inplace=True)
self.cls_convs = nn.ModuleList()
#self.reg_convs = nn.ModuleList()
for i in range(self.stacked_convs):
chn = self.in_channels if i == 0 else self.feat_channels
self.cls_convs.append(
ConvModule(
chn,
self.feat_channels,
3,
stride=1,
padding=1,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg))
self.retina_cls = nn.Conv2d(
self.feat_channels,
self.cls_out_channels,
3,
padding=1)
#self.output_act = nn.Sigmoid()
def init_weights(self):
for m in self.cls_convs:
normal_init(m.conv, std=0.01)
        # the regression branch is disabled in _init_layers, so there is no
        # self.reg_convs to initialize here
        #for m in self.reg_convs:
        #    normal_init(m.conv, std=0.01)
bias_cls = bias_init_with_prob(0.01)
normal_init(self.retina_cls, std=0.01, bias=bias_cls)
#normal_init(self.retina_reg, std=0.01)
def forward_single(self, x):
cls_feat = x
#reg_feat = x
for cls_conv in self.cls_convs:
cls_feat = cls_conv(cls_feat)
#for reg_conv in self.reg_convs:
# reg_feat = reg_conv(reg_feat)
cls_score = self.retina_cls(cls_feat)
        # out is B x C x H x W, with C = cls_out_channels (= num_classes)
#cls_score = cls_score.permute(0, 2, 3, 1)
#batch_size, width, height, channels = cls_score.shape
#cls_score = cls_score.view(batch_size, width, height, self.num_anchors, self.num_classes)
#cls_score = cls_score.contiguous().view(x.size(0), -1, self.num_classes)
#bbox_pred = self.retina_reg(reg_feat)
#bbox_pred = bbox_pred.permute(0, 2, 3, 1)
#bbox_pred = bbox_pred.contiguous().view(bbox_pred.size(0), -1, 4)
return [cls_score]
def forward(self, feats):
return multi_apply(self.forward_single, feats)
import re
import math
import collections
from functools import partial
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils import model_zoo
########################################################################
############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ###############
########################################################################
# Parameters for the entire model (stem, all blocks, and head)
GlobalParams = collections.namedtuple('GlobalParams', [
'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate',
'num_classes', 'width_coefficient', 'depth_coefficient',
'depth_divisor', 'min_depth', 'drop_connect_rate', 'image_size'])
# Parameters for an individual model block
BlockArgs = collections.namedtuple('BlockArgs', [
'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
'expand_ratio', 'id_skip', 'stride', 'se_ratio'])
# Change namedtuple defaults
GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields)
BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields)
class SwishImplementation(torch.autograd.Function):
@staticmethod
def forward(ctx, i):
result = i * torch.sigmoid(i)
ctx.save_for_backward(i)
return result
@staticmethod
def backward(ctx, grad_output):
        i = ctx.saved_tensors[0]
sigmoid_i = torch.sigmoid(i)
return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
class MemoryEfficientSwish(nn.Module):
def forward(self, x):
return SwishImplementation.apply(x)
class Swish(nn.Module):
def forward(self, x):
return x * torch.sigmoid(x)
def round_filters(filters, global_params):
""" Calculate and round number of filters based on depth multiplier. """
multiplier = global_params.width_coefficient
if not multiplier:
return filters
divisor = global_params.depth_divisor
min_depth = global_params.min_depth
filters *= multiplier
min_depth = min_depth or divisor
new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
if new_filters < 0.9 * filters: # prevent rounding by more than 10%
new_filters += divisor
return int(new_filters)
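# Worked example (sketch; GlobalParams fields default to None, so a partial
# construction is enough):
# >>> gp = GlobalParams(width_coefficient=1.2, depth_divisor=8, min_depth=None)
# >>> round_filters(32, gp)   # 32 * 1.2 = 38.4 -> nearest multiple of 8
# 40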
def round_repeats(repeats, global_params):
""" Round number of filters based on depth multiplier. """
multiplier = global_params.depth_coefficient
if not multiplier:
return repeats
return int(math.ceil(multiplier * repeats))
def drop_connect(inputs, p, training):
""" Drop connect. """
if not training: return inputs
batch_size = inputs.shape[0]
keep_prob = 1 - p
random_tensor = keep_prob
random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device)
binary_tensor = torch.floor(random_tensor)
output = inputs / keep_prob * binary_tensor
return output
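# Note: drop_connect zeroes each sample in the batch with probability p and
# rescales survivors by 1 / (1 - p), leaving the expected activation
# unchanged; with training=False it is the identity.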
def get_same_padding_conv2d(image_size=None):
""" Chooses static padding if you have specified an image size, and dynamic padding otherwise.
Static padding is necessary for ONNX exporting of models. """
if image_size is None:
return Conv2dDynamicSamePadding
else:
return partial(Conv2dStaticSamePadding, image_size=image_size)
class Conv2dDynamicSamePadding(nn.Conv2d):
""" 2D Convolutions like TensorFlow, for a dynamic image size """
def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True):
super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias)
self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
def forward(self, x):
ih, iw = x.size()[-2:]
kh, kw = self.weight.size()[-2:]
sh, sw = self.stride
oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
if pad_h > 0 or pad_w > 0:
x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2])
return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
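# Note: this reproduces TensorFlow 'SAME' padding: the output size is
# ceil(input / stride), and any required padding is computed per forward pass
# and split so the right/bottom sides receive the extra pixel.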
class Conv2dStaticSamePadding(nn.Conv2d):
""" 2D Convolutions like TensorFlow, for a fixed image size"""
def __init__(self, in_channels, out_channels, kernel_size, image_size=None, **kwargs):
super().__init__(in_channels, out_channels, kernel_size, **kwargs)
self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2
# Calculate padding based on image size and save it
assert image_size is not None
        ih, iw = image_size if isinstance(image_size, (list, tuple)) else [image_size, image_size]
kh, kw = self.weight.size()[-2:]
sh, sw = self.stride
oh, ow = math.ceil(ih / sh), math.ceil(iw / sw)
pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0)
pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0)
if pad_h > 0 or pad_w > 0:
self.static_padding = nn.ZeroPad2d((pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2))
else:
self.static_padding = Identity()
def forward(self, x):
x = self.static_padding(x)
x = F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
return x
class Identity(nn.Module):
def __init__(self, ):
super(Identity, self).__init__()
def forward(self, input):
return input
########################################################################
############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ##############
########################################################################
def efficientnet_params(model_name):
""" Map EfficientNet model name to parameter coefficients. """
params_dict = {
# Coefficients: width,depth,res,dropout
'efficientnet-b0': (1.0, 1.0, 224, 0.2),
'efficientnet-b1': (1.0, 1.1, 240, 0.2),
'efficientnet-b2': (1.1, 1.2, 260, 0.3),
'efficientnet-b3': (1.2, 1.4, 300, 0.3),
'efficientnet-b4': (1.4, 1.8, 380, 0.4),
'efficientnet-b5': (1.6, 2.2, 456, 0.4),
'efficientnet-b6': (1.8, 2.6, 528, 0.5),
'efficientnet-b7': (2.0, 3.1, 600, 0.5),
}
return params_dict[model_name]
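# Example (sketch): the returned tuple is (width_coefficient,
# depth_coefficient, resolution, dropout_rate).
# >>> efficientnet_params('efficientnet-b0')
# (1.0, 1.0, 224, 0.2)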
class BlockDecoder(object):
""" Block Decoder for readability, straight from the official TensorFlow repository """
@staticmethod
def _decode_block_string(block_string):
""" Gets a block through a string notation of arguments. """
assert isinstance(block_string, str)
ops = block_string.split('_')
options = {}
for op in ops:
splits = re.split(r'(\d.*)', op)
if len(splits) >= 2:
key, value = splits[:2]
options[key] = value
        # Check stride: it must be present, and a two-digit stride must be square
        assert 's' in options and (len(options['s']) == 1 or
                                   (len(options['s']) == 2 and
                                    options['s'][0] == options['s'][1]))
return BlockArgs(
kernel_size=int(options['k']),
num_repeat=int(options['r']),
input_filters=int(options['i']),
output_filters=int(options['o']),
expand_ratio=int(options['e']),
id_skip=('noskip' not in block_string),
se_ratio=float(options['se']) if 'se' in options else None,
stride=[int(options['s'][0])])
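    # Decoding sketch for the notation used in `efficientnet` below:
    # >>> BlockDecoder._decode_block_string('r1_k3_s11_e1_i32_o16_se0.25')
    # BlockArgs(kernel_size=3, num_repeat=1, input_filters=32,
    #           output_filters=16, expand_ratio=1, id_skip=True,
    #           stride=[1], se_ratio=0.25)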
@staticmethod
def _encode_block_string(block):
"""Encodes a block to a string."""
args = [
'r%d' % block.num_repeat,
'k%d' % block.kernel_size,
            's%d%d' % (block.stride[0], block.stride[0]),  # stride is a one-element list
'e%s' % block.expand_ratio,
'i%d' % block.input_filters,
'o%d' % block.output_filters
]
        if block.se_ratio is not None and 0 < block.se_ratio <= 1:
args.append('se%s' % block.se_ratio)
if block.id_skip is False:
args.append('noskip')
return '_'.join(args)
@staticmethod
def decode(string_list):
"""
Decodes a list of string notations to specify blocks inside the network.
:param string_list: a list of strings, each string is a notation of block
:return: a list of BlockArgs namedtuples of block args
"""
assert isinstance(string_list, list)
blocks_args = []
for block_string in string_list:
blocks_args.append(BlockDecoder._decode_block_string(block_string))
return blocks_args
@staticmethod
def encode(blocks_args):
"""
Encodes a list of BlockArgs to a list of strings.
:param blocks_args: a list of BlockArgs namedtuples of block args
:return: a list of strings, each string is a notation of block
"""
block_strings = []
for block in blocks_args:
block_strings.append(BlockDecoder._encode_block_string(block))
return block_strings
def efficientnet(width_coefficient=None, depth_coefficient=None, dropout_rate=0.2,
                 drop_connect_rate=0.2, image_size=None, num_classes=1000):
    """ Creates an EfficientNet model. """
blocks_args = [
'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25',
'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25',
'r3_k5_s22_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25',
'r1_k3_s22_e6_i192_o320_se0.25',
]
blocks_args = BlockDecoder.decode(blocks_args)
global_params = GlobalParams(
batch_norm_momentum=0.99,
batch_norm_epsilon=1e-3,
dropout_rate=dropout_rate,
drop_connect_rate=drop_connect_rate,
# data_format='channels_last', # removed, this is always true in PyTorch
num_classes=num_classes,
width_coefficient=width_coefficient,
depth_coefficient=depth_coefficient,
depth_divisor=8,
min_depth=None,
image_size=image_size,
)
return blocks_args, global_params
def get_model_params(model_name, override_params):
""" Get the block args and global params for a given model """
if model_name.startswith('efficientnet'):
w, d, s, p = efficientnet_params(model_name)
# note: all models have drop connect rate = 0.2
blocks_args, global_params = efficientnet(
width_coefficient=w, depth_coefficient=d, dropout_rate=p, image_size=s)
else:
raise NotImplementedError('model name is not pre-defined: %s' % model_name)
if override_params:
# ValueError will be raised here if override_params has fields not included in global_params.
global_params = global_params._replace(**override_params)
return blocks_args, global_params
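# Example (sketch): fetch the B0 configuration and override a global field.
# >>> blocks_args, global_params = get_model_params('efficientnet-b0',
# ...                                               {'num_classes': 10})
# >>> global_params.num_classes
# 10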
url_map = {
'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth',
'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth',
'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth',
'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth',
'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth',
'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth',
'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth',
'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth',
}
def load_pretrained_weights(model, model_name, load_fc=True):
""" Loads pretrained weights, and downloads if loading for the first time. """
state_dict = model_zoo.load_url(url_map[model_name], map_location=lambda storage, loc: storage)
if load_fc:
model.load_state_dict(state_dict)
else:
state_dict.pop('_fc.weight')
state_dict.pop('_fc.bias')
res = model.load_state_dict(state_dict, strict=False)
assert set(res.missing_keys) == set(['_fc.weight', '_fc.bias']), 'issue loading pretrained weights'
print('Loaded pretrained weights for {}'.format(model_name))
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvLayer(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel=3, stride=1, dropout=0.1):
        super().__init__()
        self.add_module('conv', nn.Conv2d(in_channels, out_channels, kernel_size=kernel,
                                          stride=stride, padding=kernel//2, bias=False))
        self.add_module('norm', nn.BatchNorm2d(out_channels))
        self.add_module('relu', nn.ReLU(inplace=True))
        # nn.Sequential already provides forward
class HarDBlock(nn.Module):
def get_link(self, layer, base_ch, growth_rate, grmul):
if layer == 0:
return base_ch, 0, []
out_channels = growth_rate
link = []
for i in range(10):
dv = 2 ** i
if layer % dv == 0:
k = layer - dv
link.append(k)
if i > 0:
out_channels *= grmul
out_channels = int(int(out_channels + 1) / 2) * 2
in_channels = 0
for i in link:
ch,_,_ = self.get_link(i, base_ch, growth_rate, grmul)
in_channels += ch
return out_channels, in_channels, link
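    # Link sketch: layer L connects back to layer L - 2**i for every power of
    # two dividing L, and each extra hop scales the growth rate by grmul
    # (rounded down to an even channel count). With growth_rate=10, grmul=1.7:
    # >>> blk = HarDBlock(64, 10, 1.7, 4)
    # >>> blk.get_link(4, 64, 10, 1.7)
    # (28, 92, [3, 2, 0])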
def get_out_ch(self):
return self.out_channels
def __init__(self, in_channels, growth_rate, grmul, n_layers, keepBase=False, residual_out=False):
super().__init__()
self.keepBase = keepBase
self.links = []
layers_ = []
self.out_channels = 0 # if upsample else in_channels
for i in range(n_layers):
outch, inch, link = self.get_link(i+1, in_channels, growth_rate, grmul)
self.links.append(link)
layers_.append(ConvLayer(inch, outch))
if (i % 2 == 0) or (i == n_layers - 1):
self.out_channels += outch
#print("Blk out =",self.out_channels)
self.layers = nn.ModuleList(layers_)
def forward(self, x):
layers_ = [x]
for layer in range(len(self.layers)):
link = self.links[layer]
tin = []
for i in link:
tin.append(layers_[i])
if len(tin) > 1:
x = torch.cat(tin, 1)
else:
x = tin[0]
out = self.layers[layer](x)
layers_.append(out)
t = len(layers_)
out_ = []
for i in range(t):
if (i == 0 and self.keepBase) or \
(i == t-1) or (i%2 == 1):
out_.append(layers_[i])
out = torch.cat(out_, 1)
return out
class TransitionUp(nn.Module):
def __init__(self, in_channels, out_channels):
super().__init__()
#print("upsample",in_channels, out_channels)
def forward(self, x, skip, concat=True):
out = F.interpolate(
x,
size=(skip.size(2), skip.size(3)),
mode="bilinear",
align_corners=True,
)
if concat:
out = torch.cat([out, skip], 1)
return out
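# Note: TransitionUp ignores its constructor arguments; it only resizes x to
# the skip tensor's spatial size with bilinear interpolation and optionally
# concatenates the two along the channel dimension.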
class hardnet(nn.Module):
def __init__(self):
super(hardnet, self).__init__()
first_ch = [16,24,32,48]
ch_list = [ 64, 96, 160, 224, 320]
grmul = 1.7
gr = [ 10,16,18,24,32]
n_layers = [ 4, 4, 8, 8, 8]
blks = len(n_layers)
self.shortcut_layers = []
self.base = nn.ModuleList([])
        self.base.append(ConvLayer(in_channels=3, out_channels=first_ch[0],
                                   kernel=3, stride=2))
        self.base.append(ConvLayer(first_ch[0], first_ch[1], kernel=3))
        self.base.append(ConvLayer(first_ch[1], first_ch[2], kernel=3, stride=2))
        self.base.append(ConvLayer(first_ch[2], first_ch[3], kernel=3))
skip_connection_channel_counts = []
ch = first_ch[3]
for i in range(blks):
blk = HarDBlock(ch, gr[i], grmul, n_layers[i])
ch = blk.get_out_ch()
skip_connection_channel_counts.append(ch)
            self.base.append(blk)
            if i < blks-1:
                self.shortcut_layers.append(len(self.base)-1)
            self.base.append(ConvLayer(ch, ch_list[i], kernel=1))
            ch = ch_list[i]
            if i < blks-1:
                self.base.append(nn.AvgPool2d(kernel_size=2, stride=2))
cur_channels_count = ch
prev_block_channels = ch
n_blocks = blks-1
self.n_blocks = n_blocks
#######################
# Upsampling path #
#######################
self.transUpBlocks = nn.ModuleList([])
self.denseBlocksUp = nn.ModuleList([])
self.conv1x1_up = nn.ModuleList([])
for i in range(n_blocks-1,-1,-1):
self.transUpBlocks.append(TransitionUp(prev_block_channels, prev_block_channels))
cur_channels_count = prev_block_channels + skip_connection_channel_counts[i]
self.conv1x1_up.append(ConvLayer(cur_channels_count, cur_channels_count//2, kernel=1))
cur_channels_count = cur_channels_count//2
blk = HarDBlock(cur_channels_count, gr[i], grmul, n_layers[i])
self.denseBlocksUp.append(blk)
prev_block_channels = blk.get_out_ch()
cur_channels_count = prev_block_channels
def forward(self, x):
skip_connections = []
size_in = x.size()
for i in range(len(self.base)):
x = self.base[i](x)
if i in self.shortcut_layers:
skip_connections.append(x)
out = x
for i in range(self.n_blocks):
skip = skip_connections.pop()
out = self.transUpBlocks[i](out, skip, True)
out = self.conv1x1_up[i](out)
out = self.denseBlocksUp[i](out)
return out
def get_hard_net(num_layers, cfg):
model = hardnet()
return model
# ------------------------------------------------------------------------------
# This code is base on
# CornerNet (https://github.com/princeton-vl/CornerNet)
# Copyright (c) 2018, University of Michigan
# Licensed under the BSD 3-Clause License
# ------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function
import numpy as np
import torch
import torch.nn as nn
class convolution(nn.Module):
def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
super(convolution, self).__init__()
pad = (k - 1) // 2
self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)
self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
conv = self.conv(x)
bn = self.bn(conv)
relu = self.relu(bn)
return relu
class fully_connected(nn.Module):
def __init__(self, inp_dim, out_dim, with_bn=True):
super(fully_connected, self).__init__()
self.with_bn = with_bn
self.linear = nn.Linear(inp_dim, out_dim)
if self.with_bn:
self.bn = nn.BatchNorm1d(out_dim)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
linear = self.linear(x)
bn = self.bn(linear) if self.with_bn else linear
relu = self.relu(bn)
return relu
class residual(nn.Module):
def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
super(residual, self).__init__()
self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False)
self.bn1 = nn.BatchNorm2d(out_dim)
self.relu1 = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False)
self.bn2 = nn.BatchNorm2d(out_dim)
self.skip = nn.Sequential(
nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False),
nn.BatchNorm2d(out_dim)
) if stride != 1 or inp_dim != out_dim else nn.Sequential()
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
conv1 = self.conv1(x)
bn1 = self.bn1(conv1)
relu1 = self.relu1(bn1)
conv2 = self.conv2(relu1)
bn2 = self.bn2(conv2)
skip = self.skip(x)
return self.relu(bn2 + skip)
def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
layers = [layer(k, inp_dim, out_dim, **kwargs)]
for _ in range(1, modules):
layers.append(layer(k, out_dim, out_dim, **kwargs))
return nn.Sequential(*layers)
def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
layers = []
for _ in range(modules - 1):
layers.append(layer(k, inp_dim, inp_dim, **kwargs))
layers.append(layer(k, inp_dim, out_dim, **kwargs))
return nn.Sequential(*layers)
class MergeUp(nn.Module):
def forward(self, up1, up2):
return up1 + up2
def make_merge_layer(dim):
return MergeUp()
# def make_pool_layer(dim):
# return nn.MaxPool2d(kernel_size=2, stride=2)
def make_pool_layer(dim):
return nn.Sequential()
def make_unpool_layer(dim):
return nn.Upsample(scale_factor=2)
def make_kp_layer(cnv_dim, curr_dim, out_dim):
return nn.Sequential(
convolution(3, cnv_dim, curr_dim, with_bn=False),
nn.Conv2d(curr_dim, out_dim, (1, 1))
)
def make_inter_layer(dim):
return residual(3, dim, dim)
def make_cnv_layer(inp_dim, out_dim):
return convolution(3, inp_dim, out_dim)
class kp_module(nn.Module):
def __init__(
self, n, dims, modules, layer=residual,
make_up_layer=make_layer, make_low_layer=make_layer,
make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer, **kwargs
):
super(kp_module, self).__init__()
self.n = n
curr_mod = modules[0]
next_mod = modules[1]
curr_dim = dims[0]
next_dim = dims[1]
self.up1 = make_up_layer(
3, curr_dim, curr_dim, curr_mod,
layer=layer, **kwargs
)
self.max1 = make_pool_layer(curr_dim)
self.low1 = make_hg_layer(
3, curr_dim, next_dim, curr_mod,
layer=layer, **kwargs
)
self.low2 = kp_module(
n - 1, dims[1:], modules[1:], layer=layer,
make_up_layer=make_up_layer,
make_low_layer=make_low_layer,
make_hg_layer=make_hg_layer,
make_hg_layer_revr=make_hg_layer_revr,
make_pool_layer=make_pool_layer,
make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer,
**kwargs
) if self.n > 1 else \
make_low_layer(
3, next_dim, next_dim, next_mod,
layer=layer, **kwargs
)
self.low3 = make_hg_layer_revr(
3, next_dim, curr_dim, curr_mod,
layer=layer, **kwargs
)
self.up2 = make_unpool_layer(curr_dim)
self.merge = make_merge_layer(curr_dim)
def forward(self, x):
up1 = self.up1(x)
max1 = self.max1(x)
low1 = self.low1(max1)
low2 = self.low2(low1)
low3 = self.low3(low2)
up2 = self.up2(low3)
return self.merge(up1, up2)
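# Note: each kp_module level downsamples via the stride-2 convolution built by
# make_hg_layer, recurses n - 1 more levels, then upsamples (up2) and merges
# with the full-resolution branch (up1), i.e. a standard hourglass module.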
class exkp(nn.Module):
def __init__(
self, n, nstack, dims, modules, heads, pre=None, cnv_dim=256,
make_tl_layer=None, make_br_layer=None,
make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer,
make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer,
make_up_layer=make_layer, make_low_layer=make_layer,
make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer,
kp_layer=residual
):
super(exkp, self).__init__()
self.nstack = nstack
self.heads = heads
curr_dim = dims[0]
self.pre = nn.Sequential(
convolution(7, 3, 128, stride=2),
residual(3, 128, 256, stride=2)
) if pre is None else pre
self.kps = nn.ModuleList([
kp_module(
n, dims, modules, layer=kp_layer,
make_up_layer=make_up_layer,
make_low_layer=make_low_layer,
make_hg_layer=make_hg_layer,
make_hg_layer_revr=make_hg_layer_revr,
make_pool_layer=make_pool_layer,
make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer
) for _ in range(nstack)
])
self.cnvs = nn.ModuleList([
make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack)
])
self.inters = nn.ModuleList([
make_inter_layer(curr_dim) for _ in range(nstack - 1)
])
self.inters_ = nn.ModuleList([
nn.Sequential(
nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False),
nn.BatchNorm2d(curr_dim)
) for _ in range(nstack - 1)
])
self.cnvs_ = nn.ModuleList([
nn.Sequential(
nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False),
nn.BatchNorm2d(curr_dim)
) for _ in range(nstack - 1)
])
## keypoint heatmaps
for head in heads.keys():
if 'hm' in head:
module = nn.ModuleList([
make_heat_layer(
cnv_dim, curr_dim, heads[head]) for _ in range(nstack)
])
self.__setattr__(head, module)
for heat in self.__getattr__(head):
heat[-1].bias.data.fill_(-2.19)
else:
module = nn.ModuleList([
make_regr_layer(
cnv_dim, curr_dim, heads[head]) for _ in range(nstack)
])
self.__setattr__(head, module)
self.relu = nn.ReLU(inplace=True)
def forward(self, image):
# print('image shape', image.shape)
inter = self.pre(image)
outs = []
for ind in range(self.nstack):
kp_, cnv_ = self.kps[ind], self.cnvs[ind]
kp = kp_(inter)
cnv = cnv_(kp)
out = {}
for head in self.heads:
layer = self.__getattr__(head)[ind]
y = layer(cnv)
out[head] = y
outs.append(out)
if ind < self.nstack - 1:
inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)
inter = self.relu(inter)
inter = self.inters[ind](inter)
return outs
def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs):
layers = [layer(kernel, dim0, dim1, stride=2)]
layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)]
return nn.Sequential(*layers)
class HourglassNet(exkp):
def __init__(self, heads, num_stacks=2):
n = 5
dims = [256, 256, 384, 384, 384, 512]
modules = [2, 2, 2, 2, 2, 4]
super(HourglassNet, self).__init__(
n, num_stacks, dims, modules, heads,
make_tl_layer=None,
make_br_layer=None,
make_pool_layer=make_pool_layer,
make_hg_layer=make_hg_layer,
kp_layer=residual, cnv_dim=256
)
def get_large_hourglass_net(num_layers, heads, head_conv):
model = HourglassNet(heads, 2)
return model
from torch import nn
import torch.utils.model_zoo as model_zoo
from collections import OrderedDict
import math
__all__ = ['MobileNetV2']
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
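# Worked example (sketch): values round to the nearest multiple of the
# divisor, but never down by more than 10%:
# >>> _make_divisible(17, 8)   # 16 is within 10% of 17, so no bump to 24
# 16
# >>> _make_divisible(32 * 0.5, 8)
# 16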
class ConvBNReLU(nn.Sequential):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
padding = (kernel_size - 1) // 2
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_planes),
nn.ReLU6(inplace=True)
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
    def __init__(self, width_mult=1.0, round_nearest=8):
super(MobileNetV2, self).__init__()
block = InvertedResidual
input_channel = 32
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1], # 0
[6, 24, 2, 2], # 1
[6, 32, 3, 2], # 2
[6, 64, 4, 2], # 3
[6, 96, 3, 1], # 4
[6, 160, 3, 2],# 5
[6, 320, 1, 1],# 6
]
self.feat_id = [1,2,4,6]
self.feat_channel = []
# only check the first element, assuming user knows t,c,n,s are required
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
raise ValueError("inverted_residual_setting should be non-empty "
"or a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
features = [ConvBNReLU(3, input_channel, stride=2)]
# building inverted residual blocks
for id,(t, c, n, s) in enumerate(inverted_residual_setting):
output_channel = _make_divisible(c * width_mult, round_nearest)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
if id in self.feat_id :
self.__setattr__("feature_%d"%id,nn.Sequential(*features))
self.feat_channel.append(output_channel)
features = []
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
def forward(self, x):
y = []
for id in self.feat_id:
x = self.__getattr__("feature_%d"%id)(x)
y.append(x)
return y
def load_model(model, state_dict):
    # maps the pretrained weights onto the renamed modules purely by key
    # order, so both state dicts must enumerate parameters in the same sequence
    new_model = model.state_dict()
new_keys = list(new_model.keys())
old_keys = list(state_dict.keys())
restore_dict = OrderedDict()
for id in range(len(new_keys)):
restore_dict[new_keys[id]] = state_dict[old_keys[id]]
model.load_state_dict(restore_dict)
def dict2list(func):
    # decorator that flattens a head-dict forward output into a list of
    # tensors (used when exporting to ONNX; see MobileNetSeg.forward below)
    def wrap(*args, **kwargs):
self = args[0]
x = args[1]
ret_list = []
ret = func(self, x)
for k, v in ret[0].items():
ret_list.append(v)
return ret_list
return wrap
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
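# Note: fill_up_weights initializes a grouped (depthwise) transposed
# convolution as a bilinear upsampling kernel, replicated across channels,
# the usual starting point for learnable upsampling.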
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, std=0.001)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
class IDAUp(nn.Module):
def __init__(self, out_dim, channel):
super(IDAUp, self).__init__()
self.out_dim = out_dim
self.up = nn.Sequential(
nn.ConvTranspose2d(
out_dim, out_dim, kernel_size=2, stride=2, padding=0,
output_padding=0, groups=out_dim, bias=False),
nn.BatchNorm2d(out_dim,eps=0.001,momentum=0.1),
nn.ReLU())
self.conv = nn.Sequential(
nn.Conv2d(channel, out_dim,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(out_dim,eps=0.001,momentum=0.1),
nn.ReLU(inplace=True))
def forward(self, layers):
layers = list(layers)
x = self.up(layers[0])
y = self.conv(layers[1])
out = x + y
return out
class MobileNetUp(nn.Module):
def __init__(self, channels, out_dim = 24):
super(MobileNetUp, self).__init__()
channels = channels[::-1]
self.conv = nn.Sequential(
nn.Conv2d(channels[0], out_dim,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(out_dim,eps=0.001,momentum=0.1),
nn.ReLU(inplace=True))
self.conv_last = nn.Sequential(
nn.Conv2d(out_dim,out_dim,
kernel_size=3, stride=1, padding=1 ,bias=False),
nn.BatchNorm2d(out_dim,eps=1e-5,momentum=0.01),
nn.ReLU(inplace=True))
for i,channel in enumerate(channels[1:]):
setattr(self,'up_%d'%(i),IDAUp(out_dim,channel))
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m,nn.ConvTranspose2d):
fill_up_weights(m)
def forward(self, layers):
layers = list(layers)
assert len(layers) > 1
x = self.conv(layers[-1])
for i in range(0,len(layers)-1):
up = getattr(self, 'up_{}'.format(i))
x = up([x,layers[len(layers)-2-i]])
x = self.conv_last(x)
return x
class MobileNetSeg(nn.Module):
    def __init__(self, base_name, heads, head_conv=24, pretrained=True):
super(MobileNetSeg, self).__init__()
self.heads = heads
self.base = globals()[base_name](
pretrained=pretrained)
channels = self.base.feat_channel
self.dla_up = MobileNetUp(channels, out_dim=head_conv)
for head in self.heads:
classes = self.heads[head]
            fc = nn.Conv2d(head_conv, classes,
                           kernel_size=1, stride=1,
                           padding=0, bias=True)
if 'hm' in head:
fc.bias.data.fill_(-2.19)
else:
nn.init.normal_(fc.weight, std=0.001)
nn.init.constant_(fc.bias, 0)
self.__setattr__(head, fc)
    # @dict2list  # when exporting to ONNX, the dict output must be converted to a list
def forward(self, x):
x = self.base(x)
x = self.dla_up(x)
ret = {}
for head in self.heads:
ret[head] = self.__getattr__(head)(x)
return [ret]
def mobilenetv2_10(pretrained=True, **kwargs):
model = MobileNetV2(width_mult=1.0)
if pretrained:
state_dict = model_zoo.load_url(model_urls['mobilenet_v2'],
progress=True)
load_model(model,state_dict)
return model
def mobilenetv2_5(pretrained=False, **kwargs):
model = MobileNetV2(width_mult=0.5)
if pretrained:
print('This version does not have pretrain weights.')
return model
# num_layers : [10 , 5]
def get_mobile_net(num_layers, heads, head_conv=24):
model = MobileNetSeg('mobilenetv2_{}'.format(num_layers), heads,
pretrained=True,
head_conv=head_conv)
return model
if __name__ == '__main__':
    import torch
    input = torch.zeros([1, 3, 416, 416])
    # 'hm' carries one heatmap channel per object class; this head only
    # supports rectangular box detection
    model = get_mobile_net(5, {'hm': 1, 'reg': 2, 'wh': 2}, head_conv=24)
    res = model(input)
    # forward returns a single-element list holding the head dict
    for head, out in res[0].items():
        print(head, out.shape)
from torch import nn
import torch.utils.model_zoo as model_zoo
from collections import OrderedDict
import math
__all__ = ['MobileNetV2']
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
def _make_divisible(v, divisor, min_value=None):
"""
This function is taken from the original tf repo.
It ensures that all layers have a channel number that is divisible by 8
It can be seen here:
https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
:param v:
:param divisor:
:param min_value:
:return:
"""
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class ConvBNReLU(nn.Sequential):
def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
padding = (kernel_size - 1) // 2
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=True)  # ReLU used in place of ReLU6 in this variant
)
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, expand_ratio):
super(InvertedResidual, self).__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(round(inp * expand_ratio))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expand_ratio != 1:
# pw
layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
layers.extend([
# dw
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
# pw-linear
nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
else:
return self.conv(x)
class MobileNetV2(nn.Module):
def __init__(self, width_mult=1.0, round_nearest=8, ):
super(MobileNetV2, self).__init__()
block = InvertedResidual
input_channel = 32
inverted_residual_setting = [
# t, c, n, s
[1, 16, 1, 1], # 0
[6, 24, 2, 2], # 1
[6, 32, 3, 2], # 2
[6, 64, 4, 2], # 3
[6, 96, 3, 1], # 4
[6, 160, 3, 2], # 5
[6, 320, 1, 1], # 6
]
self.feat_id = [1, 2, 4, 6]
self.feat_channel = []
# only check the first element, assuming user knows t,c,n,s are required
if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
raise ValueError("inverted_residual_setting should be non-empty "
"or a 4-element list, got {}".format(inverted_residual_setting))
# building first layer
input_channel = _make_divisible(input_channel * width_mult, round_nearest)
features = [ConvBNReLU(3, input_channel, stride=2)]
# building inverted residual blocks
for id, (t, c, n, s) in enumerate(inverted_residual_setting):
output_channel = _make_divisible(c * width_mult, round_nearest)
for i in range(n):
stride = s if i == 0 else 1
features.append(block(input_channel, output_channel, stride, expand_ratio=t))
input_channel = output_channel
if id in self.feat_id:
self.__setattr__("feature_%d" % id, nn.Sequential(*features))
self.feat_channel.append(output_channel)
features = []
# weight initialization
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
def forward(self, x):
y = []
for id in self.feat_id:
x = self.__getattr__("feature_%d" % id)(x)
y.append(x)
return y
def load_model(model, state_dict):
    # as above: pretrained weights are mapped onto the modules purely by key order
    new_model = model.state_dict()
new_keys = list(new_model.keys())
old_keys = list(state_dict.keys())
restore_dict = OrderedDict()
for id in range(len(new_keys)):
restore_dict[new_keys[id]] = state_dict[old_keys[id]]
model.load_state_dict(restore_dict)
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, std=0.001)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
class IDAUp(nn.Module):
def __init__(self, out_dim, channel):
super(IDAUp, self).__init__()
self.out_dim = out_dim
self.up = nn.Sequential(
nn.ConvTranspose2d(
out_dim, out_dim, kernel_size=2, stride=2, padding=0,
output_padding=0, groups=out_dim, bias=False),
nn.BatchNorm2d(out_dim, eps=0.001, momentum=0.1),
nn.ReLU())
self.conv = nn.Sequential(
nn.Conv2d(channel, out_dim,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(out_dim, eps=0.001, momentum=0.1),
nn.ReLU(inplace=True))
def forward(self, layers):
layers = list(layers)
x = self.up(layers[0])
y = self.conv(layers[1])
out = x + y
return out
class MobileNetUp(nn.Module):
def __init__(self, channels, out_dim=24):
super(MobileNetUp, self).__init__()
channels = channels[::-1]
self.conv = nn.Sequential(
nn.Conv2d(channels[0], out_dim,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(out_dim, eps=0.001, momentum=0.1),
nn.ReLU(inplace=True))
self.conv_last = nn.Sequential(
nn.Conv2d(out_dim, out_dim,
kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(out_dim, eps=1e-5, momentum=0.01),
nn.ReLU(inplace=True))
for i, channel in enumerate(channels[1:]):
setattr(self, 'up_%d' % (i), IDAUp(out_dim, channel))
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.ConvTranspose2d):
fill_up_weights(m)
def forward(self, layers):
layers = list(layers)
assert len(layers) > 1
x = self.conv(layers[-1])
for i in range(0, len(layers) - 1):
up = getattr(self, 'up_{}'.format(i))
x = up([x, layers[len(layers) - 2 - i]])
x = self.conv_last(x)
return x
class MobileNetSeg(nn.Module):
def __init__(self, base_name, head_conv=24, pretrained=True):
super(MobileNetSeg, self).__init__()
# self.heads = {'hm':1,'reg':2,'wh':2}
self.base = globals()[base_name](
pretrained=pretrained)
channels = self.base.feat_channel
self.dla_up = MobileNetUp(channels, out_dim=head_conv)
def forward(self, x):
x = self.base(x)
x = self.dla_up(x)
return x
def mobilenetv2_10(pretrained=True, **kwargs):
model = MobileNetV2(width_mult=1.0)
if pretrained:
state_dict = model_zoo.load_url(model_urls['mobilenet_v2'],
progress=True)
load_model(model, state_dict)
return model
def mobilenetv2_5(pretrained=False, **kwargs):
model = MobileNetV2(width_mult=0.5)
if pretrained:
print('This version does not have pretrain weights.')
return model
# num_layers : [10 , 5]
def get_mobile_pose_netv2(num_layers, cfg):
    # num_layers is forced to 10: only mobilenetv2_10 has pretrained weights
    num_layers = 10
model = MobileNetSeg('mobilenetv2_{}'.format(num_layers),
pretrained=True,
head_conv=cfg.MODEL.INTERMEDIATE_CHANNEL)
return model
from __future__ import absolute_import, division, print_function
import math
import torch.nn.functional as F
from torch import nn
from torch.nn import init
from .DCNv2.dcn_v2 import DCN
class DeformConv(nn.Module):
def __init__(self, chi, cho):
super(DeformConv, self).__init__()
self.actf = nn.Sequential(
nn.BatchNorm2d(cho, momentum=0.1),
nn.ReLU(inplace=True)
)
self.conv = DCN(chi, cho, kernel_size=(3, 3), stride=1, padding=1, dilation=1, deformable_groups=1)
def forward(self, x):
x = self.conv(x)
x = self.actf(x)
return x
class IDAUp(nn.Module):
def __init__(self, o, channels, up_f):
super(IDAUp, self).__init__()
for i in range(1, len(channels)):
c = channels[i]
f = int(up_f[i])
proj = DeformConv(c, o)
node = DeformConv(o, o)
up = nn.ConvTranspose2d(o, o, f * 2, stride=f,
padding=f // 2, output_padding=0,
groups=o, bias=False)
fill_up_weights(up)
setattr(self, 'proj_' + str(i), proj)
setattr(self, 'up_' + str(i), up)
setattr(self, 'node_' + str(i), node)
def forward(self, layers, startp, endp):
for i in range(startp + 1, endp):
upsample = getattr(self, 'up_' + str(i - startp))
project = getattr(self, 'proj_' + str(i - startp))
layers[i] = upsample(project(layers[i]))
node = getattr(self, 'node_' + str(i - startp))
layers[i] = node(layers[i] + layers[i - 1])
class hswish(nn.Module):
def forward(self, x):
out = x * F.relu6(x + 3, inplace=True) / 6
return out
class hsigmoid(nn.Module):
def forward(self, x):
out = F.relu6(x + 3, inplace=True) / 6
return out
class SeModule(nn.Module):
def __init__(self, in_size, reduction=4):
super(SeModule, self).__init__()
self.se = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_size, in_size // reduction, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(in_size // reduction),
nn.ReLU(inplace=True),
nn.Conv2d(in_size // reduction, in_size, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(in_size),
hsigmoid()
)
def forward(self, x):
return x * self.se(x)
class Block(nn.Module):
'''expand + depthwise + pointwise'''
def __init__(self, kernel_size, in_size, expand_size, out_size, nolinear, semodule, stride):
super(Block, self).__init__()
self.stride = stride
self.se = semodule
self.conv1 = nn.Conv2d(in_size, expand_size, kernel_size=1, stride=1, padding=0, bias=False)
self.bn1 = nn.BatchNorm2d(expand_size)
self.nolinear1 = nolinear
self.conv2 = nn.Conv2d(expand_size, expand_size, kernel_size=kernel_size, stride=stride,
padding=kernel_size // 2, groups=expand_size, bias=False)
self.bn2 = nn.BatchNorm2d(expand_size)
self.nolinear2 = nolinear
self.conv3 = nn.Conv2d(expand_size, out_size, kernel_size=1, stride=1, padding=0, bias=False)
self.bn3 = nn.BatchNorm2d(out_size)
self.shortcut = nn.Sequential()
if stride == 1 and in_size != out_size:
self.shortcut = nn.Sequential(
nn.Conv2d(in_size, out_size, kernel_size=1, stride=1, padding=0, bias=False),
nn.BatchNorm2d(out_size),
)
def forward(self, x):
out = self.nolinear1(self.bn1(self.conv1(x)))
out = self.nolinear2(self.bn2(self.conv2(out)))
out = self.bn3(self.conv3(out))
        if self.se is not None:
out = self.se(out)
out = out + self.shortcut(x) if self.stride == 1 else out
return out
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
class MobileNetV3(nn.Module):
def __init__(self, final_kernel):
super(MobileNetV3, self).__init__()
self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=2, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(16)
self.hs1 = hswish()
self.bneck0 = nn.Sequential(
Block(3, 16, 16, 16, nn.ReLU(inplace=True), None, 1),
Block(3, 16, 64, 24, nn.ReLU(inplace=True), None, 2),
Block(3, 24, 72, 24, nn.ReLU(inplace=True), None, 1),
)
self.bneck1 = nn.Sequential(
Block(5, 24, 72, 40, nn.ReLU(inplace=True), SeModule(40), 2),
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
Block(5, 40, 120, 40, nn.ReLU(inplace=True), SeModule(40), 1),
)
self.bneck2 = nn.Sequential(
Block(3, 40, 240, 80, hswish(), None, 2),
Block(3, 80, 200, 80, hswish(), None, 1),
Block(3, 80, 184, 80, hswish(), None, 1),
Block(3, 80, 184, 80, hswish(), None, 1),
Block(3, 80, 480, 112, hswish(), SeModule(112), 1),
Block(3, 112, 672, 112, hswish(), SeModule(112), 1),
Block(5, 112, 672, 160, hswish(), SeModule(160), 1),
)
self.bneck3 = nn.Sequential(
Block(5, 160, 672, 160, hswish(), SeModule(160), 2),
Block(5, 160, 960, 160, hswish(), SeModule(160), 1),
)
self.conv2 = nn.Conv2d(160, 960, kernel_size=1, stride=1, padding=0, bias=False)
self.bn2 = nn.BatchNorm2d(960)
self.hs2 = hswish()
self.ida_up = IDAUp(24, [24, 40, 160, 960],
[2 ** i for i in range(4)])
self.init_params()
def init_params(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
out = self.hs1(self.bn1(self.conv1(x)))
out0 = self.bneck0(out)
out1 = self.bneck1(out0)
out2 = self.bneck2(out1)
out3 = self.bneck3(out2)
out3 = self.hs2(self.bn2(self.conv2(out3)))
out = [out0, out1, out2, out3]
y = []
for i in range(4):
y.append(out[i].clone())
self.ida_up(y, 0, len(y))
return y[-1]
def get_mobilev3_pose_net(num_layers, cfg):
model = MobileNetV3(final_kernel=1)
return model
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Xingyi Zhou
# ------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function
import os
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
BN_MOMENTUM = 0.1
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion,
momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, std=0.001)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
class PoseResNet(nn.Module):
def __init__(self, block, layers, **kwargs):
self.inplanes = 64
self.deconv_with_bias = False
super(PoseResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
# used for deconv layers
self.deconv_layers = self._make_deconv_layer(
3,
[256, 256, 256],
[4, 4, 4],
)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _get_deconv_cfg(self, deconv_kernel, index):
if deconv_kernel == 4:
padding = 1
output_padding = 0
elif deconv_kernel == 3:
padding = 1
output_padding = 1
        elif deconv_kernel == 2:
            padding = 0
            output_padding = 0
        else:
            raise ValueError('unsupported deconv kernel size: {}'.format(deconv_kernel))
        return deconv_kernel, padding, output_padding
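    # With stride 2, a 4x4 kernel with padding 1 and no output padding exactly
    # doubles the spatial size: out = (in - 1) * 2 - 2 * 1 + 4 + 0 = 2 * in.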
def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
        assert num_layers == len(num_filters), \
            'ERROR: num_deconv_layers is different from len(num_deconv_filters)'
        assert num_layers == len(num_kernels), \
            'ERROR: num_deconv_layers is different from len(num_deconv_kernels)'
layers = []
for i in range(num_layers):
kernel, padding, output_padding = \
self._get_deconv_cfg(num_kernels[i], i)
planes = num_filters[i]
layers.append(
nn.ConvTranspose2d(
in_channels=self.inplanes,
out_channels=planes,
kernel_size=kernel,
stride=2,
padding=padding,
output_padding=output_padding,
bias=self.deconv_with_bias))
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
layers.append(nn.ReLU(inplace=True))
self.inplanes = planes
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.deconv_layers(x)
return x
def init_weights(self, num_layers, pretrained=True):
if pretrained:
# print('=> init resnet deconv weights from normal distribution')
for _, m in self.deconv_layers.named_modules():
if isinstance(m, nn.ConvTranspose2d):
# print('=> init {}.weight as normal(0, 0.001)'.format(name))
# print('=> init {}.bias as 0'.format(name))
nn.init.normal_(m.weight, std=0.001)
if self.deconv_with_bias:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
# print('=> init {}.weight as 1'.format(name))
# print('=> init {}.bias as 0'.format(name))
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
#pretrained_state_dict = torch.load(pretrained)
url = model_urls['resnet{}'.format(num_layers)]
pretrained_state_dict = model_zoo.load_url(url)
print('=> loading pretrained model {}'.format(url))
self.load_state_dict(pretrained_state_dict, strict=False)
        else:
            print('=> imagenet pretrained model does not exist')
            print('=> please download it first')
            raise ValueError('imagenet pretrained model does not exist')
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
34: (BasicBlock, [3, 4, 6, 3]),
50: (Bottleneck, [3, 4, 6, 3]),
101: (Bottleneck, [3, 4, 23, 3]),
152: (Bottleneck, [3, 8, 36, 3])}
def get_resnet(num_layers, cfg):
block_class, layers = resnet_spec[num_layers]
model = PoseResNet(block_class, layers)
model.init_weights(num_layers, pretrained=True)
return model
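# Usage sketch (assumes torch is imported; downloads ImageNet weights on
# first use). The /32 backbone plus three stride-2 deconv stages gives an
# output stride of 4:
# >>> net = get_resnet(18, cfg=None)  # cfg is unused here
# >>> net(torch.zeros(1, 3, 512, 512)).shape
# torch.Size([1, 256, 128, 128])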
from __future__ import absolute_import, division, print_function
import logging
import math
import os
from os.path import join
import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from torch import nn
from .DCNv2.dcn_v2 import DCN
BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)
def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 2
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(Bottleneck, self).__init__()
expansion = Bottleneck.expansion
bottle_planes = planes // expansion
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class BottleneckX(nn.Module):
expansion = 2
cardinality = 32
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BottleneckX, self).__init__()
cardinality = BottleneckX.cardinality
# dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))
# bottle_planes = dim * cardinality
bottle_planes = planes * cardinality // 32
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation, bias=False,
dilation=dilation, groups=cardinality)
self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, residual):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1,
stride=1, bias=False, padding=(kernel_size - 1) // 2)
self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.residual = residual
def forward(self, *x):
children = x
x = self.conv(torch.cat(x, 1))
x = self.bn(x)
if self.residual:
x += children[0]
x = self.relu(x)
return x
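# Illustrative sketch (hypothetical helper, not part of the model code):
# `Root` fuses any number of children by channel-concatenation followed by a
# 1x1 conv + BN (+ optional residual from the first child) + ReLU.
def _demo_root():
    root = Root(in_channels=32, out_channels=16, kernel_size=1, residual=False)
    a = torch.randn(1, 16, 8, 8)
    b = torch.randn(1, 16, 8, 8)
    return root(a, b).shape  # torch.Size([1, 16, 8, 8])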
class Tree(nn.Module):
def __init__(self, levels, block, in_channels, out_channels, stride=1,
level_root=False, root_dim=0, root_kernel_size=1,
dilation=1, root_residual=False):
super(Tree, self).__init__()
if root_dim == 0:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
if levels == 1:
self.tree1 = block(in_channels, out_channels, stride,
dilation=dilation)
self.tree2 = block(out_channels, out_channels, 1,
dilation=dilation)
else:
self.tree1 = Tree(levels - 1, block, in_channels, out_channels,
stride, root_dim=0,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
self.tree2 = Tree(levels - 1, block, out_channels, out_channels,
root_dim=root_dim + out_channels,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
if levels == 1:
self.root = Root(root_dim, out_channels, root_kernel_size,
root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.downsample = None
self.project = None
self.levels = levels
if stride > 1:
self.downsample = nn.MaxPool2d(stride, stride=stride)
if in_channels != out_channels:
self.project = nn.Sequential(
nn.Conv2d(in_channels, out_channels,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
)
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x) if self.downsample else x
residual = self.project(bottom) if self.project else bottom
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.levels == 1:
x2 = self.tree2(x1)
x = self.root(x2, x1, *children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
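# Illustrative sketch (hypothetical helper): a one-level `Tree` stacks two
# blocks and fuses their outputs through a `Root`; with stride=2 the first
# block and the MaxPool shortcut both halve the spatial resolution.
def _demo_tree():
    tree = Tree(levels=1, block=BasicBlock, in_channels=16, out_channels=32,
                stride=2)
    return tree(torch.randn(1, 16, 16, 16)).shape  # torch.Size([1, 32, 8, 8])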
class DLA(nn.Module):
def __init__(self, levels, channels, num_classes=1000,
block=BasicBlock, residual_root=False, linear_root=False):
super(DLA, self).__init__()
self.channels = channels
self.num_classes = num_classes
self.base_layer = nn.Sequential(
nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
padding=3, bias=False),
nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),
nn.ReLU(inplace=True))
self.level0 = self._make_conv_level(
channels[0], channels[0], levels[0])
self.level1 = self._make_conv_level(
channels[0], channels[1], levels[1], stride=2)
self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,
level_root=False,
root_residual=residual_root)
self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,
level_root=True, root_residual=residual_root)
self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,
level_root=True, root_residual=residual_root)
self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,
level_root=True, root_residual=residual_root)
# for m in self.modules():
# if isinstance(m, nn.Conv2d):
# n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
# m.weight.data.normal_(0, math.sqrt(2. / n))
# elif isinstance(m, nn.BatchNorm2d):
# m.weight.data.fill_(1)
# m.bias.data.zero_()
    # NOTE: unused in this file; the `BasicBlock`/`Bottleneck` defined above
    # take no `downsample` argument, so this helper would fail if it were called.
    def _make_level(self, block, inplanes, planes, blocks, stride=1):
downsample = None
if stride != 1 or inplanes != planes:
downsample = nn.Sequential(
nn.MaxPool2d(stride, stride=stride),
nn.Conv2d(inplanes, planes,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(inplanes, planes, stride, downsample=downsample))
for i in range(1, blocks):
layers.append(block(inplanes, planes))
return nn.Sequential(*layers)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation, bias=False, dilation=dilation),
nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)])
inplanes = planes
return nn.Sequential(*modules)
def forward(self, x):
y = []
x = self.base_layer(x)
for i in range(6):
x = getattr(self, 'level{}'.format(i))(x)
y.append(x)
return y
def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):
# fc = self.fc
if name.endswith('.pth'):
model_weights = torch.load(data + name)
else:
model_url = get_model_url(data, name, hash)
model_weights = model_zoo.load_url(model_url)
num_classes = len(model_weights[list(model_weights.keys())[-1]])
self.fc = nn.Conv2d(
self.channels[-1], num_classes,
kernel_size=1, stride=1, padding=0, bias=True)
self.load_state_dict(model_weights)
# self.fc = fc
def dla34(pretrained=True, **kwargs): # DLA-34
model = DLA([1, 1, 1, 2, 2, 1],
[16, 32, 64, 128, 256, 512],
block=BasicBlock, **kwargs)
if pretrained:
model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')
return model
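# Illustrative sketch (hypothetical helper): `dla34` returns six feature maps
# at strides 1, 2, 4, 8, 16 and 32; `pretrained=False` here only avoids the
# weight download in this example.
def _demo_dla34():
    model = dla34(pretrained=False)
    feats = model(torch.randn(1, 3, 64, 64))
    return [f.shape for f in feats]  # channels 16, 32, 64, 128, 256, 512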
class Identity(nn.Module):
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
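# Illustrative sketch (hypothetical helper): `fill_up_weights` initialises a
# depthwise ConvTranspose2d as a fixed bilinear upsampler, so a constant map
# stays (approximately) constant after 2x upsampling.
def _demo_fill_up_weights():
    up = nn.ConvTranspose2d(4, 4, 4, stride=2, padding=1, groups=4, bias=False)
    fill_up_weights(up)
    return up(torch.ones(1, 4, 8, 8)).shape  # torch.Size([1, 4, 16, 16])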
class DeformConv(nn.Module):
def __init__(self, chi, cho):
super(DeformConv, self).__init__()
self.actf = nn.Sequential(
nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)
)
self.conv = DCN(chi, cho, kernel_size=(3,3), stride=1, padding=1, dilation=1, deformable_groups=1)
def forward(self, x):
x = self.conv(x)
x = self.actf(x)
return x
class IDAUp(nn.Module):
def __init__(self, o, channels, up_f):
super(IDAUp, self).__init__()
for i in range(1, len(channels)):
c = channels[i]
f = int(up_f[i])
proj = DeformConv(c, o)
node = DeformConv(o, o)
up = nn.ConvTranspose2d(o, o, f * 2, stride=f,
padding=f // 2, output_padding=0,
groups=o, bias=False)
fill_up_weights(up)
setattr(self, 'proj_' + str(i), proj)
setattr(self, 'up_' + str(i), up)
setattr(self, 'node_' + str(i), node)
def forward(self, layers, startp, endp):
for i in range(startp + 1, endp):
upsample = getattr(self, 'up_' + str(i - startp))
project = getattr(self, 'proj_' + str(i - startp))
layers[i] = upsample(project(layers[i]))
node = getattr(self, 'node_' + str(i - startp))
layers[i] = node(layers[i] + layers[i - 1])
class DLAUp(nn.Module):
def __init__(self, startp, channels, scales, in_channels=None):
super(DLAUp, self).__init__()
self.startp = startp
if in_channels is None:
in_channels = channels
self.channels = channels
channels = list(channels)
scales = np.array(scales, dtype=int)
for i in range(len(channels) - 1):
j = -i - 2
setattr(self, 'ida_{}'.format(i),
IDAUp(channels[j], in_channels[j:],
scales[j:] // scales[j]))
scales[j + 1:] = scales[j]
in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]
def forward(self, layers):
out = [layers[-1]] # start with 32
for i in range(len(layers) - self.startp - 1):
ida = getattr(self, 'ida_{}'.format(i))
            ida(layers, len(layers) - i - 2, len(layers))
out.insert(0, layers[-1])
return out
class Interpolate(nn.Module):
def __init__(self, scale, mode):
super(Interpolate, self).__init__()
self.scale = scale
self.mode = mode
def forward(self, x):
x = F.interpolate(x, scale_factor=self.scale, mode=self.mode, align_corners=False)
return x
class DLASeg(nn.Module):
def __init__(self, base_name, pretrained, down_ratio, final_kernel,
last_level, out_channel=0):
super(DLASeg, self).__init__()
assert down_ratio in [2, 4, 8, 16]
self.first_level = int(np.log2(down_ratio))
self.last_level = last_level
self.base = globals()[base_name](pretrained=pretrained)
channels = self.base.channels
scales = [2 ** i for i in range(len(channels[self.first_level:]))]
self.dla_up = DLAUp(self.first_level, channels[self.first_level:], scales)
if out_channel == 0:
out_channel = channels[self.first_level]
self.ida_up = IDAUp(out_channel, channels[self.first_level:self.last_level],
[2 ** i for i in range(self.last_level - self.first_level)])
def forward(self, x):
x = self.base(x)
x = self.dla_up(x)
y = []
for i in range(self.last_level - self.first_level):
y.append(x[i].clone())
self.ida_up(y, 0, len(y))
x = y[-1]
return x
def get_pose_net(num_layers, cfg=None, down_ratio=4):
model = DLASeg('dla{}'.format(num_layers),
pretrained=True,
down_ratio=down_ratio,
final_kernel=1,
last_level=5)
return model
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (leoxiaobin@gmail.com)
# Modified by Bowen Cheng (bcheng9@illinois.edu)
# ------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function
import logging
import os
import torch
import torch.nn as nn
BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion,
momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class HighResolutionModule(nn.Module):
def __init__(self, num_branches, blocks, num_blocks, num_inchannels,
num_channels, fuse_method, multi_scale_output=True):
super(HighResolutionModule, self).__init__()
self._check_branches(
num_branches, blocks, num_blocks, num_inchannels, num_channels)
self.num_inchannels = num_inchannels
self.fuse_method = fuse_method
self.num_branches = num_branches
self.multi_scale_output = multi_scale_output
self.branches = self._make_branches(
num_branches, blocks, num_blocks, num_channels)
self.fuse_layers = self._make_fuse_layers()
self.relu = nn.ReLU(True)
def _check_branches(self, num_branches, blocks, num_blocks,
num_inchannels, num_channels):
if num_branches != len(num_blocks):
error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format(
num_branches, len(num_blocks))
logger.error(error_msg)
raise ValueError(error_msg)
if num_branches != len(num_channels):
error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format(
num_branches, len(num_channels))
logger.error(error_msg)
raise ValueError(error_msg)
if num_branches != len(num_inchannels):
error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(
num_branches, len(num_inchannels))
logger.error(error_msg)
raise ValueError(error_msg)
def _make_one_branch(self, branch_index, block, num_blocks, num_channels,
stride=1):
downsample = None
if stride != 1 or \
self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.num_inchannels[branch_index],
num_channels[branch_index] * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(num_channels[branch_index] * block.expansion,
momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.num_inchannels[branch_index],
num_channels[branch_index], stride, downsample))
self.num_inchannels[branch_index] = \
num_channels[branch_index] * block.expansion
for i in range(1, num_blocks[branch_index]):
layers.append(block(self.num_inchannels[branch_index],
num_channels[branch_index]))
return nn.Sequential(*layers)
def _make_branches(self, num_branches, block, num_blocks, num_channels):
branches = []
for i in range(num_branches):
branches.append(
self._make_one_branch(i, block, num_blocks, num_channels))
return nn.ModuleList(branches)
def _make_fuse_layers(self):
if self.num_branches == 1:
return None
num_branches = self.num_branches
num_inchannels = self.num_inchannels
fuse_layers = []
for i in range(num_branches if self.multi_scale_output else 1):
fuse_layer = []
for j in range(num_branches):
if j > i:
fuse_layer.append(nn.Sequential(
nn.Conv2d(num_inchannels[j],
num_inchannels[i],
1,
1,
0,
bias=False),
nn.BatchNorm2d(num_inchannels[i]),
nn.Upsample(scale_factor=2**(j-i), mode='nearest')))
elif j == i:
fuse_layer.append(None)
else:
conv3x3s = []
for k in range(i-j):
if k == i - j - 1:
num_outchannels_conv3x3 = num_inchannels[i]
conv3x3s.append(nn.Sequential(
nn.Conv2d(num_inchannels[j],
num_outchannels_conv3x3,
3, 2, 1, bias=False),
nn.BatchNorm2d(num_outchannels_conv3x3)))
else:
num_outchannels_conv3x3 = num_inchannels[j]
conv3x3s.append(nn.Sequential(
nn.Conv2d(num_inchannels[j],
num_outchannels_conv3x3,
3, 2, 1, bias=False),
nn.BatchNorm2d(num_outchannels_conv3x3),
nn.ReLU(True)))
fuse_layer.append(nn.Sequential(*conv3x3s))
fuse_layers.append(nn.ModuleList(fuse_layer))
return nn.ModuleList(fuse_layers)
def get_num_inchannels(self):
return self.num_inchannels
def forward(self, x):
if self.num_branches == 1:
return [self.branches[0](x[0])]
for i in range(self.num_branches):
x[i] = self.branches[i](x[i])
x_fuse = []
for i in range(len(self.fuse_layers)):
y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
for j in range(1, self.num_branches):
if i == j:
y = y + x[j]
else:
y = y + self.fuse_layers[i][j](x[j])
x_fuse.append(self.relu(y))
return x_fuse
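# Illustrative sketch (hypothetical helper): a two-branch module keeps one
# path at full resolution and one at half resolution, then fuses them with a
# 1x1 conv + upsample (low -> high) and a strided 3x3 conv (high -> low).
def _demo_hr_module():
    m = HighResolutionModule(
        num_branches=2, blocks=BasicBlock, num_blocks=[2, 2],
        num_inchannels=[32, 64], num_channels=[32, 64], fuse_method='SUM')
    xs = [torch.randn(1, 32, 16, 16), torch.randn(1, 64, 8, 8)]
    return [y.shape for y in m(xs)]  # [(1, 32, 16, 16), (1, 64, 8, 8)]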
blocks_dict = {
'BASIC': BasicBlock,
'BOTTLENECK': Bottleneck
}
class PoseHigherResolutionNet(nn.Module):
def __init__(self, cfg, **kwargs):
self.inplanes = 64
extra = cfg.MODEL.EXTRA
super(PoseHigherResolutionNet, self).__init__()
# stem net
self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1,
bias=False)
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1,
bias=False)
self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.layer1 = self._make_layer(Bottleneck, 64, 4)
self.stage2_cfg = cfg['MODEL']['EXTRA']['STAGE2']
num_channels = self.stage2_cfg['NUM_CHANNELS']
block = blocks_dict[self.stage2_cfg['BLOCK']]
num_channels = [
num_channels[i] * block.expansion for i in range(len(num_channels))
]
self.transition1 = self._make_transition_layer([256], num_channels)
self.stage2, pre_stage_channels = self._make_stage(
self.stage2_cfg, num_channels)
self.stage3_cfg = cfg['MODEL']['EXTRA']['STAGE3']
num_channels = self.stage3_cfg['NUM_CHANNELS']
block = blocks_dict[self.stage3_cfg['BLOCK']]
num_channels = [
num_channels[i] * block.expansion for i in range(len(num_channels))
]
self.transition2 = self._make_transition_layer(
pre_stage_channels, num_channels)
self.stage3, pre_stage_channels = self._make_stage(
self.stage3_cfg, num_channels)
self.stage4_cfg = cfg['MODEL']['EXTRA']['STAGE4']
num_channels = self.stage4_cfg['NUM_CHANNELS']
block = blocks_dict[self.stage4_cfg['BLOCK']]
num_channels = [
num_channels[i] * block.expansion for i in range(len(num_channels))
]
self.transition3 = self._make_transition_layer(
pre_stage_channels, num_channels)
self.stage4, pre_stage_channels = self._make_stage(
self.stage4_cfg, num_channels, multi_scale_output=False)
#self.final_layers = self._make_final_layers(cfg, pre_stage_channels[0])
#self.deconv_layers = self._make_deconv_layers(
# cfg, pre_stage_channels[0])
self.num_deconvs = extra.DECONV.NUM_DECONVS
self.deconv_config = cfg.MODEL.EXTRA.DECONV
self.loss_config = cfg.LOSS
self.pretrained_layers = cfg['MODEL']['EXTRA']['PRETRAINED_LAYERS']
def _make_final_layers(self, cfg, input_channels):
dim_tag = cfg.MODEL.NUM_JOINTS if cfg.MODEL.TAG_PER_JOINT else 1
extra = cfg.MODEL.EXTRA
final_layers = []
output_channels = cfg.MODEL.NUM_JOINTS + dim_tag \
if cfg.LOSS.WITH_AE_LOSS[0] else cfg.MODEL.NUM_JOINTS
final_layers.append(nn.Conv2d(
in_channels=input_channels,
out_channels=output_channels,
kernel_size=extra.FINAL_CONV_KERNEL,
stride=1,
padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0
))
deconv_cfg = extra.DECONV
for i in range(deconv_cfg.NUM_DECONVS):
input_channels = deconv_cfg.NUM_CHANNELS[i]
output_channels = cfg.MODEL.NUM_JOINTS + dim_tag \
if cfg.LOSS.WITH_AE_LOSS[i+1] else cfg.MODEL.NUM_JOINTS
final_layers.append(nn.Conv2d(
in_channels=input_channels,
out_channels=output_channels,
kernel_size=extra.FINAL_CONV_KERNEL,
stride=1,
padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0
))
return nn.ModuleList(final_layers)
def _make_deconv_layers(self, cfg, input_channels):
dim_tag = cfg.MODEL.NUM_JOINTS if cfg.MODEL.TAG_PER_JOINT else 1
extra = cfg.MODEL.EXTRA
deconv_cfg = extra.DECONV
deconv_layers = []
for i in range(deconv_cfg.NUM_DECONVS):
if deconv_cfg.CAT_OUTPUT[i]:
final_output_channels = cfg.MODEL.NUM_JOINTS + dim_tag \
if cfg.LOSS.WITH_AE_LOSS[i] else cfg.MODEL.NUM_JOINTS
input_channels += final_output_channels
output_channels = deconv_cfg.NUM_CHANNELS[i]
deconv_kernel, padding, output_padding = \
self._get_deconv_cfg(deconv_cfg.KERNEL_SIZE[i])
layers = []
layers.append(nn.Sequential(
nn.ConvTranspose2d(
in_channels=input_channels,
out_channels=output_channels,
kernel_size=deconv_kernel,
stride=2,
padding=padding,
output_padding=output_padding,
bias=False),
nn.BatchNorm2d(output_channels, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)
))
for _ in range(cfg.MODEL.EXTRA.DECONV.NUM_BASIC_BLOCKS):
layers.append(nn.Sequential(
BasicBlock(output_channels, output_channels),
))
deconv_layers.append(nn.Sequential(*layers))
input_channels = output_channels
return nn.ModuleList(deconv_layers)
def _get_deconv_cfg(self, deconv_kernel):
if deconv_kernel == 4:
padding = 1
output_padding = 0
elif deconv_kernel == 3:
padding = 1
output_padding = 1
        elif deconv_kernel == 2:
            padding = 0
            output_padding = 0
        else:
            raise ValueError(
                'deconv kernel size {} is not supported'.format(deconv_kernel))
        return deconv_kernel, padding, output_padding
def _make_transition_layer(
self, num_channels_pre_layer, num_channels_cur_layer):
num_branches_cur = len(num_channels_cur_layer)
num_branches_pre = len(num_channels_pre_layer)
transition_layers = []
for i in range(num_branches_cur):
if i < num_branches_pre:
if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
transition_layers.append(nn.Sequential(
nn.Conv2d(num_channels_pre_layer[i],
num_channels_cur_layer[i],
3,
1,
1,
bias=False),
nn.BatchNorm2d(num_channels_cur_layer[i]),
nn.ReLU(inplace=True)))
else:
transition_layers.append(None)
else:
conv3x3s = []
for j in range(i+1-num_branches_pre):
inchannels = num_channels_pre_layer[-1]
outchannels = num_channels_cur_layer[i] \
if j == i-num_branches_pre else inchannels
conv3x3s.append(nn.Sequential(
nn.Conv2d(
inchannels, outchannels, 3, 2, 1, bias=False),
nn.BatchNorm2d(outchannels),
nn.ReLU(inplace=True)))
transition_layers.append(nn.Sequential(*conv3x3s))
return nn.ModuleList(transition_layers)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _make_stage(self, layer_config, num_inchannels,
multi_scale_output=True):
num_modules = layer_config['NUM_MODULES']
num_branches = layer_config['NUM_BRANCHES']
num_blocks = layer_config['NUM_BLOCKS']
num_channels = layer_config['NUM_CHANNELS']
block = blocks_dict[layer_config['BLOCK']]
fuse_method = layer_config['FUSE_METHOD']
modules = []
for i in range(num_modules):
            # multi_scale_output is only used by the last module
if not multi_scale_output and i == num_modules - 1:
reset_multi_scale_output = False
else:
reset_multi_scale_output = True
modules.append(
HighResolutionModule(
num_branches,
block,
num_blocks,
num_inchannels,
num_channels,
fuse_method,
reset_multi_scale_output)
)
num_inchannels = modules[-1].get_num_inchannels()
return nn.Sequential(*modules), num_inchannels
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu(x)
x = self.layer1(x)
x_list = []
for i in range(self.stage2_cfg['NUM_BRANCHES']):
if self.transition1[i] is not None:
x_list.append(self.transition1[i](x))
else:
x_list.append(x)
y_list = self.stage2(x_list)
x_list = []
for i in range(self.stage3_cfg['NUM_BRANCHES']):
if self.transition2[i] is not None:
x_list.append(self.transition2[i](y_list[-1]))
else:
x_list.append(y_list[i])
y_list = self.stage3(x_list)
x_list = []
for i in range(self.stage4_cfg['NUM_BRANCHES']):
if self.transition3[i] is not None:
x_list.append(self.transition3[i](y_list[-1]))
else:
x_list.append(y_list[i])
y_list = self.stage4(x_list)
x = y_list[0]
return x
def init_weights(self, pretrained='', verbose=True):
logger.info('=> init weights from normal distribution')
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, std=0.001)
for name, _ in m.named_parameters():
if name in ['bias']:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.ConvTranspose2d):
nn.init.normal_(m.weight, std=0.001)
for name, _ in m.named_parameters():
if name in ['bias']:
nn.init.constant_(m.bias, 0)
parameters_names = set()
for name, _ in self.named_parameters():
parameters_names.add(name)
buffers_names = set()
for name, _ in self.named_buffers():
buffers_names.add(name)
if os.path.isfile(pretrained):
pretrained_state_dict = torch.load(pretrained)
logger.info('=> loading pretrained model {}'.format(pretrained))
need_init_state_dict = {}
for name, m in pretrained_state_dict.items():
                if name.split('.')[0] in self.pretrained_layers \
                        or self.pretrained_layers[0] == '*':
                    if name in parameters_names or name in buffers_names:
                        logger.info('=> init {} from {}'.format(name, pretrained))
need_init_state_dict[name] = m
self.load_state_dict(need_init_state_dict, strict=False)
print('High Resolution Network Trained on ImageNet loaded')
def get_hrpose_net(num_layers, cfg, **kwargs):
model = PoseHigherResolutionNet(cfg, **kwargs)
if cfg.MODEL.INIT_WEIGHTS:
model.init_weights(cfg.MODEL.PRETRAINED)
return model
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Dequan Wang and Xingyi Zhou
# ------------------------------------------------------------------------------
from __future__ import absolute_import, division, print_function
import logging
import math
import os
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
from .DCNv2.dcn_v2 import DCN
BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion,
momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
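# Illustrative sketch (hypothetical helper): `Bottleneck` expands channels by
# `expansion`=4, so the shortcut needs a 1x1 projection when dims change.
def _demo_bottleneck():
    down = nn.Sequential(
        nn.Conv2d(64, 64 * Bottleneck.expansion, kernel_size=1, bias=False),
        nn.BatchNorm2d(64 * Bottleneck.expansion, momentum=BN_MOMENTUM))
    block = Bottleneck(64, 64, stride=1, downsample=down)
    return block(torch.randn(1, 64, 16, 16)).shape  # torch.Size([1, 256, 16, 16])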
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, std=0.001)
# torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')
# torch.nn.init.xavier_normal_(m.weight.data)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
class PoseResNet(nn.Module):
def __init__(self, block, layers, heads, head_conv):
self.inplanes = 64
self.heads = heads
self.deconv_with_bias = False
super(PoseResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
# used for deconv layers
self.deconv_layers = self._make_deconv_layer(
3,
[256, 128, 64],
[4, 4, 4],
)
for head in self.heads:
classes = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(64, head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, classes,
kernel_size=1, stride=1,
padding=0, bias=True))
if 'hm' in head:
fc[-1].bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
else:
fc = nn.Conv2d(64, classes,
kernel_size=1, stride=1,
padding=0, bias=True)
if 'hm' in head:
fc.bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
self.__setattr__(head, fc)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _get_deconv_cfg(self, deconv_kernel, index):
if deconv_kernel == 4:
padding = 1
output_padding = 0
elif deconv_kernel == 3:
padding = 1
output_padding = 1
        elif deconv_kernel == 2:
            padding = 0
            output_padding = 0
        else:
            raise ValueError(
                'deconv kernel size {} is not supported'.format(deconv_kernel))
        return deconv_kernel, padding, output_padding
def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
        assert num_layers == len(num_filters), \
            'ERROR: num_deconv_layers is different from len(num_deconv_filters)'
        assert num_layers == len(num_kernels), \
            'ERROR: num_deconv_layers is different from len(num_deconv_kernels)'
layers = []
for i in range(num_layers):
kernel, padding, output_padding = \
self._get_deconv_cfg(num_kernels[i], i)
planes = num_filters[i]
fc = DCN(self.inplanes, planes,
kernel_size=(3,3), stride=1,
padding=1, dilation=1, deformable_groups=1)
# fc = nn.Conv2d(self.inplanes, planes,
# kernel_size=3, stride=1,
# padding=1, dilation=1, bias=False)
# fill_fc_weights(fc)
up = nn.ConvTranspose2d(
in_channels=planes,
out_channels=planes,
kernel_size=kernel,
stride=2,
padding=padding,
output_padding=output_padding,
bias=self.deconv_with_bias)
fill_up_weights(up)
layers.append(fc)
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
layers.append(nn.ReLU(inplace=True))
layers.append(up)
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
layers.append(nn.ReLU(inplace=True))
self.inplanes = planes
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.deconv_layers(x)
ret = {}
for head in self.heads:
ret[head] = self.__getattr__(head)(x)
return [ret]
    def init_weights(self, num_layers):
        url = model_urls['resnet{}'.format(num_layers)]
        pretrained_state_dict = model_zoo.load_url(url)
        print('=> loading pretrained model {}'.format(url))
        self.load_state_dict(pretrained_state_dict, strict=False)
        print('=> init deconv weights from normal distribution')
        for name, m in self.deconv_layers.named_modules():
            if isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
34: (BasicBlock, [3, 4, 6, 3]),
50: (Bottleneck, [3, 4, 6, 3]),
101: (Bottleneck, [3, 4, 23, 3]),
152: (Bottleneck, [3, 8, 36, 3])}
def get_pose_net(num_layers, heads, head_conv=256):
block_class, layers = resnet_spec[num_layers]
model = PoseResNet(block_class, layers, heads, head_conv=head_conv)
model.init_weights(num_layers)
return model
import math
from collections import OrderedDict
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.nn import init
from .DCNv2.dcn_v2 import DCN
BN_MOMENTUM = 0.1
def conv_bn(inp, oup, stride):
return nn.Sequential(
nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
nn.BatchNorm2d(oup),
nn.ReLU(inplace=True)
)
def conv_1x1_bn(inp, oup):
return nn.Sequential(
nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup),
nn.ReLU(inplace=True)
)
def channel_shuffle(x, groups):
batchsize, num_channels, height, width = x.data.size()
channels_per_group = num_channels // groups
# reshape
x = x.view(batchsize, groups,
channels_per_group, height, width)
x = torch.transpose(x, 1, 2).contiguous()
# flatten
x = x.view(batchsize, -1, height, width)
return x
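# Illustrative sketch (hypothetical helper): with groups=2 the channels
# [0..7] are interleaved as [0, 4, 1, 5, 2, 6, 3, 7], mixing information
# between the two ShuffleNet branches.
def _demo_channel_shuffle():
    x = torch.arange(8.).view(1, 8, 1, 1)
    return channel_shuffle(x, 2).view(-1).tolist()  # [0, 4, 1, 5, 2, 6, 3, 7]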
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
class InvertedResidual(nn.Module):
def __init__(self, inp, oup, stride, benchmodel):
super(InvertedResidual, self).__init__()
self.benchmodel = benchmodel
self.stride = stride
assert stride in [1, 2]
oup_inc = oup//2
if self.benchmodel == 1:
#assert inp == oup_inc
self.banch2 = nn.Sequential(
# pw
nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup_inc),
nn.ReLU(inplace=True),
# dw
nn.Conv2d(oup_inc, oup_inc, 3, stride, 1, groups=oup_inc, bias=False),
nn.BatchNorm2d(oup_inc),
# pw-linear
nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup_inc),
nn.ReLU(inplace=True),
)
else:
self.banch1 = nn.Sequential(
# dw
nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
nn.BatchNorm2d(inp),
# pw-linear
nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup_inc),
nn.ReLU(inplace=True),
)
self.banch2 = nn.Sequential(
# pw
nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup_inc),
nn.ReLU(inplace=True),
# dw
nn.Conv2d(oup_inc, oup_inc, 3, stride, 1, groups=oup_inc, bias=False),
nn.BatchNorm2d(oup_inc),
# pw-linear
nn.Conv2d(oup_inc, oup_inc, 1, 1, 0, bias=False),
nn.BatchNorm2d(oup_inc),
nn.ReLU(inplace=True),
)
@staticmethod
def _concat(x, out):
# concatenate along channel axis
return torch.cat((x, out), 1)
def forward(self, x):
if 1==self.benchmodel:
x1 = x[:, :(x.shape[1]//2), :, :]
x2 = x[:, (x.shape[1]//2):, :, :]
out = self._concat(x1, self.banch2(x2))
elif 2==self.benchmodel:
out = self._concat(self.banch1(x), self.banch2(x))
return channel_shuffle(out, 2)
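# Illustrative sketch (hypothetical helper): benchmodel=1 keeps the spatial
# size and channel count, splitting the input in half, transforming one half
# and shuffling the concatenated result.
def _demo_inverted_residual():
    blk = InvertedResidual(116, 116, stride=1, benchmodel=1)
    return blk(torch.randn(1, 116, 16, 16)).shape  # torch.Size([1, 116, 16, 16])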
class ShuffleNetV2(nn.Module):
def __init__(self, input_size=512, width_mult=1.):
super(ShuffleNetV2, self).__init__()
self.inplanes = 24
self.deconv_with_bias = False
assert input_size % 32 == 0
self.stage_repeats = [4, 8, 4]
#self.stage_repeats = [2, 3, 2]
# index 0 is invalid and should never be called.
# only used for indexing convenience.
if width_mult == 0.5:
self.stage_out_channels = [-1, 24, 48, 96, 192, 1024]
elif width_mult == 1.0:
self.stage_out_channels = [-1, 24, 116, 232, 464, 1024]
elif width_mult == 1.5:
self.stage_out_channels = [-1, 24, 176, 352, 704, 1024]
elif width_mult == 2.0:
self.stage_out_channels = [-1, 24, 224, 488, 976, 2048]
        else:
            raise ValueError(
                'width_mult {} is not supported for '
                '1x1 Grouped Convolutions'.format(width_mult))
# building first layer
input_channel = self.stage_out_channels[1]
self.conv1 = conv_bn(3, input_channel, 2)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.features = []
# building inverted residual blocks
for idxstage in range(len(self.stage_repeats)):
numrepeat = self.stage_repeats[idxstage]
output_channel = self.stage_out_channels[idxstage+2]
for i in range(numrepeat):
if i == 0:
#inp, oup, stride, benchmodel):
self.features.append(InvertedResidual(input_channel, output_channel, 2, 2))
else:
self.features.append(InvertedResidual(input_channel, output_channel, 1, 1))
input_channel = output_channel
self.inplanes = output_channel
# make it nn.Sequential
self.features = nn.Sequential(*self.features)
        # consider adding the last several layers here
# building last several layers
# self.conv_last = conv_1x1_bn(input_channel, self.stage_out_channels[-1])
# self.globalpool = nn.Sequential(nn.AvgPool2d(int(input_size/32)))
# used for deconv layers
self.deconv_layers = self._make_deconv_layer(
3,
[256, 256, 256],
[4, 4, 4],
)
def _get_deconv_cfg(self, deconv_kernel, index):
if deconv_kernel == 4:
padding = 1
output_padding = 0
elif deconv_kernel == 3:
padding = 1
output_padding = 1
        elif deconv_kernel == 2:
            padding = 0
            output_padding = 0
        else:
            raise ValueError(
                'deconv kernel size {} is not supported'.format(deconv_kernel))
        return deconv_kernel, padding, output_padding
def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
        assert num_layers == len(num_filters), \
            'ERROR: num_deconv_layers is different from len(num_deconv_filters)'
        assert num_layers == len(num_kernels), \
            'ERROR: num_deconv_layers is different from len(num_deconv_kernels)'
layers = []
for i in range(num_layers):
kernel, padding, output_padding = \
self._get_deconv_cfg(num_kernels[i], i)
planes = num_filters[i]
fc = DCN(self.inplanes, planes,
kernel_size=(3,3), stride=1,
padding=1, dilation=1, deformable_groups=1)
# fc = nn.Conv2d(self.inplanes, planes,
# kernel_size=3, stride=1,
# padding=1, dilation=1, bias=False)
# fill_fc_weights(fc)
up = nn.ConvTranspose2d(
in_channels=planes,
out_channels=planes,
kernel_size=kernel,
stride=2,
padding=padding,
output_padding=output_padding,
bias=self.deconv_with_bias)
fill_up_weights(up)
layers.append(fc)
layers.append(nn.BatchNorm2d(planes))
layers.append(nn.ReLU(inplace=True))
layers.append(up)
layers.append(nn.BatchNorm2d(planes))
layers.append(nn.ReLU(inplace=True))
self.inplanes = planes
return nn.Sequential(*layers)
def init_weights(self, pretrained=True):
if pretrained:
# print('=> init resnet deconv weights from normal distribution')
print('=> init deconv weights from normal distribution')
for name, m in self.deconv_layers.named_modules():
if isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
#pretrained_state_dict = torch.load(pretrained)
#address = "/data/pretrained_model/shufflenetv2_x1_69.390_88.412.pth.tar"
#pretrained_state_dict = torch.load(address)
#self.load_state_dict(pretrained_state_dict, strict=False)
def forward(self, x):
#import pdb; pdb.set_trace()
x = self.conv1(x)
x = self.maxpool(x)
x = self.features(x)
x = self.deconv_layers(x)
return x
def shufflenetv2(width_mult=1.):
model = ShuffleNetV2(width_mult=width_mult)
return model
def get_shufflev2_net(num_layers, cfg):
model = ShuffleNetV2()
    model.init_weights(pretrained=True)
return model
import torch
from torch.nn.modules import Module
from torch.nn.parallel.scatter_gather import gather
from torch.nn.parallel.replicate import replicate
from torch.nn.parallel.parallel_apply import parallel_apply
from .scatter_gather import scatter_kwargs
class _DataParallel(Module):
r"""Implements data parallelism at the module level.
This container parallelizes the application of the given module by
splitting the input across the specified devices by chunking in the batch
dimension. In the forward pass, the module is replicated on each device,
and each replica handles a portion of the input. During the backwards
pass, gradients from each replica are summed into the original module.
The batch size should be larger than the number of GPUs used. It should
also be an integer multiple of the number of GPUs so that each chunk is the
same size (so that each GPU processes the same number of samples).
See also: :ref:`cuda-nn-dataparallel-instead`
Arbitrary positional and keyword inputs are allowed to be passed into
DataParallel EXCEPT Tensors. All variables will be scattered on dim
specified (default 0). Primitive types will be broadcasted, but all
other types will be a shallow copy and can be corrupted if written to in
the model's forward pass.
Args:
module: module to be parallelized
device_ids: CUDA devices (default: all devices)
        output_device: device location of output (default: device_ids[0])
        chunk_sizes: optional per-device batch sizes for an uneven split
            (default: equal chunks across devices)
Example::
>>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2])
>>> output = net(input_var)
"""
# TODO: update notes/cuda.rst when this class handles 8+ GPUs well
def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
super(_DataParallel, self).__init__()
if not torch.cuda.is_available():
self.module = module
self.device_ids = []
return
if device_ids is None:
device_ids = list(range(torch.cuda.device_count()))
if output_device is None:
output_device = device_ids[0]
self.dim = dim
self.module = module
self.device_ids = device_ids
self.chunk_sizes = chunk_sizes
self.output_device = output_device
if len(self.device_ids) == 1:
self.module.cuda(device_ids[0])
def forward(self, *inputs, **kwargs):
if not self.device_ids:
return self.module(*inputs, **kwargs)
inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
if len(self.device_ids) == 1:
return self.module(*inputs[0], **kwargs[0])
replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
outputs = self.parallel_apply(replicas, inputs, kwargs)
return self.gather(outputs, self.output_device)
def replicate(self, module, device_ids):
return replicate(module, device_ids)
def scatter(self, inputs, kwargs, device_ids, chunk_sizes):
return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes)
def parallel_apply(self, replicas, inputs, kwargs):
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
def gather(self, outputs, output_device):
return gather(outputs, output_device, dim=self.dim)
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
r"""Evaluates module(input) in parallel across the GPUs given in device_ids.
This is the functional version of the DataParallel module.
Args:
module: the module to evaluate in parallel
inputs: inputs to the module
device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output. Use -1 to indicate the CPU.
            (default: device_ids[0])
Returns:
a Variable containing the result of module(input) located on
output_device
"""
if not isinstance(inputs, tuple):
inputs = (inputs,)
if device_ids is None:
device_ids = list(range(torch.cuda.device_count()))
if output_device is None:
output_device = device_ids[0]
inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
if len(device_ids) == 1:
return module(*inputs[0], **module_kwargs[0])
used_device_ids = device_ids[:len(inputs)]
replicas = replicate(module, used_device_ids)
outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
return gather(outputs, output_device, dim)
def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None):
if chunk_sizes is None:
return torch.nn.DataParallel(module, device_ids, output_device, dim)
standard_size = True
for i in range(1, len(chunk_sizes)):
if chunk_sizes[i] != chunk_sizes[0]:
standard_size = False
if standard_size:
return torch.nn.DataParallel(module, device_ids, output_device, dim)
return _DataParallel(module, device_ids, output_device, dim, chunk_sizes)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
from .utils import _gather_feat, _tranpose_and_gather_feat
import numpy as np
def _nms(heat, kernel=3):
pad = (kernel - 1) // 2
hmax = nn.functional.max_pool2d(
heat, (kernel, kernel), stride=1, padding=pad)
keep = (hmax == heat).float()
return heat * keep
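# Illustrative sketch (hypothetical helper): the max-pool "NMS" zeroes every
# heatmap value that is not the maximum of its 3x3 neighbourhood.
def _demo_nms():
    heat = torch.zeros(1, 1, 5, 5)
    heat[0, 0, 2, 2] = 0.9  # local peak: kept
    heat[0, 0, 2, 3] = 0.8  # next to a larger value: suppressed to 0
    return _nms(heat)[0, 0]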
def _left_aggregate(heat):
'''
heat: batchsize x channels x h x w
'''
shape = heat.shape
heat = heat.reshape(-1, heat.shape[3])
heat = heat.transpose(1, 0).contiguous()
ret = heat.clone()
for i in range(1, heat.shape[0]):
inds = (heat[i] >= heat[i - 1])
ret[i] += ret[i - 1] * inds.float()
return (ret - heat).transpose(1, 0).reshape(shape)
def _right_aggregate(heat):
'''
heat: batchsize x channels x h x w
'''
shape = heat.shape
heat = heat.reshape(-1, heat.shape[3])
heat = heat.transpose(1, 0).contiguous()
ret = heat.clone()
for i in range(heat.shape[0] - 2, -1, -1):
        inds = (heat[i] >= heat[i + 1])
ret[i] += ret[i + 1] * inds.float()
return (ret - heat).transpose(1, 0).reshape(shape)
def _top_aggregate(heat):
'''
heat: batchsize x channels x h x w
'''
heat = heat.transpose(3, 2)
shape = heat.shape
heat = heat.reshape(-1, heat.shape[3])
heat = heat.transpose(1, 0).contiguous()
ret = heat.clone()
for i in range(1, heat.shape[0]):
inds = (heat[i] >= heat[i - 1])
ret[i] += ret[i - 1] * inds.float()
return (ret - heat).transpose(1, 0).reshape(shape).transpose(3, 2)
def _bottom_aggregate(heat):
'''
heat: batchsize x channels x h x w
'''
heat = heat.transpose(3, 2)
shape = heat.shape
heat = heat.reshape(-1, heat.shape[3])
heat = heat.transpose(1, 0).contiguous()
ret = heat.clone()
for i in range(heat.shape[0] - 2, -1, -1):
inds = (heat[i] >= heat[i + 1])
ret[i] += ret[i + 1] * inds.float()
return (ret - heat).transpose(1, 0).reshape(shape).transpose(3, 2)
def _h_aggregate(heat, aggr_weight=0.1):
return aggr_weight * _left_aggregate(heat) + \
aggr_weight * _right_aggregate(heat) + heat
def _v_aggregate(heat, aggr_weight=0.1):
return aggr_weight * _top_aggregate(heat) + \
aggr_weight * _bottom_aggregate(heat) + heat
'''
# Slow for large number of categories
def _topk(scores, K=40):
batch, cat, height, width = scores.size()
topk_scores, topk_inds = torch.topk(scores.view(batch, -1), K)
topk_clses = (topk_inds / (height * width)).int()
topk_inds = topk_inds % (height * width)
topk_ys = (topk_inds / width).int().float()
topk_xs = (topk_inds % width).int().float()
return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs
'''
def _topk_channel(scores, K=40):
batch, cat, height, width = scores.size()
topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)
topk_inds = topk_inds % (height * width)
topk_ys = (topk_inds / width).int().float()
topk_xs = (topk_inds % width).int().float()
return topk_scores, topk_inds, topk_ys, topk_xs
def _topk(scores, K=40):
batch, cat, height, width = scores.size()
    topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)  # top K points per class
topk_inds = topk_inds % (height * width)
topk_ys = (topk_inds / width).int().float()
topk_xs = (topk_inds % width).int().float()
topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
topk_clses = (topk_ind / K).int()
topk_inds = _gather_feat(
topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
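# Illustrative sketch (hypothetical helper): `_topk` first takes the top K
# peaks per class, then the top K overall, and decodes each flat index into
# (class, y, x) coordinates on the feature map.
def _demo_topk():
    scores = torch.rand(1, 3, 8, 8)
    score, inds, clses, ys, xs = _topk(scores, K=5)
    return score.shape, clses, ys, xs  # each of shape (1, 5)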
def agnex_ct_decode(
t_heat, l_heat, b_heat, r_heat, ct_heat,
t_regr=None, l_regr=None, b_regr=None, r_regr=None,
K=40, scores_thresh=0.1, center_thresh=0.1, aggr_weight=0.0, num_dets=1000
):
batch, cat, height, width = t_heat.size()
'''
t_heat = torch.sigmoid(t_heat)
l_heat = torch.sigmoid(l_heat)
b_heat = torch.sigmoid(b_heat)
r_heat = torch.sigmoid(r_heat)
ct_heat = torch.sigmoid(ct_heat)
'''
if aggr_weight > 0:
t_heat = _h_aggregate(t_heat, aggr_weight=aggr_weight)
l_heat = _v_aggregate(l_heat, aggr_weight=aggr_weight)
b_heat = _h_aggregate(b_heat, aggr_weight=aggr_weight)
r_heat = _v_aggregate(r_heat, aggr_weight=aggr_weight)
# perform nms on heatmaps
t_heat = _nms(t_heat)
l_heat = _nms(l_heat)
b_heat = _nms(b_heat)
r_heat = _nms(r_heat)
t_heat[t_heat > 1] = 1
l_heat[l_heat > 1] = 1
b_heat[b_heat > 1] = 1
r_heat[r_heat > 1] = 1
t_scores, t_inds, _, t_ys, t_xs = _topk(t_heat, K=K)
l_scores, l_inds, _, l_ys, l_xs = _topk(l_heat, K=K)
b_scores, b_inds, _, b_ys, b_xs = _topk(b_heat, K=K)
r_scores, r_inds, _, r_ys, r_xs = _topk(r_heat, K=K)
ct_heat_agn, ct_clses = torch.max(ct_heat, dim=1, keepdim=True)
# import pdb; pdb.set_trace()
t_ys = t_ys.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
t_xs = t_xs.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
l_ys = l_ys.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
l_xs = l_xs.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
b_ys = b_ys.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
b_xs = b_xs.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
r_ys = r_ys.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
r_xs = r_xs.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
box_ct_xs = ((l_xs + r_xs + 0.5) / 2).long()
box_ct_ys = ((t_ys + b_ys + 0.5) / 2).long()
ct_inds = box_ct_ys * width + box_ct_xs
ct_inds = ct_inds.view(batch, -1)
ct_heat_agn = ct_heat_agn.view(batch, -1, 1)
ct_clses = ct_clses.view(batch, -1, 1)
ct_scores = _gather_feat(ct_heat_agn, ct_inds)
clses = _gather_feat(ct_clses, ct_inds)
t_scores = t_scores.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
l_scores = l_scores.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
b_scores = b_scores.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
r_scores = r_scores.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
ct_scores = ct_scores.view(batch, K, K, K, K)
scores = (t_scores + l_scores + b_scores + r_scores + 2 * ct_scores) / 6
    # reject boxes that violate extreme-point geometry or the score thresholds
top_inds = (t_ys > l_ys) + (t_ys > b_ys) + (t_ys > r_ys)
top_inds = (top_inds > 0)
left_inds = (l_xs > t_xs) + (l_xs > b_xs) + (l_xs > r_xs)
left_inds = (left_inds > 0)
bottom_inds = (b_ys < t_ys) + (b_ys < l_ys) + (b_ys < r_ys)
bottom_inds = (bottom_inds > 0)
right_inds = (r_xs < t_xs) + (r_xs < l_xs) + (r_xs < b_xs)
right_inds = (right_inds > 0)
sc_inds = (t_scores < scores_thresh) + (l_scores < scores_thresh) + \
(b_scores < scores_thresh) + (r_scores < scores_thresh) + \
(ct_scores < center_thresh)
sc_inds = (sc_inds > 0)
scores = scores - sc_inds.float()
scores = scores - top_inds.float()
scores = scores - left_inds.float()
scores = scores - bottom_inds.float()
scores = scores - right_inds.float()
scores = scores.view(batch, -1)
scores, inds = torch.topk(scores, num_dets)
scores = scores.unsqueeze(2)
if t_regr is not None and l_regr is not None \
and b_regr is not None and r_regr is not None:
t_regr = _tranpose_and_gather_feat(t_regr, t_inds)
t_regr = t_regr.view(batch, K, 1, 1, 1, 2)
l_regr = _tranpose_and_gather_feat(l_regr, l_inds)
l_regr = l_regr.view(batch, 1, K, 1, 1, 2)
b_regr = _tranpose_and_gather_feat(b_regr, b_inds)
b_regr = b_regr.view(batch, 1, 1, K, 1, 2)
r_regr = _tranpose_and_gather_feat(r_regr, r_inds)
r_regr = r_regr.view(batch, 1, 1, 1, K, 2)
t_xs = t_xs + t_regr[..., 0]
t_ys = t_ys + t_regr[..., 1]
l_xs = l_xs + l_regr[..., 0]
l_ys = l_ys + l_regr[..., 1]
b_xs = b_xs + b_regr[..., 0]
b_ys = b_ys + b_regr[..., 1]
r_xs = r_xs + r_regr[..., 0]
r_ys = r_ys + r_regr[..., 1]
else:
t_xs = t_xs + 0.5
t_ys = t_ys + 0.5
l_xs = l_xs + 0.5
l_ys = l_ys + 0.5
b_xs = b_xs + 0.5
b_ys = b_ys + 0.5
r_xs = r_xs + 0.5
r_ys = r_ys + 0.5
bboxes = torch.stack((l_xs, t_ys, r_xs, b_ys), dim=5)
bboxes = bboxes.view(batch, -1, 4)
bboxes = _gather_feat(bboxes, inds)
clses = clses.contiguous().view(batch, -1, 1)
clses = _gather_feat(clses, inds).float()
t_xs = t_xs.contiguous().view(batch, -1, 1)
t_xs = _gather_feat(t_xs, inds).float()
t_ys = t_ys.contiguous().view(batch, -1, 1)
t_ys = _gather_feat(t_ys, inds).float()
l_xs = l_xs.contiguous().view(batch, -1, 1)
l_xs = _gather_feat(l_xs, inds).float()
l_ys = l_ys.contiguous().view(batch, -1, 1)
l_ys = _gather_feat(l_ys, inds).float()
b_xs = b_xs.contiguous().view(batch, -1, 1)
b_xs = _gather_feat(b_xs, inds).float()
b_ys = b_ys.contiguous().view(batch, -1, 1)
b_ys = _gather_feat(b_ys, inds).float()
r_xs = r_xs.contiguous().view(batch, -1, 1)
r_xs = _gather_feat(r_xs, inds).float()
r_ys = r_ys.contiguous().view(batch, -1, 1)
r_ys = _gather_feat(r_ys, inds).float()
detections = torch.cat([bboxes, scores, t_xs, t_ys, l_xs, l_ys,
b_xs, b_ys, r_xs, r_ys, clses], dim=2)
return detections
def exct_decode(
t_heat, l_heat, b_heat, r_heat, ct_heat,
t_regr=None, l_regr=None, b_regr=None, r_regr=None,
K=40, scores_thresh=0.1, center_thresh=0.1, aggr_weight=0.0, num_dets=1000
):
batch, cat, height, width = t_heat.size()
'''
t_heat = torch.sigmoid(t_heat)
l_heat = torch.sigmoid(l_heat)
b_heat = torch.sigmoid(b_heat)
r_heat = torch.sigmoid(r_heat)
ct_heat = torch.sigmoid(ct_heat)
'''
if aggr_weight > 0:
t_heat = _h_aggregate(t_heat, aggr_weight=aggr_weight)
l_heat = _v_aggregate(l_heat, aggr_weight=aggr_weight)
b_heat = _h_aggregate(b_heat, aggr_weight=aggr_weight)
r_heat = _v_aggregate(r_heat, aggr_weight=aggr_weight)
# perform nms on heatmaps
t_heat = _nms(t_heat)
l_heat = _nms(l_heat)
b_heat = _nms(b_heat)
r_heat = _nms(r_heat)
t_heat[t_heat > 1] = 1
l_heat[l_heat > 1] = 1
b_heat[b_heat > 1] = 1
r_heat[r_heat > 1] = 1
t_scores, t_inds, t_clses, t_ys, t_xs = _topk(t_heat, K=K)
l_scores, l_inds, l_clses, l_ys, l_xs = _topk(l_heat, K=K)
b_scores, b_inds, b_clses, b_ys, b_xs = _topk(b_heat, K=K)
r_scores, r_inds, r_clses, r_ys, r_xs = _topk(r_heat, K=K)
t_ys = t_ys.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
t_xs = t_xs.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
l_ys = l_ys.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
l_xs = l_xs.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
b_ys = b_ys.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
b_xs = b_xs.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
r_ys = r_ys.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
r_xs = r_xs.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
t_clses = t_clses.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
l_clses = l_clses.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
b_clses = b_clses.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
r_clses = r_clses.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
box_ct_xs = ((l_xs + r_xs + 0.5) / 2).long()
box_ct_ys = ((t_ys + b_ys + 0.5) / 2).long()
ct_inds = t_clses.long() * (height * width) + box_ct_ys * width + box_ct_xs
ct_inds = ct_inds.view(batch, -1)
ct_heat = ct_heat.view(batch, -1, 1)
ct_scores = _gather_feat(ct_heat, ct_inds)
t_scores = t_scores.view(batch, K, 1, 1, 1).expand(batch, K, K, K, K)
l_scores = l_scores.view(batch, 1, K, 1, 1).expand(batch, K, K, K, K)
b_scores = b_scores.view(batch, 1, 1, K, 1).expand(batch, K, K, K, K)
r_scores = r_scores.view(batch, 1, 1, 1, K).expand(batch, K, K, K, K)
ct_scores = ct_scores.view(batch, K, K, K, K)
scores = (t_scores + l_scores + b_scores + r_scores + 2 * ct_scores) / 6
# reject boxes based on classes
cls_inds = (t_clses != l_clses) + (t_clses != b_clses) + \
(t_clses != r_clses)
cls_inds = (cls_inds > 0)
top_inds = (t_ys > l_ys) + (t_ys > b_ys) + (t_ys > r_ys)
top_inds = (top_inds > 0)
left_inds = (l_xs > t_xs) + (l_xs > b_xs) + (l_xs > r_xs)
left_inds = (left_inds > 0)
bottom_inds = (b_ys < t_ys) + (b_ys < l_ys) + (b_ys < r_ys)
bottom_inds = (bottom_inds > 0)
right_inds = (r_xs < t_xs) + (r_xs < l_xs) + (r_xs < b_xs)
right_inds = (right_inds > 0)
sc_inds = (t_scores < scores_thresh) + (l_scores < scores_thresh) + \
(b_scores < scores_thresh) + (r_scores < scores_thresh) + \
(ct_scores < center_thresh)
sc_inds = (sc_inds > 0)
scores = scores - sc_inds.float()
scores = scores - cls_inds.float()
scores = scores - top_inds.float()
scores = scores - left_inds.float()
scores = scores - bottom_inds.float()
scores = scores - right_inds.float()
scores = scores.view(batch, -1)
scores, inds = torch.topk(scores, num_dets)
scores = scores.unsqueeze(2)
if t_regr is not None and l_regr is not None \
and b_regr is not None and r_regr is not None:
t_regr = _tranpose_and_gather_feat(t_regr, t_inds)
t_regr = t_regr.view(batch, K, 1, 1, 1, 2)
l_regr = _tranpose_and_gather_feat(l_regr, l_inds)
l_regr = l_regr.view(batch, 1, K, 1, 1, 2)
b_regr = _tranpose_and_gather_feat(b_regr, b_inds)
b_regr = b_regr.view(batch, 1, 1, K, 1, 2)
r_regr = _tranpose_and_gather_feat(r_regr, r_inds)
r_regr = r_regr.view(batch, 1, 1, 1, K, 2)
t_xs = t_xs + t_regr[..., 0]
t_ys = t_ys + t_regr[..., 1]
l_xs = l_xs + l_regr[..., 0]
l_ys = l_ys + l_regr[..., 1]
b_xs = b_xs + b_regr[..., 0]
b_ys = b_ys + b_regr[..., 1]
r_xs = r_xs + r_regr[..., 0]
r_ys = r_ys + r_regr[..., 1]
else:
t_xs = t_xs + 0.5
t_ys = t_ys + 0.5
l_xs = l_xs + 0.5
l_ys = l_ys + 0.5
b_xs = b_xs + 0.5
b_ys = b_ys + 0.5
r_xs = r_xs + 0.5
r_ys = r_ys + 0.5
bboxes = torch.stack((l_xs, t_ys, r_xs, b_ys), dim=5)
bboxes = bboxes.view(batch, -1, 4)
bboxes = _gather_feat(bboxes, inds)
clses = t_clses.contiguous().view(batch, -1, 1)
clses = _gather_feat(clses, inds).float()
t_xs = t_xs.contiguous().view(batch, -1, 1)
t_xs = _gather_feat(t_xs, inds).float()
t_ys = t_ys.contiguous().view(batch, -1, 1)
t_ys = _gather_feat(t_ys, inds).float()
l_xs = l_xs.contiguous().view(batch, -1, 1)
l_xs = _gather_feat(l_xs, inds).float()
l_ys = l_ys.contiguous().view(batch, -1, 1)
l_ys = _gather_feat(l_ys, inds).float()
b_xs = b_xs.contiguous().view(batch, -1, 1)
b_xs = _gather_feat(b_xs, inds).float()
b_ys = b_ys.contiguous().view(batch, -1, 1)
b_ys = _gather_feat(b_ys, inds).float()
r_xs = r_xs.contiguous().view(batch, -1, 1)
r_xs = _gather_feat(r_xs, inds).float()
r_ys = r_ys.contiguous().view(batch, -1, 1)
r_ys = _gather_feat(r_ys, inds).float()
detections = torch.cat([bboxes, scores, t_xs, t_ys, l_xs, l_ys,
b_xs, b_ys, r_xs, r_ys, clses], dim=2)
return detections
def ddd_decode(heat, rot, depth, dim, wh=None, reg=None, K=40):
batch, cat, height, width = heat.size()
# heat = torch.sigmoid(heat)
# perform nms on heatmaps
heat = _nms(heat)
scores, inds, clses, ys, xs = _topk(heat, K=K)
if reg is not None:
reg = _tranpose_and_gather_feat(reg, inds)
reg = reg.view(batch, K, 2)
xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
ys = ys.view(batch, K, 1) + reg[:, :, 1:2]
else:
xs = xs.view(batch, K, 1) + 0.5
ys = ys.view(batch, K, 1) + 0.5
rot = _tranpose_and_gather_feat(rot, inds)
rot = rot.view(batch, K, 8)
depth = _tranpose_and_gather_feat(depth, inds)
depth = depth.view(batch, K, 1)
dim = _tranpose_and_gather_feat(dim, inds)
dim = dim.view(batch, K, 3)
clses = clses.view(batch, K, 1).float()
scores = scores.view(batch, K, 1)
xs = xs.view(batch, K, 1)
ys = ys.view(batch, K, 1)
if wh is not None:
wh = _tranpose_and_gather_feat(wh, inds)
wh = wh.view(batch, K, 2)
detections = torch.cat(
[xs, ys, scores, rot, depth, dim, wh, clses], dim=2)
else:
detections = torch.cat(
[xs, ys, scores, rot, depth, dim, clses], dim=2)
return detections
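# Usage sketch for ddd_decode (illustrative KITTI-style shapes: 3 classes,
# 8-channel rotation encoding, scalar depth, 3-channel object size; heat is
# assumed to be sigmoid-activated already):
#   >>> import torch
#   >>> heat = torch.rand(1, 3, 96, 320)
#   >>> rot = torch.rand(1, 8, 96, 320)
#   >>> depth = torch.rand(1, 1, 96, 320)
#   >>> dim = torch.rand(1, 3, 96, 320)
#   >>> dets = ddd_decode(heat, rot, depth, dim, K=40)
#   >>> dets.shape  # (1, 40, 16): x, y, score, rot(8), depth, dim(3), class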
def ctdet_decode(heat, wh, reg=None, cat_spec_wh=False, K=100):
batch, cat, height, width = heat.size()
# heat = torch.sigmoid(heat)
# perform nms on heatmaps
heat = _nms(heat)  # 3x3 max-pool filtering: keep only local maxima
scores, inds, clses, ys, xs = _topk(heat, K=K)
if reg is not None:
reg = _tranpose_and_gather_feat(reg, inds)
reg = reg.view(batch, K, 2)
xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
ys = ys.view(batch, K, 1) + reg[:, :, 1:2]
else:
xs = xs.view(batch, K, 1) + 0.5
ys = ys.view(batch, K, 1) + 0.5
wh = _tranpose_and_gather_feat(wh, inds)
if cat_spec_wh:
wh = wh.view(batch, K, cat, 2)
clses_ind = clses.view(batch, K, 1, 1).expand(batch, K, 1, 2).long()
wh = wh.gather(2, clses_ind).view(batch, K, 2)
else:
wh = wh.view(batch, K, 2)
clses = clses.view(batch, K, 1).float()
scores = scores.view(batch, K, 1)
bboxes = torch.cat([xs - wh[..., 0:1] / 2,
ys - wh[..., 1:2] / 2,
xs + wh[..., 0:1] / 2,
ys + wh[..., 1:2] / 2], dim=2)
detections = torch.cat([bboxes, scores, clses], dim=2)
return detections
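# Usage sketch for ctdet_decode (illustrative 80-class shapes; heat is assumed
# to be sigmoid-activated already):
#   >>> import torch
#   >>> heat = torch.rand(1, 80, 128, 128)
#   >>> wh = torch.rand(1, 2, 128, 128)
#   >>> reg = torch.rand(1, 2, 128, 128)
#   >>> dets = ctdet_decode(heat, wh, reg=reg, K=100)
#   >>> dets.shape  # (1, 100, 6): x1, y1, x2, y2, score, class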
def multi_pose_decode(
heat, wh, kps, reg=None, hm_hp=None, hp_offset=None, K=100):
batch, cat, height, width = heat.size()
num_joints = kps.shape[1] // 2
# heat = torch.sigmoid(heat)
# perform nms on heatmaps
heat = _nms(heat)
scores, inds, clses, ys, xs = _topk(heat, K=K)
kps = _tranpose_and_gather_feat(kps, inds)
kps = kps.view(batch, K, num_joints * 2)
kps[..., ::2] += xs.view(batch, K, 1).expand(batch, K, num_joints)  # first pass: joint coordinates from the center-relative offsets
kps[..., 1::2] += ys.view(batch, K, 1).expand(batch, K, num_joints)
if reg is not None:  # regressed sub-pixel offset of the center point
reg = _tranpose_and_gather_feat(reg, inds)
reg = reg.view(batch, K, 2)
xs = xs.view(batch, K, 1) + reg[:, :, 0:1]
ys = ys.view(batch, K, 1) + reg[:, :, 1:2]
else:
xs = xs.view(batch, K, 1) + 0.5
ys = ys.view(batch, K, 1) + 0.5
wh = _tranpose_and_gather_feat(wh, inds)  # width and height of the box
wh = wh.view(batch, K, 2)
clses = clses.view(batch, K, 1).float()
scores = scores.view(batch, K, 1)
bboxes = torch.cat([xs - wh[..., 0:1] / 2,
ys - wh[..., 1:2] / 2,
xs + wh[..., 0:1] / 2,
ys + wh[..., 1:2] / 2], dim=2)
if hm_hp is not None:
hm_hp = _nms(hm_hp)  # second pass: joint locations taken from the joint heatmaps
thresh = 0.1
kps = kps.view(batch, K, num_joints, 2).permute(
0, 2, 1, 3).contiguous() # b x J x K x 2
reg_kps = kps.unsqueeze(3).expand(batch, num_joints, K, K, 2)
hm_score, hm_inds, hm_ys, hm_xs = _topk_channel(hm_hp, K=K) # b x J x K
if hp_offset is not None:  # sub-pixel offsets of the joint centers
hp_offset = _tranpose_and_gather_feat(
hp_offset, hm_inds.view(batch, -1))
hp_offset = hp_offset.view(batch, num_joints, K, 2)
hm_xs = hm_xs + hp_offset[:, :, :, 0]
hm_ys = hm_ys + hp_offset[:, :, :, 1]
else:
hm_xs = hm_xs + 0.5
hm_ys = hm_ys + 0.5
mask = (hm_score > thresh).float()  # keep joints with confidence above 0.1
hm_score = (1 - mask) * -1 + mask * hm_score
hm_ys = (1 - mask) * (-10000) + mask * hm_ys
hm_xs = (1 - mask) * (-10000) + mask * hm_xs
hm_kps = torch.stack([hm_xs, hm_ys], dim=-1).unsqueeze(
2).expand(batch, num_joints, K, K, 2)
dist = (((reg_kps - hm_kps) ** 2).sum(dim=4) ** 0.5)  # distance between the joints from the two passes
min_dist, min_ind = dist.min(dim=3) # b x J x K
hm_score = hm_score.gather(2, min_ind).unsqueeze(-1) # b x J x K x 1
min_dist = min_dist.unsqueeze(-1)
min_ind = min_ind.view(batch, num_joints, K, 1, 1).expand(
batch, num_joints, K, 1, 2)
hm_kps = hm_kps.gather(3, min_ind)
hm_kps = hm_kps.view(batch, num_joints, K, 2)
# prefer the heatmap joints: use them when they fall inside the bbox, and fall back to the regressed joints otherwise
l = bboxes[:, :, 0].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
t = bboxes[:, :, 1].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
r = bboxes[:, :, 2].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
b = bboxes[:, :, 3].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \
(hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + \
(hm_score < thresh) + (min_dist > (torch.max(b - t, r - l) * 0.3))
mask = (mask > 0).float().expand(batch, num_joints, K, 2)
kps = (1 - mask) * hm_kps + mask * kps
kps = kps.permute(0, 2, 1, 3).contiguous().view(
batch, K, num_joints * 2)
detections = torch.cat([bboxes, scores, kps, clses], dim=2)  # bbox:4 + score:1 + kpoints:2*num_joints (10 for 5 landmarks) + class:1
return detections
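# Usage sketch for multi_pose_decode (illustrative shapes for 5 face landmarks,
# i.e. kps has 2 * num_joints channels; heat is assumed to be sigmoid-activated
# already):
#   >>> import torch
#   >>> heat = torch.rand(1, 1, 128, 128)
#   >>> wh = torch.rand(1, 2, 128, 128)
#   >>> kps = torch.rand(1, 10, 128, 128)
#   >>> dets = multi_pose_decode(heat, wh, kps, K=100)
#   >>> dets.shape  # (1, 100, 16): bbox(4) + score(1) + 5 landmarks x 2 + class(1)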
def threshold_choose(scores, threshold):
# select every peak above a fixed score threshold instead of a fixed top-K
mask = scores.gt(threshold)
topk_scores = scores[mask]
topk_inds = torch.arange(scores.numel(), device=scores.device)[mask.flatten()]
batch, cat, height, width = scores.size()
# topk_scores, topk_inds = torch.topk(scores.view(batch, cat, -1), K)  # top-100 points
topk_inds = topk_inds % (height * width)
topk_ys = (topk_inds // width).int().float()
topk_xs = (topk_inds % width).int().float()
K = topk_inds.numel()
topk_score, topk_ind = torch.topk(topk_scores.view(batch, -1), K)
topk_clses = (topk_ind // K).int()
topk_inds = _gather_feat(
topk_inds.view(batch, -1, 1), topk_ind).view(batch, K)
topk_ys = _gather_feat(topk_ys.view(batch, -1, 1), topk_ind).view(batch, K)
topk_xs = _gather_feat(topk_xs.view(batch, -1, 1), topk_ind).view(batch, K)
return topk_score, topk_inds, topk_clses, topk_ys, topk_xs, K
def centerface_decode(
heat, wh, kps, reg=None, hm_hp=None, hp_offset=None, K=100):
batch, cat, height, width = heat.size()
num_joints = kps.shape[1] // 2
# heat = torch.sigmoid(heat)
# perform nms on heatmaps
heat = _nms(heat)
scores, inds, clses, ys_int, xs_int = _topk(heat, K=K)
# scores, inds, clses, ys_int, xs_int, K = threshold_choose(heat, threshold=0.05)
if reg is not None:  # regressed sub-pixel offset of the center point
reg = _tranpose_and_gather_feat(reg, inds)
reg = reg.view(batch, K, 2)
xs = xs_int.view(batch, K, 1) + reg[:, :, 0:1]  # 1. center point; scaled by the output stride (4) downstream
ys = ys_int.view(batch, K, 1) + reg[:, :, 1:2]
# xs = (xs_int.view(batch, K, 1) + reg[:, :, 0:1] + 0.5)
# ys = (ys_int.view(batch, K, 1) + reg[:, :, 1:2] + 0.5)  # 1. center point, computed the CenterFace way
else:
xs = xs_int.view(batch, K, 1) + 0.5
ys = ys_int.view(batch, K, 1) + 0.5
wh = _tranpose_and_gather_feat(wh, inds)  # width and height of the face bbox
wh = wh.view(batch, K, 2)  # 2. wh, option 1: use the regressed values directly
wh = wh.exp() * 4.  # 2. wh, option 2 (active): exponentiate, then scale by the output stride
clses = clses.view(batch, K, 1).float()
scores = scores.view(batch, K, 1)
bboxes = torch.cat([xs - wh[..., 0:1] / 2,
ys - wh[..., 1:2] / 2,
xs + wh[..., 0:1] / 2,
ys + wh[..., 1:2] / 2], dim=2)
kps = _tranpose_and_gather_feat(kps, inds)  # 3. face landmarks
kps = kps.view(batch, K, num_joints * 2)
kps[..., ::2] += xs.view(batch, K, 1).expand(batch, K, num_joints)  # first pass: landmark coordinates from the center-relative offsets
kps[..., 1::2] += ys.view(batch, K, 1).expand(batch, K, num_joints)
if hm_hp is not None:
hm_hp = _nms(hm_hp)  # second pass: landmark locations taken from the landmark heatmaps
thresh = 0.1
kps = kps.view(batch, K, num_joints, 2).permute(
0, 2, 1, 3).contiguous() # b x J x K x 2
reg_kps = kps.unsqueeze(3).expand(batch, num_joints, K, K, 2)
hm_score, hm_inds, hm_ys, hm_xs = _topk_channel(hm_hp, K=K) # b x J x K
if hp_offset is not None:  # sub-pixel offsets of the landmark centers
hp_offset = _tranpose_and_gather_feat(
hp_offset, hm_inds.view(batch, -1))
hp_offset = hp_offset.view(batch, num_joints, K, 2)
hm_xs = hm_xs + hp_offset[:, :, :, 0]
hm_ys = hm_ys + hp_offset[:, :, :, 1]
else:
hm_xs = hm_xs + 0.5
hm_ys = hm_ys + 0.5
mask = (hm_score > thresh).float()  # keep landmarks with confidence above 0.1
hm_score = (1 - mask) * -1 + mask * hm_score
hm_ys = (1 - mask) * (-10000) + mask * hm_ys
hm_xs = (1 - mask) * (-10000) + mask * hm_xs
hm_kps = torch.stack([hm_xs, hm_ys], dim=-1).unsqueeze(
2).expand(batch, num_joints, K, K, 2)
dist = (((reg_kps - hm_kps) ** 2).sum(dim=4) ** 0.5)  # distance between the landmarks from the two passes
min_dist, min_ind = dist.min(dim=3) # b x J x K
hm_score = hm_score.gather(2, min_ind).unsqueeze(-1) # b x J x K x 1
min_dist = min_dist.unsqueeze(-1)
min_ind = min_ind.view(batch, num_joints, K, 1, 1).expand(
batch, num_joints, K, 1, 2)
hm_kps = hm_kps.gather(3, min_ind)
hm_kps = hm_kps.view(batch, num_joints, K, 2)
# prefer the heatmap landmarks: use them when they fall inside the bbox, and fall back to the regressed landmarks otherwise
l = bboxes[:, :, 0].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
t = bboxes[:, :, 1].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
r = bboxes[:, :, 2].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
b = bboxes[:, :, 3].view(batch, 1, K, 1).expand(batch, num_joints, K, 1)
mask = (hm_kps[..., 0:1] < l) + (hm_kps[..., 0:1] > r) + \
(hm_kps[..., 1:2] < t) + (hm_kps[..., 1:2] > b) + \
(hm_score < thresh) + (min_dist > (torch.max(b - t, r - l) * 0.3))
mask = (mask > 0).float().expand(batch, num_joints, K, 2)
kps = (1 - mask) * hm_kps + mask * kps
kps = kps.permute(0, 2, 1, 3).contiguous().view(
batch, K, num_joints * 2)
detections = torch.cat([bboxes, scores, kps, clses], dim=2) # box:4+score:1+kpoints:10+class:1=16
return detections
# ------------------------------------------------------------------------------
# Portions of this code are from
# CornerNet (https://github.com/princeton-vl/CornerNet)
# Copyright (c) 2018, University of Michigan
# Licensed under the BSD 3-Clause License
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
from .utils import _tranpose_and_gather_feat
import torch.nn.functional as F
def _slow_neg_loss(pred, gt):
'''focal loss from CornerNet'''
pos_inds = gt.eq(1)
neg_inds = gt.lt(1)
neg_weights = torch.pow(1 - gt[neg_inds], 4)
loss = 0
pos_pred = pred[pos_inds]
neg_pred = pred[neg_inds]
pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2)
neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights
num_pos = pos_inds.float().sum()
pos_loss = pos_loss.sum()
neg_loss = neg_loss.sum()
if pos_pred.nelement() == 0:
loss = loss - neg_loss
else:
loss = loss - (pos_loss + neg_loss) / num_pos
return loss
def _neg_loss(pred, gt):
''' Modified focal loss. Exactly the same as CornerNet.
Runs faster but costs a little more memory.
Arguments:
pred (batch x c x h x w)
gt (batch x c x h x w)
'''
pos_inds = gt.eq(1).float()
neg_inds = gt.lt(1).float()
neg_weights = torch.pow(1 - gt, 4)
loss = 0
pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds
neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds
num_pos = pos_inds.float().sum()
pos_loss = pos_loss.sum()
neg_loss = neg_loss.sum()
if num_pos == 0:
loss = loss - neg_loss
else:
loss = loss - (pos_loss + neg_loss) / num_pos
return loss
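# In formula form (a restatement of _neg_loss above, with p the predicted
# score, y the Gaussian-splatted ground truth, and N the number of y == 1
# locations):
#   L = -1/N * sum_{x,y,c} [ (1 - p)^2 * log(p)            if y == 1
#                            (1 - y)^4 * p^2 * log(1 - p)  otherwise ]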
def _not_faster_neg_loss(pred, gt):
pos_inds = gt.eq(1).float()
neg_inds = gt.lt(1).float()
num_pos = pos_inds.float().sum()
neg_weights = torch.pow(1 - gt, 4)
loss = 0
trans_pred = pred * neg_inds + (1 - pred) * pos_inds
weight = neg_weights * neg_inds + pos_inds
all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight
all_loss = all_loss.sum()
if num_pos > 0:
all_loss /= num_pos
loss -= all_loss
return loss
def _slow_reg_loss(regr, gt_regr, mask):
num = mask.float().sum()
mask = mask.unsqueeze(2).expand_as(gt_regr)
regr = regr[mask]
gt_regr = gt_regr[mask]
# regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, reduction='sum')
regr_loss = regr_loss / (num + 1e-4)
return regr_loss
def _reg_loss(regr, gt_regr, mask, wight_=None):
''' L1 regression loss
Arguments:
regr (batch x max_objects x dim)
gt_regr (batch x max_objects x dim)
mask (batch x max_objects)
'''
num = mask.float().sum()
mask = mask.unsqueeze(2).expand_as(gt_regr).float()
regr = regr * mask
gt_regr = gt_regr * mask
if wight_ is not None:
wight_ = wight_.unsqueeze(2).expand_as(gt_regr).float()
# regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, reduce=False)
regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, reduction='none')
regr_loss *= wight_
regr_loss = regr_loss.sum()
else:
regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, reduction='sum')
# regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
regr_loss = regr_loss / (num + 1e-4)
return regr_loss
class FocalLoss(nn.Module):
'''nn.Module wrapper for focal loss'''
def __init__(self):
super(FocalLoss, self).__init__()
self.neg_loss = _neg_loss
def forward(self, out, target):
return self.neg_loss(out, target)
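# Usage sketch (illustrative shapes; pred is assumed to be sigmoid-activated
# and clamped away from exact 0/1, gt is a Gaussian heatmap with 1s at centers):
#   >>> import torch
#   >>> crit = FocalLoss()
#   >>> pred = torch.rand(1, 3, 64, 64).clamp(1e-4, 1 - 1e-4)
#   >>> gt = torch.zeros(1, 3, 64, 64)
#   >>> gt[0, 0, 10, 10] = 1.
#   >>> loss = crit(pred, gt)  # scalar tensor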
class RegLoss(nn.Module):
'''Regression loss for an output tensor
Arguments:
output (batch x dim x h x w)
mask (batch x max_objects)
ind (batch x max_objects)
target (batch x max_objects x dim)
'''
def __init__(self):
super(RegLoss, self).__init__()
def forward(self, output, mask, ind, target, wight_=None):
pred = _tranpose_and_gather_feat(output, ind)
loss = _reg_loss(pred, target, mask, wight_)
return loss
class RegL1Loss(nn.Module):
def __init__(self):
super(RegL1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _tranpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
# loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
# loss = F.l1_loss(pred * mask, target * mask, size_average=False)
loss = loss / (mask.sum() + 1e-4)
return loss
class NormRegL1Loss(nn.Module):
def __init__(self):
super(NormRegL1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _tranpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
# loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
pred = pred / (target + 1e-4)
target = target * 0 + 1
loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
# loss = F.l1_loss(pred * mask, target * mask, size_average=False)
loss = loss / (mask.sum() + 1e-4)
return loss
class RegWeightedL1Loss(nn.Module):
def __init__(self):
super(RegWeightedL1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _tranpose_and_gather_feat(output, ind)
mask = mask.float()
# loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
# loss = F.l1_loss(pred * mask, target * mask, size_average=False)
loss = loss / (mask.sum() + 1e-4)
return loss
class L1Loss(nn.Module):
def __init__(self):
super(L1Loss, self).__init__()
def forward(self, output, mask, ind, target):
pred = _tranpose_and_gather_feat(output, ind)
mask = mask.unsqueeze(2).expand_as(pred).float()
loss = F.l1_loss(pred * mask, target * mask, reduction='mean')
return loss
class BinRotLoss(nn.Module):
def __init__(self):
super(BinRotLoss, self).__init__()
def forward(self, output, mask, ind, rotbin, rotres):
pred = _tranpose_and_gather_feat(output, ind)
loss = compute_rot_loss(pred, rotbin, rotres, mask)
return loss
def compute_res_loss(output, target):
return F.smooth_l1_loss(output, target, reduction='mean')
# TODO: weight
def compute_bin_loss(output, target, mask):
mask = mask.expand_as(output)
output = output * mask.float()
return F.cross_entropy(output, target, reduction='mean')
def compute_rot_loss(output, target_bin, target_res, mask):
# output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
# bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
# target_bin: (B, 128, 2) [bin1_cls, bin2_cls]
# target_res: (B, 128, 2) [bin1_res, bin2_res]
# mask: (B, 128, 1)
# import pdb; pdb.set_trace()
output = output.view(-1, 8)
target_bin = target_bin.view(-1, 2)
target_res = target_res.view(-1, 2)
mask = mask.view(-1, 1)
loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask)
loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask)
loss_res = torch.zeros_like(loss_bin1)
if target_bin[:, 0].nonzero().shape[0] > 0:
idx1 = target_bin[:, 0].nonzero()[:, 0]
valid_output1 = torch.index_select(output, 0, idx1.long())
valid_target_res1 = torch.index_select(target_res, 0, idx1.long())
loss_sin1 = compute_res_loss(
valid_output1[:, 2], torch.sin(valid_target_res1[:, 0]))
loss_cos1 = compute_res_loss(
valid_output1[:, 3], torch.cos(valid_target_res1[:, 0]))
loss_res += loss_sin1 + loss_cos1
if target_bin[:, 1].nonzero().shape[0] > 0:
idx2 = target_bin[:, 1].nonzero()[:, 0]
valid_output2 = torch.index_select(output, 0, idx2.long())
valid_target_res2 = torch.index_select(target_res, 0, idx2.long())
loss_sin2 = compute_res_loss(
valid_output2[:, 6], torch.sin(valid_target_res2[:, 1]))
loss_cos2 = compute_res_loss(
valid_output2[:, 7], torch.cos(valid_target_res2[:, 1]))
loss_res += loss_sin2 + loss_cos2
return loss_bin1 + loss_bin2 + loss_res
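# Usage sketch for BinRotLoss / compute_rot_loss (illustrative shapes only;
# ind, mask, rotbin and rotres would normally come from the dataloader):
#   >>> import torch
#   >>> crit = BinRotLoss()
#   >>> output = torch.randn(2, 8, 96, 320)                     # 'rot' head map
#   >>> ind = torch.zeros(2, 128, dtype=torch.long)             # object indices
#   >>> mask = torch.zeros(2, 128, 1); mask[:, 0] = 1
#   >>> rotbin = torch.zeros(2, 128, 2, dtype=torch.long); rotbin[:, :, 0] = 1
#   >>> rotres = torch.zeros(2, 128, 2)
#   >>> loss = crit(output, mask, ind, rotbin, rotres)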
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torchvision.models as models
import torch
import torch.nn as nn
import os
from .networks.msra_resnet import get_pose_net
# from .networks.dlav0 import get_pose_net as get_dlav0
# from .networks.pose_dla_dcn import get_pose_net as get_dla_dcn
# from .networks.resnet_dcn import get_pose_net as get_pose_net_dcn
from .networks.large_hourglass import get_large_hourglass_net
# from .Backbone.mobilenetv2 import get_mobile_pose_netv2
# from .Backbone.mobilenet_v2 import get_mobile_net
# from .Backbone.centerface_mobilenet_v2 import get_mobile_net
from .Backbone.centerface_mobilenet_v2_fpn import get_mobile_net
_model_factory = {
'res': get_pose_net, # default Resnet with deconv
# 'dlav0': get_dlav0, # default DLAup
# 'dla': get_dla_dcn,
# 'resdcn': get_pose_net_dcn,
'hourglass': get_large_hourglass_net,
'mobilev2': get_mobile_net,
}
def create_model(arch, heads, head_conv):
num_layers = int(arch[arch.find('_') + 1:]) if '_' in arch else 0
arch = arch[:arch.find('_')] if '_' in arch else arch
get_model = _model_factory[arch]
model = get_model(num_layers=num_layers, heads=heads, head_conv=head_conv)
return model
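# Usage sketch (the head names and channel counts here are assumptions for
# illustration only; 'res_18' selects get_pose_net with num_layers=18):
#   >>> heads = {'hm': 1, 'wh': 2, 'hm_offset': 2}
#   >>> model = create_model('res_18', heads=heads, head_conv=64)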
def load_model(model, model_path, optimizer=None, resume=False,
lr=None, lr_step=None):
start_epoch = 0
checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage)
print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch']))
state_dict_ = checkpoint['state_dict']
state_dict = {}
# convert data_parallal to model
for k in state_dict_:
if k.startswith('module') and not k.startswith('module_list'):
state_dict[k[7:]] = state_dict_[k]
else:
state_dict[k] = state_dict_[k]
model_state_dict = model.state_dict()
# check loaded parameters and created model parameters
for k in state_dict:
if k in model_state_dict:
if state_dict[k].shape != model_state_dict[k].shape:
print('Skip loading parameter {}, required shape {}, '
'loaded shape {}.'.format(
k, model_state_dict[k].shape, state_dict[k].shape))
state_dict[k] = model_state_dict[k]
else:
print('Drop parameter {}.'.format(k))
for k in model_state_dict:
if not (k in state_dict):
print('No param {}.'.format(k))
state_dict[k] = model_state_dict[k]
model.load_state_dict(state_dict, strict=False)
# resume optimizer parameters
if optimizer is not None and resume:
if 'optimizer' in checkpoint:
optimizer.load_state_dict(checkpoint['optimizer'])
start_epoch = checkpoint['epoch']
start_lr = lr
for step in lr_step:
if start_epoch >= step:
start_lr *= 0.1
for param_group in optimizer.param_groups:
param_group['lr'] = start_lr
print('Resumed optimizer with start lr', start_lr)
else:
print('No optimizer parameters in checkpoint.')
if optimizer is not None:
return model, optimizer, start_epoch
else:
return model
def save_model(path, epoch, model, optimizer=None):
if isinstance(model, torch.nn.DataParallel):
state_dict = model.module.state_dict()
else:
state_dict = model.state_dict()
data = {'epoch': epoch,
'state_dict': state_dict}
if not (optimizer is None):
data['optimizer'] = optimizer.state_dict()
torch.save(data, path)
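# Round-trip sketch (the file name, lr and lr_step values are illustrative):
#   >>> save_model('model_last.pth', epoch, model, optimizer)
#   >>> model = load_model(model, 'model_last.pth')
#   >>> # or, resuming the optimizer state as well:
#   >>> model, optimizer, start_epoch = load_model(
#   ...     model, 'model_last.pth', optimizer, resume=True,
#   ...     lr=1.25e-4, lr_step=[90, 120])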
.vscode
.idea
*.so
*.o
*pyc
_ext
BSD 3-Clause License
Copyright (c) 2019, Charles Shang
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
## Deformable Convolutional Networks V2 with Pytorch
### Build
```bash
./make.sh # build
python test.py # run examples and gradient check
```
### An Example
- deformable conv
```python
import torch
from dcn_v2 import DCN
input = torch.randn(2, 64, 128, 128).cuda()
# wrap all things (offset and mask) in DCN
dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
output = dcn(input)
print(output.shape)
```
- deformable roi pooling
```python
import torch
from dcn_v2 import DCNPooling
input = torch.randn(2, 32, 64, 64).cuda()
batch_inds = torch.randint(2, (20, 1)).cuda().float()
x = torch.randint(256, (20, 1)).cuda().float()
y = torch.randint(256, (20, 1)).cuda().float()
w = torch.randint(64, (20, 1)).cuda().float()
h = torch.randint(64, (20, 1)).cuda().float()
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
# modulated deformable pooling (V2)
# wrap all things (offset and mask) in DCNPooling
dpooling = DCNPooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
dout = dpooling(input, rois)
```
### Known Issues:
- [x] Gradient check w.r.t offset (solved)
- [ ] Backward is not reentrant (minor)
This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op).
<s>I have run the gradient check many times with DOUBLE type. Every tensor **except offset** passes.
However, when I set the offset to 0.5, it passes. I'm still wondering what causes this problem. Is it
because of some non-differentiable points?</s>
Update: all gradient checks pass with double precision.
Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small
(`<1e-7` for float, `<1e-15` for double), so it may not be a serious problem (?)
Please post an issue or PR if you have any comments.