Commit b952e97b authored by chenych's avatar chenych
Browse files

First Commit.

parents
void dcn_v2_forward(THDoubleTensor *input, THDoubleTensor *weight,
THDoubleTensor *bias, THDoubleTensor *ones,
THDoubleTensor *offset, THDoubleTensor *mask,
THDoubleTensor *output, THDoubleTensor *columns,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
const int dilation_h, const int dilation_w,
const int deformable_group);
void dcn_v2_backward(THDoubleTensor *input, THDoubleTensor *weight,
THDoubleTensor *bias, THDoubleTensor *ones,
THDoubleTensor *offset, THDoubleTensor *mask,
THDoubleTensor *output, THDoubleTensor *columns,
THDoubleTensor *grad_input, THDoubleTensor *grad_weight,
THDoubleTensor *grad_bias, THDoubleTensor *grad_offset,
THDoubleTensor *grad_mask, THDoubleTensor *grad_output,
int kernel_h, int kernel_w,
int stride_h, int stride_w,
int pad_h, int pad_w,
int dilation_h, int dilation_w,
int deformable_group);
\ No newline at end of file
#!/usr/bin/env python
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import gradcheck
from dcn_v2 import DCNv2
from dcn_v2_func import DCNv2Function
from dcn_v2 import DCNv2Pooling
from dcn_v2_func import DCNv2PoolingFunction
deformable_groups = 1
N, inC, inH, inW = 2, 2, 4, 4
outC = 2
kH, kW = 3, 3
def conv_identify(weight, bias):
weight.data.zero_()
bias.data.zero_()
o, i, h, w = weight.shape
y = h//2
x = w//2
for p in range(i):
for q in range(o):
if p == q:
weight.data[q, p, y, x] = 1.0
def check_zero_offset():
conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW,
kernel_size=(kH, kW),
stride=(1, 1),
padding=(1, 1),
bias=True).cuda()
conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW,
kernel_size=(kH, kW),
stride=(1, 1),
padding=(1, 1),
bias=True).cuda()
dcn_v2 = DCNv2(inC, outC, (kH, kW),
stride=1, padding=1, dilation=1,
deformable_groups=deformable_groups).cuda()
conv_offset.weight.data.zero_()
conv_offset.bias.data.zero_()
conv_mask.weight.data.zero_()
conv_mask.bias.data.zero_()
conv_identify(dcn_v2.weight, dcn_v2.bias)
input = torch.randn(N, inC, inH, inW).cuda()
offset = conv_offset(input)
mask = conv_mask(input)
mask = torch.sigmoid(mask)
output = dcn_v2(input, offset, mask)
output *= 2
d = (input - output).abs().max()
if d < 1e-10:
print('Zero offset passed')
else:
print('Zero offset failed')
def check_gradient_dconv_double():
input = torch.randn(N, inC, inH, inW, dtype=torch.float64).cuda()
input.requires_grad = True
offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW, dtype=torch.float64).cuda()
# offset.data.zero_()
# offset.data -= 0.00001
offset.requires_grad = True
mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW, dtype=torch.float64).cuda()
# mask.data.zero_()
mask.requires_grad = True
mask = torch.sigmoid(mask)
weight = torch.randn(outC, inC, kH, kW, dtype=torch.float64).cuda()
weight.requires_grad = True
bias = torch.rand(outC, dtype=torch.float64).cuda()
bias.requires_grad = True
func = DCNv2Function(stride=1, padding=1, dilation=1, deformable_groups=deformable_groups)
print(gradcheck(func, (input, offset, mask, weight, bias), eps=1e-6, atol=1e-5, rtol=1e-3))
def check_gradient_dconv():
input = torch.randn(N, inC, inH, inW).cuda()
input.requires_grad = True
offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda()
# offset.data.zero_()
# offset.data -= 0.5
offset.requires_grad = True
mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda()
# mask.data.zero_()
mask.requires_grad = True
mask = torch.sigmoid(mask)
weight = torch.randn(outC, inC, kH, kW).cuda()
weight.requires_grad = True
bias = torch.rand(outC).cuda()
bias.requires_grad = True
func = DCNv2Function(stride=1, padding=1, dilation=1, deformable_groups=deformable_groups)
print(gradcheck(func, (input, offset, mask, weight, bias), eps=1e-3, atol=1e-3, rtol=1e-2))
def check_pooling_zero_offset():
from dcn_v2 import DCNv2Pooling
input = torch.randn(2, 16, 64, 64).cuda().zero_()
input[0, :, 16:26, 16:26] = 1.
input[1, :, 10:20, 20:30] = 2.
rois = torch.tensor([
[0, 65, 65, 103, 103],
[1, 81, 41, 119, 79],
]).cuda().float()
pooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=16,
no_trans=True,
group_size=1,
trans_std=0.1).cuda()
out = pooling(input, rois, input.new())
s = ', '.join(['%f' % out[i, :, :, :].mean().item() for i in range(rois.shape[0])])
print(s)
dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=16,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
offset = torch.randn(20, 2, 7, 7).cuda().zero_()
dout = dpooling(input, rois, offset)
s = ', '.join(['%f' % dout[i, :, :, :].mean().item() for i in range(rois.shape[0])])
print(s)
def check_gradient_dpooling():
input = torch.randn(2, 3, 5, 5).cuda() * 0.01
N = 4
batch_inds = torch.randint(2, (N, 1)).cuda().float()
x = torch.rand((N, 1)).cuda().float() * 15
y = torch.rand((N, 1)).cuda().float() * 15
w = torch.rand((N, 1)).cuda().float() * 10
h = torch.rand((N, 1)).cuda().float() * 10
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
offset = torch.randn(N, 2, 3, 3).cuda()
dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=3,
output_dim=3,
no_trans=False,
group_size=1,
trans_std=0.0).cuda()
input.requires_grad = True
offset.requires_grad = True
print('check_gradient_dpooling', gradcheck(dpooling, (input, rois, offset), eps=1e-4))
def example_dconv():
from dcn_v2 import DCN
input = torch.randn(2, 64, 128, 128).cuda()
# wrap all things (offset and mask) in DCN
dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda()
output = dcn(input)
targert = output.new(*output.size())
targert.data.uniform_(-0.01, 0.01)
error = (targert - output).mean()
error.backward()
print(output.shape)
def example_dpooling():
from dcn_v2 import DCNv2Pooling
input = torch.randn(2, 32, 64, 64).cuda()
batch_inds = torch.randint(2, (20, 1)).cuda().float()
x = torch.randint(256, (20, 1)).cuda().float()
y = torch.randint(256, (20, 1)).cuda().float()
w = torch.randint(64, (20, 1)).cuda().float()
h = torch.randint(64, (20, 1)).cuda().float()
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
offset = torch.randn(20, 2, 7, 7).cuda()
input.requires_grad = True
offset.requires_grad = True
# normal roi_align
pooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=True,
group_size=1,
trans_std=0.1).cuda()
# deformable pooling
dpooling = DCNv2Pooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
out = pooling(input, rois, offset)
dout = dpooling(input, rois, offset)
print(out.shape)
print(dout.shape)
target_out = out.new(*out.size())
target_out.data.uniform_(-0.01, 0.01)
target_dout = dout.new(*dout.size())
target_dout.data.uniform_(-0.01, 0.01)
e = (target_out - out).mean()
e.backward()
e = (target_dout - dout).mean()
e.backward()
def example_mdpooling():
from dcn_v2 import DCNPooling
input = torch.randn(2, 32, 64, 64).cuda()
input.requires_grad = True
batch_inds = torch.randint(2, (20, 1)).cuda().float()
x = torch.randint(256, (20, 1)).cuda().float()
y = torch.randint(256, (20, 1)).cuda().float()
w = torch.randint(64, (20, 1)).cuda().float()
h = torch.randint(64, (20, 1)).cuda().float()
rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1)
# mdformable pooling (V2)
dpooling = DCNPooling(spatial_scale=1.0 / 4,
pooled_size=7,
output_dim=32,
no_trans=False,
group_size=1,
trans_std=0.1).cuda()
dout = dpooling(input, rois)
target = dout.new(*dout.size())
target.data.uniform_(-0.1, 0.1)
error = (target - dout).mean()
error.backward()
print(dout.shape)
if __name__ == '__main__':
example_dconv()
example_dpooling()
example_mdpooling()
check_pooling_zero_offset()
# zero offset check
if inC == outC:
check_zero_offset()
check_gradient_dpooling()
# # gradient check
# try:
# check_gradient_double()
# except TypeError:
# print('''****** You can swith to double precision in dcn_v2_func.py by (un)commenting these two lines:
# ****** from _ext import dcn_v2 as _backend
# ****** from _ext import dcn_v2_double as _backend''')
# print('****** Your tensor may not be **double** type')
# print('****** Switching to **float** type')
#
# check_gradient()
# finally:
# print('****** Note: backward is not reentrant error may not be a serious problem, '
# '****** since the max error is less than 1e-7\n'
# '****** Still looking for what trigger this problem')
\ No newline at end of file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from os.path import join
import torch
from torch import nn
import torch.utils.model_zoo as model_zoo
import numpy as np
BatchNorm = nn.BatchNorm2d
def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn1 = BatchNorm(planes)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = BatchNorm(planes)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 2
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(Bottleneck, self).__init__()
expansion = Bottleneck.expansion
bottle_planes = planes // expansion
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = BatchNorm(bottle_planes)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = BatchNorm(bottle_planes)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = BatchNorm(planes)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class BottleneckX(nn.Module):
expansion = 2
cardinality = 32
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BottleneckX, self).__init__()
cardinality = BottleneckX.cardinality
# dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))
# bottle_planes = dim * cardinality
bottle_planes = planes * cardinality // 32
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = BatchNorm(bottle_planes)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation, bias=False,
dilation=dilation, groups=cardinality)
self.bn2 = BatchNorm(bottle_planes)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = BatchNorm(planes)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, residual):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1,
stride=1, bias=False, padding=(kernel_size - 1) // 2)
self.bn = BatchNorm(out_channels)
self.relu = nn.ReLU(inplace=True)
self.residual = residual
def forward(self, *x):
children = x
x = self.conv(torch.cat(x, 1))
x = self.bn(x)
if self.residual:
x += children[0]
x = self.relu(x)
return x
class Tree(nn.Module):
def __init__(self, levels, block, in_channels, out_channels, stride=1,
level_root=False, root_dim=0, root_kernel_size=1,
dilation=1, root_residual=False):
super(Tree, self).__init__()
if root_dim == 0:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
if levels == 1:
self.tree1 = block(in_channels, out_channels, stride,
dilation=dilation)
self.tree2 = block(out_channels, out_channels, 1,
dilation=dilation)
else:
self.tree1 = Tree(levels - 1, block, in_channels, out_channels,
stride, root_dim=0,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
self.tree2 = Tree(levels - 1, block, out_channels, out_channels,
root_dim=root_dim + out_channels,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
if levels == 1:
self.root = Root(root_dim, out_channels, root_kernel_size,
root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.downsample = None
self.project = None
self.levels = levels
if stride > 1:
self.downsample = nn.MaxPool2d(stride, stride=stride)
if in_channels != out_channels:
self.project = nn.Sequential(
nn.Conv2d(in_channels, out_channels,
kernel_size=1, stride=1, bias=False),
BatchNorm(out_channels)
)
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x) if self.downsample else x
residual = self.project(bottom) if self.project else bottom
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.levels == 1:
x2 = self.tree2(x1)
x = self.root(x2, x1, *children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
class DLA(nn.Module):
def __init__(self, levels, channels, num_classes=1000,
block=BasicBlock, residual_root=False, return_levels=False,
pool_size=7, linear_root=False):
super(DLA, self).__init__()
self.channels = channels
self.return_levels = return_levels
self.num_classes = num_classes
self.base_layer = nn.Sequential(
nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
padding=3, bias=False),
BatchNorm(channels[0]),
nn.ReLU(inplace=True))
self.level0 = self._make_conv_level(
channels[0], channels[0], levels[0])
self.level1 = self._make_conv_level(
channels[0], channels[1], levels[1], stride=2)
self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,
level_root=False,
root_residual=residual_root)
self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,
level_root=True, root_residual=residual_root)
self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,
level_root=True, root_residual=residual_root)
self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,
level_root=True, root_residual=residual_root)
self.avgpool = nn.AvgPool2d(pool_size)
self.fc = nn.Conv2d(channels[-1], num_classes, kernel_size=1,
stride=1, padding=0, bias=True)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, BatchNorm):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_level(self, block, inplanes, planes, blocks, stride=1):
downsample = None
if stride != 1 or inplanes != planes:
downsample = nn.Sequential(
nn.MaxPool2d(stride, stride=stride),
nn.Conv2d(inplanes, planes,
kernel_size=1, stride=1, bias=False),
BatchNorm(planes),
)
layers = []
layers.append(block(inplanes, planes, stride, downsample=downsample))
for i in range(1, blocks):
layers.append(block(inplanes, planes))
return nn.Sequential(*layers)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation, bias=False, dilation=dilation),
BatchNorm(planes),
nn.ReLU(inplace=True)])
inplanes = planes
return nn.Sequential(*modules)
def forward(self, x):
y = []
x = self.base_layer(x)
for i in range(6):
x = getattr(self, 'level{}'.format(i))(x)
y.append(x)
if self.return_levels:
return y
else:
x = self.avgpool(x)
x = self.fc(x)
x = x.view(x.size(0), -1)
return x
def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):
fc = self.fc
if name.endswith('.pth'):
model_weights = torch.load(data + name)
else:
model_url = get_model_url(data, name, hash)
model_weights = model_zoo.load_url(model_url)
num_classes = len(model_weights[list(model_weights.keys())[-1]])
self.fc = nn.Conv2d(
self.channels[-1], num_classes,
kernel_size=1, stride=1, padding=0, bias=True)
self.load_state_dict(model_weights)
self.fc = fc
def dla34(pretrained, **kwargs): # DLA-34
model = DLA([1, 1, 1, 2, 2, 1],
[16, 32, 64, 128, 256, 512],
block=BasicBlock, **kwargs)
if pretrained:
model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')
return model
def dla46_c(pretrained=None, **kwargs): # DLA-46-C
Bottleneck.expansion = 2
model = DLA([1, 1, 1, 2, 2, 1],
[16, 32, 64, 64, 128, 256],
block=Bottleneck, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla46_c')
return model
def dla46x_c(pretrained=None, **kwargs): # DLA-X-46-C
BottleneckX.expansion = 2
model = DLA([1, 1, 1, 2, 2, 1],
[16, 32, 64, 64, 128, 256],
block=BottleneckX, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla46x_c')
return model
def dla60x_c(pretrained, **kwargs): # DLA-X-60-C
BottleneckX.expansion = 2
model = DLA([1, 1, 1, 2, 3, 1],
[16, 32, 64, 64, 128, 256],
block=BottleneckX, **kwargs)
if pretrained:
model.load_pretrained_model(data='imagenet', name='dla60x_c', hash='b870c45c')
return model
def dla60(pretrained=None, **kwargs): # DLA-60
Bottleneck.expansion = 2
model = DLA([1, 1, 1, 2, 3, 1],
[16, 32, 128, 256, 512, 1024],
block=Bottleneck, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla60')
return model
def dla60x(pretrained=None, **kwargs): # DLA-X-60
BottleneckX.expansion = 2
model = DLA([1, 1, 1, 2, 3, 1],
[16, 32, 128, 256, 512, 1024],
block=BottleneckX, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla60x')
return model
def dla102(pretrained=None, **kwargs): # DLA-102
Bottleneck.expansion = 2
model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],
block=Bottleneck, residual_root=True, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla102')
return model
def dla102x(pretrained=None, **kwargs): # DLA-X-102
BottleneckX.expansion = 2
model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],
block=BottleneckX, residual_root=True, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla102x')
return model
def dla102x2(pretrained=None, **kwargs): # DLA-X-102 64
BottleneckX.cardinality = 64
model = DLA([1, 1, 1, 3, 4, 1], [16, 32, 128, 256, 512, 1024],
block=BottleneckX, residual_root=True, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla102x2')
return model
def dla169(pretrained=None, **kwargs): # DLA-169
Bottleneck.expansion = 2
model = DLA([1, 1, 2, 3, 5, 1], [16, 32, 128, 256, 512, 1024],
block=Bottleneck, residual_root=True, **kwargs)
if pretrained is not None:
model.load_pretrained_model(pretrained, 'dla169')
return model
def set_bn(bn):
global BatchNorm
BatchNorm = bn
dla.BatchNorm = bn
class Identity(nn.Module):
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
class IDAUp(nn.Module):
def __init__(self, node_kernel, out_dim, channels, up_factors):
super(IDAUp, self).__init__()
self.channels = channels
self.out_dim = out_dim
for i, c in enumerate(channels):
if c == out_dim:
proj = Identity()
else:
proj = nn.Sequential(
nn.Conv2d(c, out_dim,
kernel_size=1, stride=1, bias=False),
BatchNorm(out_dim),
nn.ReLU(inplace=True))
f = int(up_factors[i])
if f == 1:
up = Identity()
else:
up = nn.ConvTranspose2d(
out_dim, out_dim, f * 2, stride=f, padding=f // 2,
output_padding=0, groups=out_dim, bias=False)
fill_up_weights(up)
setattr(self, 'proj_' + str(i), proj)
setattr(self, 'up_' + str(i), up)
for i in range(1, len(channels)):
node = nn.Sequential(
nn.Conv2d(out_dim * 2, out_dim,
kernel_size=node_kernel, stride=1,
padding=node_kernel // 2, bias=False),
BatchNorm(out_dim),
nn.ReLU(inplace=True))
setattr(self, 'node_' + str(i), node)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, BatchNorm):
m.weight.data.fill_(1)
m.bias.data.zero_()
def forward(self, layers):
assert len(self.channels) == len(layers), \
'{} vs {} layers'.format(len(self.channels), len(layers))
layers = list(layers)
for i, l in enumerate(layers):
upsample = getattr(self, 'up_' + str(i))
project = getattr(self, 'proj_' + str(i))
layers[i] = upsample(project(l))
x = layers[0]
y = []
for i in range(1, len(layers)):
node = getattr(self, 'node_' + str(i))
x = node(torch.cat([x, layers[i]], 1))
y.append(x)
return x, y
class DLAUp(nn.Module):
def __init__(self, channels, scales=(1, 2, 4, 8, 16), in_channels=None):
super(DLAUp, self).__init__()
if in_channels is None:
in_channels = channels
self.channels = channels
channels = list(channels)
scales = np.array(scales, dtype=int)
for i in range(len(channels) - 1):
j = -i - 2
setattr(self, 'ida_{}'.format(i),
IDAUp(3, channels[j], in_channels[j:],
scales[j:] // scales[j]))
scales[j + 1:] = scales[j]
in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]
def forward(self, layers):
layers = list(layers)
assert len(layers) > 1
for i in range(len(layers) - 1):
ida = getattr(self, 'ida_{}'.format(i))
x, y = ida(layers[-i - 2:])
layers[-i - 1:] = y
return x
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, std=0.001)
# torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')
# torch.nn.init.xavier_normal_(m.weight.data)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
class DLASeg(nn.Module):
def __init__(self, base_name, heads,
pretrained=True, down_ratio=4, head_conv=256):
super(DLASeg, self).__init__()
assert down_ratio in [2, 4, 8, 16]
self.heads = heads
self.first_level = int(np.log2(down_ratio))
self.base = globals()[base_name](
pretrained=pretrained, return_levels=True)
channels = self.base.channels
scales = [2 ** i for i in range(len(channels[self.first_level:]))]
self.dla_up = DLAUp(channels[self.first_level:], scales=scales)
'''
self.fc = nn.Sequential(
nn.Conv2d(channels[self.first_level], classes, kernel_size=1,
stride=1, padding=0, bias=True)
)
'''
for head in self.heads:
classes = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(channels[self.first_level], head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, classes,
kernel_size=1, stride=1,
padding=0, bias=True))
if 'hm' in head:
fc[-1].bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
else:
fc = nn.Conv2d(channels[self.first_level], classes,
kernel_size=1, stride=1,
padding=0, bias=True)
if 'hm' in head:
fc.bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
self.__setattr__(head, fc)
'''
up_factor = 2 ** self.first_level
if up_factor > 1:
up = nn.ConvTranspose2d(classes, classes, up_factor * 2,
stride=up_factor, padding=up_factor // 2,
output_padding=0, groups=classes,
bias=False)
fill_up_weights(up)
up.weight.requires_grad = False
else:
up = Identity()
self.up = up
self.softmax = nn.LogSoftmax(dim=1)
for m in self.fc.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, BatchNorm):
m.weight.data.fill_(1)
m.bias.data.zero_()
'''
def forward(self, x):
x = self.base(x)
x = self.dla_up(x[self.first_level:])
# x = self.fc(x)
# y = self.softmax(self.up(x))
ret = {}
for head in self.heads:
ret[head] = self.__getattr__(head)(x)
return [ret]
'''
def optim_parameters(self, memo=None):
for param in self.base.parameters():
yield param
for param in self.dla_up.parameters():
yield param
for param in self.fc.parameters():
yield param
'''
'''
def dla34up(classes, pretrained_base=None, **kwargs):
model = DLASeg('dla34', classes, pretrained_base=pretrained_base, **kwargs)
return model
def dla60up(classes, pretrained_base=None, **kwargs):
model = DLASeg('dla60', classes, pretrained_base=pretrained_base, **kwargs)
return model
def dla102up(classes, pretrained_base=None, **kwargs):
model = DLASeg('dla102', classes,
pretrained_base=pretrained_base, **kwargs)
return model
def dla169up(classes, pretrained_base=None, **kwargs):
model = DLASeg('dla169', classes,
pretrained_base=pretrained_base, **kwargs)
return model
'''
def get_pose_net(num_layers, heads, add_conv=256, down_ratio=4):
model = DLASeg('dla{}'.format(num_layers), heads,
pretrained=True,
down_ratio=down_ratio,
head_conv=head_conv)
return model
# ------------------------------------------------------------------------------
# This code is base on
# CornerNet (https://github.com/princeton-vl/CornerNet)
# Copyright (c) 2018, University of Michigan
# Licensed under the BSD 3-Clause License
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import torch
import torch.nn as nn
class convolution(nn.Module):
def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
super(convolution, self).__init__()
pad = (k - 1) // 2
self.conv = nn.Conv2d(inp_dim, out_dim, (k, k), padding=(pad, pad), stride=(stride, stride), bias=not with_bn)
self.bn = nn.BatchNorm2d(out_dim) if with_bn else nn.Sequential()
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
conv = self.conv(x)
bn = self.bn(conv)
relu = self.relu(bn)
return relu
class fully_connected(nn.Module):
def __init__(self, inp_dim, out_dim, with_bn=True):
super(fully_connected, self).__init__()
self.with_bn = with_bn
self.linear = nn.Linear(inp_dim, out_dim)
if self.with_bn:
self.bn = nn.BatchNorm1d(out_dim)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
linear = self.linear(x)
bn = self.bn(linear) if self.with_bn else linear
relu = self.relu(bn)
return relu
class residual(nn.Module):
def __init__(self, k, inp_dim, out_dim, stride=1, with_bn=True):
super(residual, self).__init__()
self.conv1 = nn.Conv2d(inp_dim, out_dim, (3, 3), padding=(1, 1), stride=(stride, stride), bias=False)
self.bn1 = nn.BatchNorm2d(out_dim)
self.relu1 = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(out_dim, out_dim, (3, 3), padding=(1, 1), bias=False)
self.bn2 = nn.BatchNorm2d(out_dim)
self.skip = nn.Sequential(
nn.Conv2d(inp_dim, out_dim, (1, 1), stride=(stride, stride), bias=False),
nn.BatchNorm2d(out_dim)
) if stride != 1 or inp_dim != out_dim else nn.Sequential()
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
conv1 = self.conv1(x)
bn1 = self.bn1(conv1)
relu1 = self.relu1(bn1)
conv2 = self.conv2(relu1)
bn2 = self.bn2(conv2)
skip = self.skip(x)
return self.relu(bn2 + skip)
def make_layer(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
layers = [layer(k, inp_dim, out_dim, **kwargs)]
for _ in range(1, modules):
layers.append(layer(k, out_dim, out_dim, **kwargs))
return nn.Sequential(*layers)
def make_layer_revr(k, inp_dim, out_dim, modules, layer=convolution, **kwargs):
layers = []
for _ in range(modules - 1):
layers.append(layer(k, inp_dim, inp_dim, **kwargs))
layers.append(layer(k, inp_dim, out_dim, **kwargs))
return nn.Sequential(*layers)
class MergeUp(nn.Module):
def forward(self, up1, up2):
return up1 + up2
def make_merge_layer(dim):
return MergeUp()
# def make_pool_layer(dim):
# return nn.MaxPool2d(kernel_size=2, stride=2)
def make_pool_layer(dim):
return nn.Sequential()
def make_unpool_layer(dim):
return nn.Upsample(scale_factor=2)
def make_kp_layer(cnv_dim, curr_dim, out_dim):
return nn.Sequential(
convolution(3, cnv_dim, curr_dim, with_bn=False),
nn.Conv2d(curr_dim, out_dim, (1, 1))
)
def make_inter_layer(dim):
return residual(3, dim, dim)
def make_cnv_layer(inp_dim, out_dim):
return convolution(3, inp_dim, out_dim)
class kp_module(nn.Module):
def __init__(
self, n, dims, modules, layer=residual,
make_up_layer=make_layer, make_low_layer=make_layer,
make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer, **kwargs
):
super(kp_module, self).__init__()
self.n = n
curr_mod = modules[0]
next_mod = modules[1]
curr_dim = dims[0]
next_dim = dims[1]
self.up1 = make_up_layer(
3, curr_dim, curr_dim, curr_mod,
layer=layer, **kwargs
)
self.max1 = make_pool_layer(curr_dim)
self.low1 = make_hg_layer(
3, curr_dim, next_dim, curr_mod,
layer=layer, **kwargs
)
self.low2 = kp_module(
n - 1, dims[1:], modules[1:], layer=layer,
make_up_layer=make_up_layer,
make_low_layer=make_low_layer,
make_hg_layer=make_hg_layer,
make_hg_layer_revr=make_hg_layer_revr,
make_pool_layer=make_pool_layer,
make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer,
**kwargs
) if self.n > 1 else \
make_low_layer(
3, next_dim, next_dim, next_mod,
layer=layer, **kwargs
)
self.low3 = make_hg_layer_revr(
3, next_dim, curr_dim, curr_mod,
layer=layer, **kwargs
)
self.up2 = make_unpool_layer(curr_dim)
self.merge = make_merge_layer(curr_dim)
def forward(self, x):
up1 = self.up1(x)
max1 = self.max1(x)
low1 = self.low1(max1)
low2 = self.low2(low1)
low3 = self.low3(low2)
up2 = self.up2(low3)
return self.merge(up1, up2)
class exkp(nn.Module):
def __init__(
self, n, nstack, dims, modules, heads, pre=None, cnv_dim=256,
make_tl_layer=None, make_br_layer=None,
make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer,
make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer,
make_up_layer=make_layer, make_low_layer=make_layer,
make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer,
kp_layer=residual
):
super(exkp, self).__init__()
self.nstack = nstack
self.heads = heads
curr_dim = dims[0]
self.pre = nn.Sequential(
convolution(7, 3, 128, stride=2),
residual(3, 128, 256, stride=2)
) if pre is None else pre
self.kps = nn.ModuleList([
kp_module(
n, dims, modules, layer=kp_layer,
make_up_layer=make_up_layer,
make_low_layer=make_low_layer,
make_hg_layer=make_hg_layer,
make_hg_layer_revr=make_hg_layer_revr,
make_pool_layer=make_pool_layer,
make_unpool_layer=make_unpool_layer,
make_merge_layer=make_merge_layer
) for _ in range(nstack)
])
self.cnvs = nn.ModuleList([
make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack)
])
self.inters = nn.ModuleList([
make_inter_layer(curr_dim) for _ in range(nstack - 1)
])
self.inters_ = nn.ModuleList([
nn.Sequential(
nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False),
nn.BatchNorm2d(curr_dim)
) for _ in range(nstack - 1)
])
self.cnvs_ = nn.ModuleList([
nn.Sequential(
nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False),
nn.BatchNorm2d(curr_dim)
) for _ in range(nstack - 1)
])
## keypoint heatmaps
for head in heads.keys():
if 'hm' in head:
module = nn.ModuleList([
make_heat_layer(
cnv_dim, curr_dim, heads[head]) for _ in range(nstack)
])
self.__setattr__(head, module)
for heat in self.__getattr__(head):
heat[-1].bias.data.fill_(-2.19)
else:
module = nn.ModuleList([
make_regr_layer(
cnv_dim, curr_dim, heads[head]) for _ in range(nstack)
])
self.__setattr__(head, module)
self.relu = nn.ReLU(inplace=True)
def forward(self, image):
# print('image shape', image.shape)
inter = self.pre(image)
outs = []
for ind in range(self.nstack):
kp_, cnv_ = self.kps[ind], self.cnvs[ind]
kp = kp_(inter)
cnv = cnv_(kp)
out = {}
for head in self.heads:
layer = self.__getattr__(head)[ind]
y = layer(cnv)
out[head] = y
outs.append(out)
if ind < self.nstack - 1:
inter = self.inters_[ind](inter) + self.cnvs_[ind](cnv)
inter = self.relu(inter)
inter = self.inters[ind](inter)
return outs
def make_hg_layer(kernel, dim0, dim1, mod, layer=convolution, **kwargs):
layers = [layer(kernel, dim0, dim1, stride=2)]
layers += [layer(kernel, dim1, dim1) for _ in range(mod - 1)]
return nn.Sequential(*layers)
class HourglassNet(exkp):
def __init__(self, heads, num_stacks=2):
n = 5
dims = [256, 256, 384, 384, 384, 512]
modules = [2, 2, 2, 2, 2, 4]
super(HourglassNet, self).__init__(
n, num_stacks, dims, modules, heads,
make_tl_layer=None,
make_br_layer=None,
make_pool_layer=make_pool_layer,
make_hg_layer=make_hg_layer,
kp_layer=residual, cnv_dim=256
)
def get_large_hourglass_net(num_layers, heads, head_conv):
model = HourglassNet(heads, 2)
return model
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Xingyi Zhou
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
BN_MOMENTUM = 0.1
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion,
momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class PoseResNet(nn.Module):
def __init__(self, block, layers, heads, head_conv, **kwargs):
self.inplanes = 64
self.deconv_with_bias = False
self.heads = heads
super(PoseResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
# used for deconv layers
self.deconv_layers = self._make_deconv_layer(
3,
[256, 256, 256],
[4, 4, 4],
)
# self.final_layer = []
for head in sorted(self.heads):
num_output = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(256, head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, num_output,
kernel_size=1, stride=1, padding=0))
else:
fc = nn.Conv2d(
in_channels=256,
out_channels=num_output,
kernel_size=1,
stride=1,
padding=0
)
self.__setattr__(head, fc)
# self.final_layer = nn.ModuleList(self.final_layer)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _get_deconv_cfg(self, deconv_kernel, index):
if deconv_kernel == 4:
padding = 1
output_padding = 0
elif deconv_kernel == 3:
padding = 1
output_padding = 1
elif deconv_kernel == 2:
padding = 0
output_padding = 0
return deconv_kernel, padding, output_padding
def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
assert num_layers == len(num_filters), \
'ERROR: num_deconv_layers is different len(num_deconv_filters)'
assert num_layers == len(num_kernels), \
'ERROR: num_deconv_layers is different len(num_deconv_filters)'
layers = []
for i in range(num_layers):
kernel, padding, output_padding = \
self._get_deconv_cfg(num_kernels[i], i)
planes = num_filters[i]
layers.append(
nn.ConvTranspose2d(
in_channels=self.inplanes,
out_channels=planes,
kernel_size=kernel,
stride=2,
padding=padding,
output_padding=output_padding,
bias=self.deconv_with_bias))
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
layers.append(nn.ReLU(inplace=True))
self.inplanes = planes
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.deconv_layers(x)
ret = {}
for head in self.heads:
ret[head] = self.__getattr__(head)(x)
return [ret]
def init_weights(self, num_layers, pretrained=True):
if pretrained:
# print('=> init resnet deconv weights from normal distribution')
for _, m in self.deconv_layers.named_modules():
if isinstance(m, nn.ConvTranspose2d):
# print('=> init {}.weight as normal(0, 0.001)'.format(name))
# print('=> init {}.bias as 0'.format(name))
nn.init.normal_(m.weight, std=0.001)
if self.deconv_with_bias:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
# print('=> init {}.weight as 1'.format(name))
# print('=> init {}.bias as 0'.format(name))
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
# print('=> init final conv weights from normal distribution')
for head in self.heads:
final_layer = self.__getattr__(head)
for i, m in enumerate(final_layer.modules()):
if isinstance(m, nn.Conv2d):
# nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
# print('=> init {}.weight as normal(0, 0.001)'.format(name))
# print('=> init {}.bias as 0'.format(name))
if m.weight.shape[0] == self.heads[head]:
if 'hm' in head:
nn.init.constant_(m.bias, -2.19)
else:
nn.init.normal_(m.weight, std=0.001)
nn.init.constant_(m.bias, 0)
#pretrained_state_dict = torch.load(pretrained)
url = model_urls['resnet{}'.format(num_layers)]
pretrained_state_dict = model_zoo.load_url(url)
print('=> loading pretrained model {}'.format(url))
self.load_state_dict(pretrained_state_dict, strict=False)
else:
print('=> imagenet pretrained model dose not exist')
print('=> please download it first')
raise ValueError('imagenet pretrained model does not exist')
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
34: (BasicBlock, [3, 4, 6, 3]),
50: (Bottleneck, [3, 4, 6, 3]),
101: (Bottleneck, [3, 4, 23, 3]),
152: (Bottleneck, [3, 8, 36, 3])}
def get_pose_net(num_layers, heads, head_conv):
block_class, layers = resnet_spec[num_layers]
model = PoseResNet(block_class, layers, heads, head_conv=head_conv)
model.init_weights(num_layers, pretrained=True)
return model
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import logging
import numpy as np
from os.path import join
import torch
from torch import nn
import torch.nn.functional as F
import torch.utils.model_zoo as model_zoo
from .DCNv2.dcn_v2 import DCN
BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)
def get_model_url(data='imagenet', name='dla34', hash='ba72cf86'):
return join('http://dl.yf.io/dla/models', data, '{}-{}.pth'.format(name, hash))
def conv3x3(in_planes, out_planes, stride=1):
"3x3 convolution with padding"
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BasicBlock, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
stride=1, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 2
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(Bottleneck, self).__init__()
expansion = Bottleneck.expansion
bottle_planes = planes // expansion
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation,
bias=False, dilation=dilation)
self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class BottleneckX(nn.Module):
expansion = 2
cardinality = 32
def __init__(self, inplanes, planes, stride=1, dilation=1):
super(BottleneckX, self).__init__()
cardinality = BottleneckX.cardinality
# dim = int(math.floor(planes * (BottleneckV5.expansion / 64.0)))
# bottle_planes = dim * cardinality
bottle_planes = planes * cardinality // 32
self.conv1 = nn.Conv2d(inplanes, bottle_planes,
kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(bottle_planes, bottle_planes, kernel_size=3,
stride=stride, padding=dilation, bias=False,
dilation=dilation, groups=cardinality)
self.bn2 = nn.BatchNorm2d(bottle_planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(bottle_planes, planes,
kernel_size=1, bias=False)
self.bn3 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.stride = stride
def forward(self, x, residual=None):
if residual is None:
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
out += residual
out = self.relu(out)
return out
class Root(nn.Module):
def __init__(self, in_channels, out_channels, kernel_size, residual):
super(Root, self).__init__()
self.conv = nn.Conv2d(
in_channels, out_channels, 1,
stride=1, bias=False, padding=(kernel_size - 1) // 2)
self.bn = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.residual = residual
def forward(self, *x):
children = x
x = self.conv(torch.cat(x, 1))
x = self.bn(x)
if self.residual:
x += children[0]
x = self.relu(x)
return x
class Tree(nn.Module):
def __init__(self, levels, block, in_channels, out_channels, stride=1,
level_root=False, root_dim=0, root_kernel_size=1,
dilation=1, root_residual=False):
super(Tree, self).__init__()
if root_dim == 0:
root_dim = 2 * out_channels
if level_root:
root_dim += in_channels
if levels == 1:
self.tree1 = block(in_channels, out_channels, stride,
dilation=dilation)
self.tree2 = block(out_channels, out_channels, 1,
dilation=dilation)
else:
self.tree1 = Tree(levels - 1, block, in_channels, out_channels,
stride, root_dim=0,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
self.tree2 = Tree(levels - 1, block, out_channels, out_channels,
root_dim=root_dim + out_channels,
root_kernel_size=root_kernel_size,
dilation=dilation, root_residual=root_residual)
if levels == 1:
self.root = Root(root_dim, out_channels, root_kernel_size,
root_residual)
self.level_root = level_root
self.root_dim = root_dim
self.downsample = None
self.project = None
self.levels = levels
if stride > 1:
self.downsample = nn.MaxPool2d(stride, stride=stride)
if in_channels != out_channels:
self.project = nn.Sequential(
nn.Conv2d(in_channels, out_channels,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM)
)
def forward(self, x, residual=None, children=None):
children = [] if children is None else children
bottom = self.downsample(x) if self.downsample else x
residual = self.project(bottom) if self.project else bottom
if self.level_root:
children.append(bottom)
x1 = self.tree1(x, residual)
if self.levels == 1:
x2 = self.tree2(x1)
x = self.root(x2, x1, *children)
else:
children.append(x1)
x = self.tree2(x1, children=children)
return x
class DLA(nn.Module):
def __init__(self, levels, channels, num_classes=1000,
block=BasicBlock, residual_root=False, linear_root=False):
super(DLA, self).__init__()
self.channels = channels
self.num_classes = num_classes
self.base_layer = nn.Sequential(
nn.Conv2d(3, channels[0], kernel_size=7, stride=1,
padding=3, bias=False),
nn.BatchNorm2d(channels[0], momentum=BN_MOMENTUM),
nn.ReLU(inplace=True))
self.level0 = self._make_conv_level(
channels[0], channels[0], levels[0])
self.level1 = self._make_conv_level(
channels[0], channels[1], levels[1], stride=2)
self.level2 = Tree(levels[2], block, channels[1], channels[2], 2,
level_root=False,
root_residual=residual_root)
self.level3 = Tree(levels[3], block, channels[2], channels[3], 2,
level_root=True, root_residual=residual_root)
self.level4 = Tree(levels[4], block, channels[3], channels[4], 2,
level_root=True, root_residual=residual_root)
self.level5 = Tree(levels[5], block, channels[4], channels[5], 2,
level_root=True, root_residual=residual_root)
# for m in self.modules():
# if isinstance(m, nn.Conv2d):
# n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
# m.weight.data.normal_(0, math.sqrt(2. / n))
# elif isinstance(m, nn.BatchNorm2d):
# m.weight.data.fill_(1)
# m.bias.data.zero_()
def _make_level(self, block, inplanes, planes, blocks, stride=1):
downsample = None
if stride != 1 or inplanes != planes:
downsample = nn.Sequential(
nn.MaxPool2d(stride, stride=stride),
nn.Conv2d(inplanes, planes,
kernel_size=1, stride=1, bias=False),
nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(inplanes, planes, stride, downsample=downsample))
for i in range(1, blocks):
layers.append(block(inplanes, planes))
return nn.Sequential(*layers)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
modules = []
for i in range(convs):
modules.extend([
nn.Conv2d(inplanes, planes, kernel_size=3,
stride=stride if i == 0 else 1,
padding=dilation, bias=False, dilation=dilation),
nn.BatchNorm2d(planes, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)])
inplanes = planes
return nn.Sequential(*modules)
def forward(self, x):
y = []
x = self.base_layer(x)
for i in range(6):
x = getattr(self, 'level{}'.format(i))(x)
y.append(x)
return y
def load_pretrained_model(self, data='imagenet', name='dla34', hash='ba72cf86'):
# fc = self.fc
if name.endswith('.pth'):
model_weights = torch.load(data + name)
else:
model_url = get_model_url(data, name, hash)
model_weights = model_zoo.load_url(model_url)
num_classes = len(model_weights[list(model_weights.keys())[-1]])
self.fc = nn.Conv2d(
self.channels[-1], num_classes,
kernel_size=1, stride=1, padding=0, bias=True)
self.load_state_dict(model_weights)
# self.fc = fc
def dla34(pretrained=True, **kwargs): # DLA-34
model = DLA([1, 1, 1, 2, 2, 1],
[16, 32, 64, 128, 256, 512],
block=BasicBlock, **kwargs)
if pretrained:
model.load_pretrained_model(data='imagenet', name='dla34', hash='ba72cf86')
return model
class Identity(nn.Module):
def __init__(self):
super(Identity, self).__init__()
def forward(self, x):
return x
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
class DeformConv(nn.Module):
def __init__(self, chi, cho):
super(DeformConv, self).__init__()
self.actf = nn.Sequential(
nn.BatchNorm2d(cho, momentum=BN_MOMENTUM),
nn.ReLU(inplace=True)
)
self.conv = DCN(chi, cho, kernel_size=(3,3), stride=1, padding=1, dilation=1, deformable_groups=1)
def forward(self, x):
x = self.conv(x)
x = self.actf(x)
return x
class IDAUp(nn.Module):
def __init__(self, o, channels, up_f):
super(IDAUp, self).__init__()
for i in range(1, len(channels)):
c = channels[i]
f = int(up_f[i])
proj = DeformConv(c, o)
node = DeformConv(o, o)
up = nn.ConvTranspose2d(o, o, f * 2, stride=f,
padding=f // 2, output_padding=0,
groups=o, bias=False)
fill_up_weights(up)
setattr(self, 'proj_' + str(i), proj)
setattr(self, 'up_' + str(i), up)
setattr(self, 'node_' + str(i), node)
def forward(self, layers, startp, endp):
for i in range(startp + 1, endp):
upsample = getattr(self, 'up_' + str(i - startp))
project = getattr(self, 'proj_' + str(i - startp))
layers[i] = upsample(project(layers[i]))
node = getattr(self, 'node_' + str(i - startp))
layers[i] = node(layers[i] + layers[i - 1])
class DLAUp(nn.Module):
def __init__(self, startp, channels, scales, in_channels=None):
super(DLAUp, self).__init__()
self.startp = startp
if in_channels is None:
in_channels = channels
self.channels = channels
channels = list(channels)
scales = np.array(scales, dtype=int)
for i in range(len(channels) - 1):
j = -i - 2
setattr(self, 'ida_{}'.format(i),
IDAUp(channels[j], in_channels[j:],
scales[j:] // scales[j]))
scales[j + 1:] = scales[j]
in_channels[j + 1:] = [channels[j] for _ in channels[j + 1:]]
def forward(self, layers):
out = [layers[-1]] # start with 16 * 16
for i in range(len(layers) - self.startp - 1):
ida = getattr(self, 'ida_{}'.format(i))
ida(layers, len(layers) -i - 2, len(layers))
out.insert(0, layers[-1])
return out
class Interpolate(nn.Module):
def __init__(self, scale, mode):
super(Interpolate, self).__init__()
self.scale = scale
self.mode = mode
def forward(self, x):
x = F.interpolate(x, scale_factor=self.scale, mode=self.mode, align_corners=False)
return x
class DLASeg(nn.Module):
def __init__(self, base_name, heads, pretrained, down_ratio, final_kernel,
last_level, head_conv, out_channel=0):
super(DLASeg, self).__init__()
assert down_ratio in [2, 4, 8, 16]
self.first_level = int(np.log2(down_ratio))
self.last_level = last_level
self.base = globals()[base_name](pretrained=pretrained)
channels = self.base.channels
scales = [2 ** i for i in range(len(channels[self.first_level:]))]
self.dla_up = DLAUp(self.first_level, channels[self.first_level:], scales)
if out_channel == 0:
out_channel = channels[self.first_level]
self.ida_up = IDAUp(out_channel, channels[self.first_level:self.last_level],
[2 ** i for i in range(self.last_level - self.first_level)])
self.heads = heads
for head in self.heads:
classes = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(channels[self.first_level], head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, classes,
kernel_size=final_kernel, stride=1,
padding=final_kernel // 2, bias=True))
if 'hm' in head:
fc[-1].bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
else:
fc = nn.Conv2d(channels[self.first_level], classes,
kernel_size=final_kernel, stride=1,
padding=final_kernel // 2, bias=True)
if 'hm' in head:
fc.bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
self.__setattr__(head, fc)
def forward(self, x):
x = self.base(x)
x = self.dla_up(x)
y = []
for i in range(self.last_level - self.first_level):
y.append(x[i].clone())
self.ida_up(y, 0, len(y))
z = {}
for head in self.heads:
z[head] = self.__getattr__(head)(y[-1])
return [z]
def get_pose_net(num_layers, heads, head_conv=256, down_ratio=4):
model = DLASeg('dla{}'.format(num_layers), heads,
pretrained=True,
down_ratio=down_ratio,
final_kernel=1,
last_level=5,
head_conv=head_conv)
return model
# ------------------------------------------------------------------------------
# Copyright (c) Microsoft
# Licensed under the MIT License.
# Written by Bin Xiao (Bin.Xiao@microsoft.com)
# Modified by Dequan Wang and Xingyi Zhou
# ------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import math
import logging
import torch
import torch.nn as nn
from .DCNv2.dcn_v2 import DCN
import torch.utils.model_zoo as model_zoo
BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__)
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}
def conv3x3(in_planes, out_planes, stride=1):
"""3x3 convolution with padding"""
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
padding=1, bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(inplanes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class Bottleneck(nn.Module):
expansion = 4
def __init__(self, inplanes, planes, stride=1, downsample=None):
super(Bottleneck, self).__init__()
self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
bias=False)
self.bn3 = nn.BatchNorm2d(planes * self.expansion,
momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.downsample = downsample
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out = self.relu(out)
out = self.conv3(out)
out = self.bn3(out)
if self.downsample is not None:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
def fill_up_weights(up):
w = up.weight.data
f = math.ceil(w.size(2) / 2)
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(w.size(2)):
for j in range(w.size(3)):
w[0, 0, i, j] = \
(1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
for c in range(1, w.size(0)):
w[c, 0, :, :] = w[0, 0, :, :]
def fill_fc_weights(layers):
for m in layers.modules():
if isinstance(m, nn.Conv2d):
nn.init.normal_(m.weight, std=0.001)
# torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')
# torch.nn.init.xavier_normal_(m.weight.data)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
class PoseResNet(nn.Module):
def __init__(self, block, layers, heads, head_conv):
self.inplanes = 64
self.heads = heads
self.deconv_with_bias = False
super(PoseResNet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
bias=False)
self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
self.relu = nn.ReLU(inplace=True)
self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
self.layer1 = self._make_layer(block, 64, layers[0])
self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
# used for deconv layers
self.deconv_layers = self._make_deconv_layer(
3,
[256, 128, 64],
[4, 4, 4],
)
for head in self.heads:
classes = self.heads[head]
if head_conv > 0:
fc = nn.Sequential(
nn.Conv2d(64, head_conv,
kernel_size=3, padding=1, bias=True),
nn.ReLU(inplace=True),
nn.Conv2d(head_conv, classes,
kernel_size=1, stride=1,
padding=0, bias=True))
if 'hm' in head:
fc[-1].bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
else:
fc = nn.Conv2d(64, classes,
kernel_size=1, stride=1,
padding=0, bias=True)
if 'hm' in head:
fc.bias.data.fill_(-2.19)
else:
fill_fc_weights(fc)
self.__setattr__(head, fc)
def _make_layer(self, block, planes, blocks, stride=1):
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = nn.Sequential(
nn.Conv2d(self.inplanes, planes * block.expansion,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
)
layers = []
layers.append(block(self.inplanes, planes, stride, downsample))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(self.inplanes, planes))
return nn.Sequential(*layers)
def _get_deconv_cfg(self, deconv_kernel, index):
if deconv_kernel == 4:
padding = 1
output_padding = 0
elif deconv_kernel == 3:
padding = 1
output_padding = 1
elif deconv_kernel == 2:
padding = 0
output_padding = 0
return deconv_kernel, padding, output_padding
def _make_deconv_layer(self, num_layers, num_filters, num_kernels):
assert num_layers == len(num_filters), \
'ERROR: num_deconv_layers is different len(num_deconv_filters)'
assert num_layers == len(num_kernels), \
'ERROR: num_deconv_layers is different len(num_deconv_filters)'
layers = []
for i in range(num_layers):
kernel, padding, output_padding = \
self._get_deconv_cfg(num_kernels[i], i)
planes = num_filters[i]
fc = DCN(self.inplanes, planes,
kernel_size=(3,3), stride=1,
padding=1, dilation=1, deformable_groups=1)
# fc = nn.Conv2d(self.inplanes, planes,
# kernel_size=3, stride=1,
# padding=1, dilation=1, bias=False)
# fill_fc_weights(fc)
up = nn.ConvTranspose2d(
in_channels=planes,
out_channels=planes,
kernel_size=kernel,
stride=2,
padding=padding,
output_padding=output_padding,
bias=self.deconv_with_bias)
fill_up_weights(up)
layers.append(fc)
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
layers.append(nn.ReLU(inplace=True))
layers.append(up)
layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM))
layers.append(nn.ReLU(inplace=True))
self.inplanes = planes
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.deconv_layers(x)
ret = {}
for head in self.heads:
ret[head] = self.__getattr__(head)(x)
return [ret]
def init_weights(self, num_layers):
if 1:
url = model_urls['resnet{}'.format(num_layers)]
pretrained_state_dict = model_zoo.load_url(url)
print('=> loading pretrained model {}'.format(url))
self.load_state_dict(pretrained_state_dict, strict=False)
print('=> init deconv weights from normal distribution')
for name, m in self.deconv_layers.named_modules():
if isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
resnet_spec = {18: (BasicBlock, [2, 2, 2, 2]),
34: (BasicBlock, [3, 4, 6, 3]),
50: (Bottleneck, [3, 4, 6, 3]),
101: (Bottleneck, [3, 4, 23, 3]),
152: (Bottleneck, [3, 8, 36, 3])}
def get_pose_net(num_layers, heads, head_conv=256):
block_class, layers = resnet_spec[num_layers]
model = PoseResNet(block_class, layers, heads, head_conv=head_conv)
model.init_weights(num_layers)
return model
import torch
from torch.autograd import Variable
from torch.nn.parallel._functions import Scatter, Gather
def scatter(inputs, target_gpus, dim=0, chunk_sizes=None):
r"""
Slices variables into approximately equal chunks and
distributes them across given GPUs. Duplicates
references to objects that are not variables. Does not
support Tensors.
"""
def scatter_map(obj):
if isinstance(obj, Variable):
return Scatter.apply(target_gpus, chunk_sizes, dim, obj)
assert not torch.is_tensor(obj), "Tensors not supported in scatter."
if isinstance(obj, tuple):
return list(zip(*map(scatter_map, obj)))
if isinstance(obj, list):
return list(map(list, zip(*map(scatter_map, obj))))
if isinstance(obj, dict):
return list(map(type(obj), zip(*map(scatter_map, obj.items()))))
return [obj for targets in target_gpus]
return scatter_map(inputs)
def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None):
r"""Scatter with support for kwargs dictionary"""
inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else []
kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else []
if len(inputs) < len(kwargs):
inputs.extend([() for _ in range(len(kwargs) - len(inputs))])
elif len(kwargs) < len(inputs):
kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))])
inputs = tuple(inputs)
kwargs = tuple(kwargs)
return inputs, kwargs
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
def _sigmoid(x):
y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
return y
def _gather_feat(feat, ind, mask=None):
dim = feat.size(2)
ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
feat = feat.gather(1, ind)
if mask is not None:
mask = mask.unsqueeze(2).expand_as(feat)
feat = feat[mask]
feat = feat.view(-1, dim)
return feat
def _tranpose_and_gather_feat(feat, ind):
feat = feat.permute(0, 2, 3, 1).contiguous()
feat = feat.view(feat.size(0), -1, feat.size(3))
feat = _gather_feat(feat, ind)
return feat
def flip_tensor(x):
return torch.flip(x, [3])
# tmp = x.detach().cpu().numpy()[..., ::-1].copy()
# return torch.from_numpy(tmp).to(x.device)
def flip_lr(x, flip_idx):
tmp = x.detach().cpu().numpy()[..., ::-1].copy()
shape = tmp.shape
for e in flip_idx:
tmp[:, e[0], ...], tmp[:, e[1], ...] = \
tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
return torch.from_numpy(tmp.reshape(shape)).to(x.device)
def flip_lr_off(x, flip_idx):
tmp = x.detach().cpu().numpy()[..., ::-1].copy()
shape = tmp.shape
tmp = tmp.reshape(tmp.shape[0], 17, 2,
tmp.shape[2], tmp.shape[3])
tmp[:, :, 0, :, :] *= -1
for e in flip_idx:
tmp[:, e[0], ...], tmp[:, e[1], ...] = \
tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
return torch.from_numpy(tmp.reshape(shape)).to(x.device)
\ No newline at end of file
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import sys
class opts(object):
def __init__(self):
self.parser = argparse.ArgumentParser()
# basic experiment setting
self.parser.add_argument('--task', default='multi_pose',
help='ctdet | ddd | multi_pose | exdet')
self.parser.add_argument('--dataset', default='facehp',
help='coco | kitti | coco_hp | pascal | pig | face | facehp')
self.parser.add_argument('--exp_id', default='dla')
self.parser.add_argument('--test', action='store_true')
self.parser.add_argument('--debug', type=int, default=0,
help='level of visualization.'
'-1: return the result image'
'1: only show the final detection results'
'2: show the network output features'
'3: use matplot to display' # useful when lunching training with ipython notebook
'4: save all visualizations to disk')
self.parser.add_argument('--demo', default='/path/WIDER_train/images/0--Parade/0_Parade_marchingband_1_80.jpg',
help='path to image/ image folders/ video. '
'or "webcam"')
self.parser.add_argument(
'--data_dir', type=str, default='')
self.parser.add_argument('--load_model', default='',
help='path to pretrained model')
self.parser.add_argument('--resume', action='store_true',
help='resume an experiment. '
'Reloaded the optimizer parameter and '
'set load_model to model_last.pth '
'in the exp dir if load_model is empty.')
self.parser.add_argument(
'--train_json', default=None, help='the train file labels')
self.parser.add_argument(
'--val_json', default=None, help='the test file labels')
self.parser.add_argument(
'--output_video', type=str, default='../output/res_3.mp4')
# system
self.parser.add_argument('--gpus', default='0',
help='-1 for CPU, use comma for multiple gpus, such as 1,2,3')
self.parser.add_argument('--num_workers', type=int, default=4,
help='dataloader threads. 0 for single-thread.')
self.parser.add_argument('--not_cuda_benchmark', action='store_true',
help='disable when the input size is not fixed.')
self.parser.add_argument('--seed', type=int, default=317,
help='random seed') # from CornerNet
# log
self.parser.add_argument('--print_iter', type=int, default=0,
help='disable progress bar and print to screen.')
self.parser.add_argument('--hide_data_time', action='store_true',
help='not display time during training.')
self.parser.add_argument('--save_all', action='store_true', default=True,
help='save model to disk every 5 epochs.')
self.parser.add_argument('--metric', default='loss',
help='main metric to save best model')
self.parser.add_argument('--vis_thresh', type=float, default=0.4,
help='visualization threshold.')
self.parser.add_argument('--debugger_theme', default='white',
choices=['white', 'black'])
# model
self.parser.add_argument('--arch', default='mobilev2_10',
help='model architecture. Currently tested'
'res_18 | res_101 | resdcn_18 | resdcn_101 |'
'dlav0_34 | dla_34 | hourglass | mobilev2_10')
self.parser.add_argument('--head_conv', type=int, default=-1,
help='conv layer channels for output head'
'0 for no conv layer'
'-1 for default setting'
'64 for resnets and 256 for dla.')
self.parser.add_argument('--down_ratio', type=int, default=4,
help='output stride. Currently only supports 4.')
# input
self.parser.add_argument('--input_res', type=int, default=-1,
help='input height and width. -1 for default from '
'dataset. Will be overriden by input_h | input_w')
self.parser.add_argument('--input_h', type=int, default=-1,
help='input height. -1 for default from dataset.')
self.parser.add_argument('--input_w', type=int, default=-1,
help='input width. -1 for default from dataset.')
# train
self.parser.add_argument('--lr', type=float, default=1.25e-4,
help='learning rate for batch size 32.')
self.parser.add_argument('--lr_step', type=str, default='30,80',
help='drop learning rate by 10.')
self.parser.add_argument('--num_epochs', type=int, default=140,
help='total training epochs.')
self.parser.add_argument('--batch_size', type=int, default=32,
help='batch size')
self.parser.add_argument('--master_batch_size', type=int, default=15,
help='batch size on the master gpu.')
self.parser.add_argument('--num_iters', type=int, default=-1,
help='default: #samples / batch_size.')
self.parser.add_argument('--val_intervals', type=int, default=5,
help='number of epochs to run validation.')
self.parser.add_argument('--trainval', action='store_true',
help='include validation in training and '
'test on test set')
# test
self.parser.add_argument('--flip_test', action='store_true',
help='flip data augmentation.')
self.parser.add_argument('--test_scales', type=str, default='1',
help='multi scale test augmentation.')
self.parser.add_argument('--nms', action='store_true',
help='run nms in testing.')
self.parser.add_argument('--K', type=int, default=200,
help='max number of output objects.')
self.parser.add_argument('--not_prefetch_test', action='store_true',
help='not use parallal data pre-processing.')
self.parser.add_argument('--fix_res', action='store_true',
help='fix testing resolution or keep '
'the original resolution')
self.parser.add_argument('--keep_res', action='store_true',
help='keep the original resolution'
' during validation.')
# dataset
self.parser.add_argument('--not_rand_crop', action='store_true',
help='not use the random crop data augmentation'
'from CornerNet.')
self.parser.add_argument('--shift', type=float, default=0.1,
help='when not using random crop'
'apply shift augmentation.')
self.parser.add_argument('--scale', type=float, default=0.4,
help='when not using random crop'
'apply scale augmentation.')
self.parser.add_argument('--rotate', type=float, default=0,
help='when not using random crop'
'apply rotation augmentation.')
self.parser.add_argument('--flip', type=float, default=0.5,
help='probability of applying flip augmentation.')
self.parser.add_argument('--no_color_aug', action='store_true',
help='not use the color augmenation '
'from CornerNet')
# multi_pose
self.parser.add_argument('--aug_rot', type=float, default=0,
help='probability of applying '
'rotation augmentation.')
# ddd
self.parser.add_argument('--aug_ddd', type=float, default=0.5,
help='probability of applying crop augmentation.')
self.parser.add_argument('--rect_mask', action='store_true',
help='for ignored object, apply mask on the '
'rectangular region or just center point.')
self.parser.add_argument('--kitti_split', default='3dop',
help='different validation split for kitti: '
'3dop | subcnn')
# loss
self.parser.add_argument('--mse_loss', action='store_true',
help='use mse loss or focal loss to train '
'keypoint heatmaps.')
# ctdet
self.parser.add_argument('--reg_loss', default='sl1',
help='regression loss: sl1 | l1 | l2')
self.parser.add_argument('--hm_weight', type=float, default=1,
help='loss weight for keypoint heatmaps.')
self.parser.add_argument('--off_weight', type=float, default=1,
help='loss weight for keypoint local offsets.')
self.parser.add_argument('--wh_weight', type=float, default=0.1,
help='loss weight for bounding box size.')
# multi_pose
self.parser.add_argument('--lm_weight', type=float, default=0.1,
help='loss weight for human pose offset.')
self.parser.add_argument('--hm_hp_weight', type=float, default=1,
help='loss weight for human keypoint heatmap.')
# ddd
self.parser.add_argument('--dep_weight', type=float, default=1,
help='loss weight for depth.')
self.parser.add_argument('--dim_weight', type=float, default=1,
help='loss weight for 3d bounding box size.')
self.parser.add_argument('--rot_weight', type=float, default=1,
help='loss weight for orientation.')
self.parser.add_argument('--peak_thresh', type=float, default=0.2)
# task
# ctdet
self.parser.add_argument('--norm_wh', action='store_true',
help='L1(\hat(y) / y, 1) or L1(\hat(y), y)')
self.parser.add_argument('--dense_wh', action='store_true',
help='apply weighted regression near center or '
'just apply regression on center point.')
self.parser.add_argument('--cat_spec_wh', action='store_true',
help='category specific bounding box size.')
self.parser.add_argument('--not_reg_offset', action='store_true',
help='not regress local offset.')
# exdet
self.parser.add_argument('--agnostic_ex', action='store_true',
help='use category agnostic extreme points.')
self.parser.add_argument('--scores_thresh', type=float, default=0.1,
help='threshold for extreme point heatmap.')
self.parser.add_argument('--center_thresh', type=float, default=0.1,
help='threshold for centermap.')
self.parser.add_argument('--aggr_weight', type=float, default=0.0,
help='edge aggregation weight.')
# multi_pose
self.parser.add_argument('--dense_hp', action='store_true',
help='apply weighted pose regression near center '
'or just apply regression on center point.')
self.parser.add_argument('--not_hm_hp', action='store_true',
help='not estimate human joint heatmap, '
'directly use the joint offset from center.')
self.parser.add_argument('--not_reg_hp_offset', action='store_true',
help='not regress local offset for '
'human joint heatmaps.')
self.parser.add_argument('--not_reg_bbox', action='store_true',
help='not regression bounding box size.')
# ground truth validation
self.parser.add_argument('--eval_oracle_hm', action='store_true',
help='use ground center heatmap.')
self.parser.add_argument('--eval_oracle_wh', action='store_true',
help='use ground truth bounding box size.')
self.parser.add_argument('--eval_oracle_offset', action='store_true',
help='use ground truth local heatmap offset.')
self.parser.add_argument('--eval_oracle_kps', action='store_true',
help='use ground truth human pose offset.')
self.parser.add_argument('--eval_oracle_hmhp', action='store_true',
help='use ground truth human joint heatmaps.')
self.parser.add_argument('--eval_oracle_hp_offset', action='store_true',
help='use ground truth human joint local offset.')
self.parser.add_argument('--eval_oracle_dep', action='store_true',
help='use ground truth depth.')
def parse(self, args=''):
if args == '':
opt = self.parser.parse_args()
else:
opt = self.parser.parse_args(args)
opt.gpus_str = opt.gpus
opt.gpus = [int(gpu) for gpu in opt.gpus.split(',')]
opt.gpus = [i for i in range(
len(opt.gpus))] if opt.gpus[0] >= 0 else [-1]
opt.lr_step = [int(i) for i in opt.lr_step.split(',')]
opt.test_scales = [float(i) for i in opt.test_scales.split(',')]
opt.fix_res = not opt.keep_res
print('Fix size testing.' if opt.fix_res else 'Keep resolution testing.')
opt.reg_offset = not opt.not_reg_offset
opt.reg_bbox = not opt.not_reg_bbox
opt.hm_hp = not opt.not_hm_hp
opt.reg_hp_offset = (not opt.not_reg_hp_offset) and opt.hm_hp
if opt.head_conv == -1: # init default head_conv
opt.head_conv = 256 if 'dla' in opt.arch else 64
opt.pad = 127 if 'hourglass' in opt.arch else 31
opt.num_stacks = 2 if opt.arch == 'hourglass' else 1
if opt.trainval:
opt.val_intervals = 100000000
if opt.debug > 0:
opt.num_workers = 0
opt.batch_size = 1
opt.gpus = [opt.gpus[0]]
opt.master_batch_size = -1
if opt.master_batch_size == -1:
opt.master_batch_size = opt.batch_size // len(opt.gpus)
rest_batch_size = (opt.batch_size - opt.master_batch_size)
opt.chunk_sizes = [opt.master_batch_size]
for i in range(len(opt.gpus) - 1):
slave_chunk_size = rest_batch_size // (len(opt.gpus) - 1)
if i < rest_batch_size % (len(opt.gpus) - 1):
slave_chunk_size += 1
opt.chunk_sizes.append(slave_chunk_size)
print('training chunk_sizes:', opt.chunk_sizes)
opt.root_dir = os.path.join(os.path.dirname(__file__), '..', '..')
# opt.data_dir = os.path.join(opt.root_dir, 'data')
opt.exp_dir = os.path.join(opt.root_dir, 'exp', opt.task)
opt.save_dir = os.path.join(opt.exp_dir, opt.arch)
opt.debug_dir = os.path.join(opt.save_dir, 'debug')
print('The output will be saved to ', opt.save_dir)
if opt.resume and opt.load_model == '':
model_path = opt.save_dir[:-4] if opt.save_dir.endswith('TEST') \
else opt.save_dir
opt.load_model = os.path.join(model_path, 'model_last.pth')
return opt
def update_dataset_info_and_set_heads(self, opt, dataset):
input_h, input_w = dataset.default_resolution
opt.mean, opt.std = dataset.mean, dataset.std
opt.num_classes = dataset.num_classes
# input_h(w): opt.input_h overrides opt.input_res overrides dataset default
input_h = opt.input_res if opt.input_res > 0 else input_h
input_w = opt.input_res if opt.input_res > 0 else input_w
opt.input_h = opt.input_h if opt.input_h > 0 else input_h
opt.input_w = opt.input_w if opt.input_w > 0 else input_w
opt.output_h = opt.input_h // opt.down_ratio
opt.output_w = opt.input_w // opt.down_ratio
opt.input_res = max(opt.input_h, opt.input_w)
opt.output_res = max(opt.output_h, opt.output_w)
if opt.task == 'exdet':
# assert opt.dataset in ['coco']
num_hm = 1 if opt.agnostic_ex else opt.num_classes
opt.heads = {'hm_t': num_hm, 'hm_l': num_hm,
'hm_b': num_hm, 'hm_r': num_hm,
'hm_c': opt.num_classes}
if opt.reg_offset:
opt.heads.update(
{'reg_t': 2, 'reg_l': 2, 'reg_b': 2, 'reg_r': 2})
elif opt.task == 'ddd':
# assert opt.dataset in ['gta', 'kitti', 'viper']
opt.heads = {'hm': opt.num_classes, 'dep': 1, 'rot': 8, 'dim': 3}
if opt.reg_bbox:
opt.heads.update(
{'wh': 2})
if opt.reg_offset:
opt.heads.update({'reg': 2})
elif opt.task == 'ctdet':
# assert opt.dataset in ['pascal', 'coco']
opt.heads = {'hm': opt.num_classes,
'wh': 2 if not opt.cat_spec_wh else 2 * opt.num_classes}
if opt.reg_offset:
opt.heads.update({'reg': 2})
# elif opt.task == 'multi_pose':
# # assert opt.dataset in ['coco_hp']
# opt.flip_idx = dataset.flip_idx
# opt.heads = {'hm': opt.num_classes, 'wh': 2, 'hps': dataset.num_joints*2}
# if opt.reg_offset:
# opt.heads.update({'reg': 2})
# if opt.hm_hp:
# opt.heads.update({'hm_hp': dataset.num_joints})
# if opt.reg_hp_offset:
# opt.heads.update({'hp_offset': 2})
elif opt.task == 'multi_pose':
opt.flip_idx = dataset.flip_idx
opt.heads = {'hm': opt.num_classes, 'wh': 2,
'hm_offset': 2, 'landmarks': dataset.num_joints * 2}
else:
assert 0, 'task not defined!'
print('heads', opt.heads)
return opt
def init(self, args=''):
default_dataset_info = {
'ctdet': {'default_resolution': [512, 512], 'num_classes': 1,
'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],
'dataset': 'pig'},
# 'ctdet': {'default_resolution': [512, 512], 'num_classes': 1,
# 'mean': [0.408, 0.447, 0.470], 'std': [0.289, 0.274, 0.278],
# 'dataset': 'coco'},
'exdet': {'default_resolution': [512, 512], 'num_classes': 80,
'mean': [0.408, 0.447, 0.470], 'std': [0.289, 0.274, 0.278],
'dataset': 'coco'},
'multi_pose': {
'default_resolution': [512, 512], 'num_classes': 1,
'mean': [0.408, 0.447, 0.470], 'std': [0.289, 0.274, 0.278],
'dataset': 'facehp', 'num_joints': 5,
'flip_idx': [[0, 1], [3, 4]]},
'ddd': {'default_resolution': [384, 1280], 'num_classes': 3,
'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225],
'dataset': 'kitti'},
}
class Struct:
def __init__(self, entries):
for k, v in entries.items():
self.__setattr__(k, v)
opt = self.parse(args)
dataset = Struct(default_dataset_info[opt.task])
opt.dataset = dataset.dataset
opt = self.update_dataset_info_and_set_heads(opt, dataset)
return opt
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import torch
from progress.bar import Bar
from models.data_parallel import DataParallel
from utils.utils import AverageMeter
class ModleWithLoss(torch.nn.Module):
def __init__(self, model, loss):
super(ModleWithLoss, self).__init__()
self.model = model
self.loss = loss
def forward(self, batch):
outputs = self.model(batch['input'])
loss, loss_stats = self.loss(outputs, batch) # 输入
return outputs[-1], loss, loss_stats
class BaseTrainer(object):
def __init__(self, opt, model, optimizer=None):
self.opt = opt
self.optimizer = optimizer
self.loss_stats, self.loss = self._get_losses(opt)
self.model_with_loss = ModleWithLoss(model, self.loss)
def set_device(self, gpus, chunk_sizes, device):
if len(gpus) > 1:
self.model_with_loss = DataParallel(
self.model_with_loss, device_ids=gpus,
chunk_sizes=chunk_sizes).to(device)
else:
self.model_with_loss = self.model_with_loss.to(device)
for state in self.optimizer.state.values():
for k, v in state.items():
if isinstance(v, torch.Tensor):
state[k] = v.to(device=device, non_blocking=True)
def run_epoch(self, phase, epoch, data_loader):
model_with_loss = self.model_with_loss
if phase == 'train':
model_with_loss.train()
else:
if len(self.opt.gpus) > 1:
model_with_loss = self.model_with_loss.module
model_with_loss.eval()
torch.cuda.empty_cache()
opt = self.opt
results = {}
data_time, batch_time = AverageMeter(), AverageMeter()
avg_loss_stats = {l: AverageMeter() for l in self.loss_stats}
num_iters = len(data_loader) if opt.num_iters < 0 else opt.num_iters
bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters)
end = time.time()
for iter_id, batch in enumerate(data_loader):
if iter_id >= num_iters:
break
data_time.update(time.time() - end)
for k in batch:
if k != 'meta':
batch[k] = batch[k].to(
device=opt.device, non_blocking=True)
output, loss, loss_stats = model_with_loss(batch)
loss = loss.mean()
if phase == 'train':
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
batch_time.update(time.time() - end)
end = time.time()
Bar.suffix = '{phase}: [{0}]|Tot: {total:} |ETA: {eta:} '.format(
epoch, phase=phase, total=bar.elapsed_td, eta=bar.eta_td)
for l in avg_loss_stats:
avg_loss_stats[l].update(
loss_stats[l].mean().item(), batch['input'].size(0))
Bar.suffix = Bar.suffix + \
'|{} {:.4f} '.format(l, avg_loss_stats[l].avg)
if not opt.hide_data_time:
Bar.suffix = Bar.suffix + '|Data {dt.val:.3f}s({dt.avg:.3f}s) ' \
'|Net {bt.avg:.3f}s'.format(dt=data_time, bt=batch_time)
if opt.print_iter > 0:
if iter_id % opt.print_iter == 0:
print('{}/{}| {}'.format(opt.task, opt.exp_id, Bar.suffix))
else:
bar.next()
if opt.debug > 0:
self.debug(batch, output, iter_id)
if opt.test:
self.save_result(output, batch, results)
del output, loss, loss_stats
bar.finish()
ret = {k: v.avg for k, v in avg_loss_stats.items()}
ret['time'] = bar.elapsed_td.total_seconds() / 60.
return ret, results
def debug(self, batch, output, iter_id):
raise NotImplementedError
def save_result(self, output, batch, results):
raise NotImplementedError
def _get_losses(self, opt):
raise NotImplementedError
def val(self, epoch, data_loader):
return self.run_epoch('val', epoch, data_loader)
def train(self, epoch, data_loader):
return self.run_epoch('train', epoch, data_loader)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import numpy as np
from models.losses import FocalLoss
from models.losses import RegL1Loss, RegLoss, NormRegL1Loss, RegWeightedL1Loss
from models.decode import ctdet_decode
from models.utils import _sigmoid
from utils.debugger import Debugger
from utils.post_process import ctdet_post_process
from utils.oracle_utils import gen_oracle_map
from .base_trainer import BaseTrainer
class CtdetLoss(torch.nn.Module):
def __init__(self, opt):
super(CtdetLoss, self).__init__()
self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss()
self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \
RegLoss() if opt.reg_loss == 'sl1' else None
self.crit_wh = torch.nn.L1Loss(reduction='sum') if opt.dense_wh else \
NormRegL1Loss() if opt.norm_wh else \
RegWeightedL1Loss() if opt.cat_spec_wh else self.crit_reg
self.opt = opt
def forward(self, outputs, batch):
opt = self.opt
hm_loss, wh_loss, off_loss = 0, 0, 0
for s in range(opt.num_stacks):
output = outputs[s]
if not opt.mse_loss:
output['hm'] = _sigmoid(output['hm'])
if opt.eval_oracle_hm:
output['hm'] = batch['hm']
if opt.eval_oracle_wh:
output['wh'] = torch.from_numpy(gen_oracle_map(
batch['wh'].detach().cpu().numpy(),
batch['ind'].detach().cpu().numpy(),
output['wh'].shape[3], output['wh'].shape[2])).to(opt.device)
if opt.eval_oracle_offset:
output['reg'] = torch.from_numpy(gen_oracle_map(
batch['reg'].detach().cpu().numpy(),
batch['ind'].detach().cpu().numpy(),
output['reg'].shape[3], output['reg'].shape[2])).to(opt.device)
hm_loss += self.crit(output['hm'], batch['hm']) / \
opt.num_stacks # 热力图损失
if opt.wh_weight > 0:
if opt.dense_wh:
mask_weight = batch['dense_wh_mask'].sum() + 1e-4
wh_loss += (
self.crit_wh(output['wh'] * batch['dense_wh_mask'],
batch['dense_wh'] * batch['dense_wh_mask']) /
mask_weight) / opt.num_stacks
elif opt.cat_spec_wh:
wh_loss += self.crit_wh(
output['wh'], batch['cat_spec_mask'],
batch['ind'], batch['cat_spec_wh']) / opt.num_stacks
else:
wh_loss += self.crit_reg(
output['wh'], batch['reg_mask'],
batch['ind'], batch['wh']) / opt.num_stacks
if opt.reg_offset and opt.off_weight > 0:
off_loss += self.crit_reg(output['reg'], batch['reg_mask'],
batch['ind'], batch['reg']) / opt.num_stacks
loss = opt.hm_weight * hm_loss + opt.wh_weight * wh_loss + \
opt.off_weight * off_loss
loss_stats = {'loss': loss, 'hm_loss': hm_loss,
'wh_loss': wh_loss, 'off_loss': off_loss}
return loss, loss_stats
class CtdetTrainer(BaseTrainer):
def __init__(self, opt, model, optimizer=None):
super(CtdetTrainer, self).__init__(opt, model, optimizer=optimizer)
def _get_losses(self, opt):
loss_states = ['loss', 'hm_loss', 'wh_loss', 'off_loss']
loss = CtdetLoss(opt)
return loss_states, loss
def debug(self, batch, output, iter_id):
opt = self.opt
reg = output['reg'] if opt.reg_offset else None
dets = ctdet_decode(
output['hm'], output['wh'], reg=reg,
cat_spec_wh=opt.cat_spec_wh, K=opt.K)
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
dets[:, :, :4] *= opt.down_ratio
dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
dets_gt[:, :, :4] *= opt.down_ratio
for i in range(1):
debugger = Debugger(
dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme)
img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
img = np.clip(((
img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
pred = debugger.gen_colormap(
output['hm'][i].detach().cpu().numpy())
gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm')
debugger.add_blend_img(img, gt, 'gt_hm')
debugger.add_img(img, img_id='out_pred')
for k in range(len(dets[i])):
if dets[i, k, 4] > opt.center_thresh:
debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
dets[i, k, 4], img_id='out_pred')
debugger.add_img(img, img_id='out_gt')
for k in range(len(dets_gt[i])):
if dets_gt[i, k, 4] > opt.center_thresh:
debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
dets_gt[i, k, 4], img_id='out_gt')
if opt.debug == 4:
debugger.save_all_imgs(
opt.debug_dir, prefix='{}'.format(iter_id))
else:
debugger.show_all_imgs(pause=True)
def save_result(self, output, batch, results):
reg = output['reg'] if self.opt.reg_offset else None
dets = ctdet_decode(
output['hm'], output['wh'], reg=reg,
cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K)
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
dets_out = ctdet_post_process(
dets.copy(), batch['meta']['c'].cpu().numpy(),
batch['meta']['s'].cpu().numpy(),
output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1])
results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import numpy as np
from models.losses import FocalLoss, L1Loss, BinRotLoss
from models.decode import ddd_decode
from models.utils import _sigmoid
from utils.debugger import Debugger
from utils.post_process import ddd_post_process
from utils.oracle_utils import gen_oracle_map
from .base_trainer import BaseTrainer
class DddLoss(torch.nn.Module):
def __init__(self, opt):
super(DddLoss, self).__init__()
self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss()
self.crit_reg = L1Loss()
self.crit_rot = BinRotLoss()
self.opt = opt
def forward(self, outputs, batch):
opt = self.opt
hm_loss, dep_loss, rot_loss, dim_loss = 0, 0, 0, 0
wh_loss, off_loss = 0, 0
for s in range(opt.num_stacks):
output = outputs[s]
output['hm'] = _sigmoid(output['hm'])
output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
if opt.eval_oracle_dep:
output['dep'] = torch.from_numpy(gen_oracle_map(
batch['dep'].detach().cpu().numpy(),
batch['ind'].detach().cpu().numpy(),
opt.output_w, opt.output_h)).to(opt.device)
hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks
if opt.dep_weight > 0:
dep_loss += self.crit_reg(output['dep'], batch['reg_mask'],
batch['ind'], batch['dep']) / opt.num_stacks
if opt.dim_weight > 0:
dim_loss += self.crit_reg(output['dim'], batch['reg_mask'],
batch['ind'], batch['dim']) / opt.num_stacks
if opt.rot_weight > 0:
rot_loss += self.crit_rot(output['rot'], batch['rot_mask'],
batch['ind'], batch['rotbin'],
batch['rotres']) / opt.num_stacks
if opt.reg_bbox and opt.wh_weight > 0:
wh_loss += self.crit_reg(output['wh'], batch['rot_mask'],
batch['ind'], batch['wh']) / opt.num_stacks
if opt.reg_offset and opt.off_weight > 0:
off_loss += self.crit_reg(output['reg'], batch['rot_mask'],
batch['ind'], batch['reg']) / opt.num_stacks
loss = opt.hm_weight * hm_loss + opt.dep_weight * dep_loss + \
opt.dim_weight * dim_loss + opt.rot_weight * rot_loss + \
opt.wh_weight * wh_loss + opt.off_weight * off_loss
loss_stats = {'loss': loss, 'hm_loss': hm_loss, 'dep_loss': dep_loss,
'dim_loss': dim_loss, 'rot_loss': rot_loss,
'wh_loss': wh_loss, 'off_loss': off_loss}
return loss, loss_stats
class DddTrainer(BaseTrainer):
def __init__(self, opt, model, optimizer=None):
super(DddTrainer, self).__init__(opt, model, optimizer=optimizer)
def _get_losses(self, opt):
loss_states = ['loss', 'hm_loss', 'dep_loss', 'dim_loss', 'rot_loss',
'wh_loss', 'off_loss']
loss = DddLoss(opt)
return loss_states, loss
def debug(self, batch, output, iter_id):
opt = self.opt
wh = output['wh'] if opt.reg_bbox else None
reg = output['reg'] if opt.reg_offset else None
dets = ddd_decode(output['hm'], output['rot'], output['dep'],
output['dim'], wh=wh, reg=reg, K=opt.K)
# x, y, score, r1-r8, depth, dim1-dim3, cls
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
calib = batch['meta']['calib'].detach().numpy()
# x, y, score, rot, depth, dim1, dim2, dim3
# if opt.dataset == 'gta':
# dets[:, 12:15] /= 3
dets_pred = ddd_post_process(
dets.copy(), batch['meta']['c'].detach().numpy(),
batch['meta']['s'].detach().numpy(), calib, opt)
dets_gt = ddd_post_process(
batch['meta']['gt_det'].detach().numpy().copy(),
batch['meta']['c'].detach().numpy(),
batch['meta']['s'].detach().numpy(), calib, opt)
# for i in range(input.size(0)):
for i in range(1):
debugger = Debugger(dataset=opt.dataset, ipynb=(opt.debug == 3),
theme=opt.debugger_theme)
img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.opt.std + self.opt.mean) * 255.).astype(np.uint8)
pred = debugger.gen_colormap(
output['hm'][i].detach().cpu().numpy())
gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'hm_pred')
debugger.add_blend_img(img, gt, 'hm_gt')
# decode
debugger.add_ct_detection(
img, dets[i], show_box=opt.reg_bbox, center_thresh=opt.center_thresh,
img_id='det_pred')
debugger.add_ct_detection(
img, batch['meta']['gt_det'][i].cpu().numpy().copy(),
show_box=opt.reg_bbox, img_id='det_gt')
debugger.add_3d_detection(
batch['meta']['image_path'][i], dets_pred[i], calib[i],
center_thresh=opt.center_thresh, img_id='add_pred')
debugger.add_3d_detection(
batch['meta']['image_path'][i], dets_gt[i], calib[i],
center_thresh=opt.center_thresh, img_id='add_gt')
# debugger.add_bird_view(
# dets_pred[i], center_thresh=opt.center_thresh, img_id='bird_pred')
# debugger.add_bird_view(dets_gt[i], img_id='bird_gt')
debugger.add_bird_views(
dets_pred[i], dets_gt[i],
center_thresh=opt.center_thresh, img_id='bird_pred_gt')
# debugger.add_blend_img(img, pred, 'out', white=True)
debugger.compose_vis_add(
batch['meta']['image_path'][i], dets_pred[i], calib[i],
opt.center_thresh, pred, 'bird_pred_gt', img_id='out')
# debugger.add_img(img, img_id='out')
if opt.debug == 4:
debugger.save_all_imgs(
opt.debug_dir, prefix='{}'.format(iter_id))
else:
debugger.show_all_imgs(pause=True)
def save_result(self, output, batch, results):
opt = self.opt
wh = output['wh'] if opt.reg_bbox else None
reg = output['reg'] if opt.reg_offset else None
dets = ddd_decode(output['hm'], output['rot'], output['dep'],
output['dim'], wh=wh, reg=reg, K=opt.K)
# x, y, score, r1-r8, depth, dim1-dim3, cls
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
calib = batch['meta']['calib'].detach().numpy()
# x, y, score, rot, depth, dim1, dim2, dim3
dets_pred = ddd_post_process(
dets.copy(), batch['meta']['c'].detach().numpy(),
batch['meta']['s'].detach().numpy(), calib, opt)
img_id = batch['meta']['img_id'].detach().numpy()[0]
results[img_id] = dets_pred[0]
for j in range(1, opt.num_classes + 1):
keep_inds = (results[img_id][j][:, -1] > opt.center_thresh)
results[img_id][j] = results[img_id][j][keep_inds]
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import numpy as np
import cv2
import sys
import time
from utils.debugger import Debugger
from models.data_parallel import DataParallel
from models.losses import FocalLoss, RegL1Loss
from models.decode import agnex_ct_decode, exct_decode
from models.utils import _sigmoid
from .base_trainer import BaseTrainer
class ExdetLoss(torch.nn.Module):
def __init__(self, opt):
super(ExdetLoss, self).__init__()
self.crit = torch.nn.MSELoss() if opt.mse_loss else FocalLoss()
self.crit_reg = RegL1Loss()
self.opt = opt
self.parts = ['t', 'l', 'b', 'r', 'c']
def forward(self, outputs, batch):
opt = self.opt
hm_loss, reg_loss = 0, 0
for s in range(opt.num_stacks):
output = outputs[s]
for p in self.parts:
tag = 'hm_{}'.format(p)
output[tag] = _sigmoid(output[tag])
hm_loss += self.crit(output[tag], batch[tag]) / opt.num_stacks
if p != 'c' and opt.reg_offset and opt.off_weight > 0:
reg_loss += self.crit_reg(output['reg_{}'.format(p)],
batch['reg_mask'],
batch['ind_{}'.format(p)],
batch['reg_{}'.format(p)]) / opt.num_stacks
loss = opt.hm_weight * hm_loss + opt.off_weight * reg_loss
loss_stats = {'loss': loss, 'off_loss': reg_loss, 'hm_loss': hm_loss}
return loss, loss_stats
class ExdetTrainer(BaseTrainer):
def __init__(self, opt, model, optimizer=None):
super(ExdetTrainer, self).__init__(opt, model, optimizer=optimizer)
self.decode = agnex_ct_decode if opt.agnostic_ex else exct_decode
def _get_losses(self, opt):
loss_states = ['loss', 'hm_loss', 'off_loss']
loss = ExdetLoss(opt)
return loss_states, loss
def debug(self, batch, output, iter_id):
opt = self.opt
detections = self.decode(output['hm_t'], output['hm_l'],
output['hm_b'], output['hm_r'],
output['hm_c']).detach().cpu().numpy()
detections[:, :, :4] *= opt.input_res / opt.output_res
for i in range(1):
debugger = Debugger(
dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme)
pred_hm = np.zeros(
(opt.input_res, opt.input_res, 3), dtype=np.uint8)
gt_hm = np.zeros((opt.input_res, opt.input_res, 3), dtype=np.uint8)
img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
img = ((img * self.opt.std + self.opt.mean) * 255.).astype(np.uint8)
for p in self.parts:
tag = 'hm_{}'.format(p)
pred = debugger.gen_colormap(
output[tag][i].detach().cpu().numpy())
gt = debugger.gen_colormap(
batch[tag][i].detach().cpu().numpy())
if p != 'c':
pred_hm = np.maximum(pred_hm, pred)
gt_hm = np.maximum(gt_hm, gt)
if p == 'c' or opt.debug > 2:
debugger.add_blend_img(img, pred, 'pred_{}'.format(p))
debugger.add_blend_img(img, gt, 'gt_{}'.format(p))
debugger.add_blend_img(img, pred_hm, 'pred')
debugger.add_blend_img(img, gt_hm, 'gt')
debugger.add_img(img, img_id='out')
for k in range(len(detections[i])):
if detections[i, k, 4] > 0.1:
debugger.add_coco_bbox(detections[i, k, :4], detections[i, k, -1],
detections[i, k, 4], img_id='out')
if opt.debug == 4:
debugger.save_all_imgs(
opt.debug_dir, prefix='{}'.format(iter_id))
else:
debugger.show_all_imgs(pause=True)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import numpy as np
from models.losses import FocalLoss, RegL1Loss, RegLoss, RegWeightedL1Loss
from models.decode import multi_pose_decode
from models.utils import _sigmoid, flip_tensor, flip_lr_off, flip_lr
from utils.debugger import Debugger
from utils.post_process import multi_pose_post_process
from utils.oracle_utils import gen_oracle_map
from .base_trainer import BaseTrainer
class MultiPoseLoss(torch.nn.Module):
def __init__(self, opt):
super(MultiPoseLoss, self).__init__()
self.crit = FocalLoss()
self.crit_hm_hp = torch.nn.MSELoss() if opt.mse_loss else FocalLoss()
self.crit_kp = RegWeightedL1Loss() if not opt.dense_hp else \
torch.nn.L1Loss(reduction='sum')
self.crit_reg = RegL1Loss() if opt.reg_loss == 'l1' else \
RegLoss() if opt.reg_loss == 'sl1' else None
self.opt = opt
def forward(self, outputs, batch):
opt = self.opt
hm_loss, wh_loss, off_loss = 0, 0, 0
lm_loss, off_loss, hm_hp_loss, hp_offset_loss = 0, 0, 0, 0
for s in range(opt.num_stacks):
output = outputs[s]
output['hm'] = output['hm']
# if opt.hm_hp and not opt.mse_loss:
# output['hm_hp'] = _sigmoid(output['hm_hp'])
if opt.eval_oracle_hmhp:
output['hm_hp'] = batch['hm_hp']
if opt.eval_oracle_hm:
output['hm'] = batch['hm']
if opt.eval_oracle_kps:
if opt.dense_hp:
output['hps'] = batch['dense_hps']
else:
output['hps'] = torch.from_numpy(gen_oracle_map(
batch['hps'].detach().cpu().numpy(),
batch['ind'].detach().cpu().numpy(),
opt.output_res, opt.output_res)).to(opt.device)
if opt.eval_oracle_hp_offset:
output['hp_offset'] = torch.from_numpy(gen_oracle_map(
batch['hp_offset'].detach().cpu().numpy(),
batch['hp_ind'].detach().cpu().numpy(),
opt.output_res, opt.output_res)).to(opt.device)
# 1. focal loss,求目标的中心,
hm_loss += self.crit(output['hm'], batch['hm']) / opt.num_stacks
if opt.wh_weight > 0:
wh_loss += self.crit_reg(output['wh'], batch['reg_mask'], # 2. 人脸bbox高度和宽度的loss
batch['ind'], batch['wh'], batch['wight_mask']) / opt.num_stacks
if opt.reg_offset and opt.off_weight > 0:
off_loss += self.crit_reg(output['hm_offset'], batch['reg_mask'], # 3. 人脸bbox中心点下采样,所需要的偏差补偿
batch['ind'], batch['hm_offset'], batch['wight_mask']) / opt.num_stacks
if opt.dense_hp:
mask_weight = batch['dense_hps_mask'].sum() + 1e-4
lm_loss += (self.crit_kp(output['hps'] * batch['dense_hps_mask'],
batch['dense_hps'] * batch['dense_hps_mask']) /
mask_weight) / opt.num_stacks
else:
lm_loss += self.crit_kp(output['landmarks'], batch['hps_mask'], # 4. 关节点的偏移
batch['ind'], batch['landmarks']) / opt.num_stacks
# if opt.reg_hp_offset and opt.off_weight > 0: # 关节点的中心偏移
# hp_offset_loss += self.crit_reg(
# output['hp_offset'], batch['hp_mask'],
# batch['hp_ind'], batch['hp_offset']) / opt.num_stacks
# if opt.hm_hp and opt.hm_hp_weight > 0: # 关节点的热力图
# hm_hp_loss += self.crit_hm_hp(
# output['hm_hp'], batch['hm_hp']) / opt.num_stacks
loss = opt.hm_weight * hm_loss + opt.wh_weight * wh_loss + \
opt.off_weight * off_loss + opt.lm_weight * lm_loss
loss_stats = {'loss': loss, 'hm_loss': hm_loss, 'lm_loss': lm_loss,
'wh_loss': wh_loss, 'off_loss': off_loss}
return loss, loss_stats
class MultiPoseTrainer(BaseTrainer):
def __init__(self, opt, model, optimizer=None):
super(MultiPoseTrainer, self).__init__(opt, model, optimizer=optimizer)
def _get_losses(self, opt):
loss_states = ['loss', 'hm_loss', 'lm_loss', 'wh_loss', 'off_loss']
loss = MultiPoseLoss(opt)
return loss_states, loss
def debug(self, batch, output, iter_id):
opt = self.opt
reg = output['reg'] if opt.reg_offset else None
hm_hp = output['hm_hp'] if opt.hm_hp else None
hp_offset = output['hp_offset'] if opt.reg_hp_offset else None
dets = multi_pose_decode(
output['hm'], output['wh'], output['hps'],
reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=opt.K)
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
dets[:, :, :4] *= opt.input_res / opt.output_res
dets[:, :, 5:39] *= opt.input_res / opt.output_res
dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2])
dets_gt[:, :, :4] *= opt.input_res / opt.output_res
dets_gt[:, :, 5:39] *= opt.input_res / opt.output_res
for i in range(1):
debugger = Debugger(
dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme)
img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0)
img = np.clip(((
img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8)
pred = debugger.gen_colormap(
output['hm'][i].detach().cpu().numpy())
gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hm')
debugger.add_blend_img(img, gt, 'gt_hm')
debugger.add_img(img, img_id='out_pred')
for k in range(len(dets[i])):
if dets[i, k, 4] > opt.center_thresh:
debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1],
dets[i, k, 4], img_id='out_pred')
debugger.add_coco_hp(dets[i, k, 5:39], img_id='out_pred')
debugger.add_img(img, img_id='out_gt')
for k in range(len(dets_gt[i])):
if dets_gt[i, k, 4] > opt.center_thresh:
debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1],
dets_gt[i, k, 4], img_id='out_gt')
debugger.add_coco_hp(dets_gt[i, k, 5:39], img_id='out_gt')
if opt.hm_hp:
pred = debugger.gen_colormap_hp(
output['hm_hp'][i].detach().cpu().numpy())
gt = debugger.gen_colormap_hp(
batch['hm_hp'][i].detach().cpu().numpy())
debugger.add_blend_img(img, pred, 'pred_hmhp')
debugger.add_blend_img(img, gt, 'gt_hmhp')
if opt.debug == 4:
debugger.save_all_imgs(
opt.debug_dir, prefix='{}'.format(iter_id))
else:
debugger.show_all_imgs(pause=True)
def save_result(self, output, batch, results):
reg = output['reg'] if self.opt.reg_offset else None
hm_hp = output['hm_hp'] if self.opt.hm_hp else None
hp_offset = output['hp_offset'] if self.opt.reg_hp_offset else None
dets = multi_pose_decode(
output['hm'], output['wh'], output['hps'],
reg=reg, hm_hp=hm_hp, hp_offset=hp_offset, K=self.opt.K)
dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])
dets_out = multi_pose_post_process(
dets.copy(), batch['meta']['c'].cpu().numpy(),
batch['meta']['s'].cpu().numpy(),
output['hm'].shape[2], output['hm'].shape[3])
results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0]
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .ctdet import CtdetTrainer
from .ddd import DddTrainer
from .exdet import ExdetTrainer
from .multi_pose import MultiPoseTrainer
train_factory = {
'exdet': ExdetTrainer,
'ddd': DddTrainer,
'ctdet': CtdetTrainer,
'multi_pose': MultiPoseTrainer,
}
# code in this file is adpated from rpmcruz/autoaugment
# https://github.com/rpmcruz/autoaugment/blob/master/transformations.py
import random
import PIL, PIL.ImageOps, PIL.ImageEnhance, PIL.ImageDraw
import numpy as np
import torch
from PIL import Image
def ShearX(img, v): # [-0.3, 0.3]
assert -0.3 <= v <= 0.3
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, v, 0, 0, 1, 0))
def ShearY(img, v): # [-0.3, 0.3]
assert -0.3 <= v <= 0.3
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, v, 1, 0))
def TranslateX(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
assert -0.45 <= v <= 0.45
if random.random() > 0.5:
v = -v
v = v * img.size[0]
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0))
def TranslateXabs(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
assert 0 <= v
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, v, 0, 1, 0))
def TranslateY(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
assert -0.45 <= v <= 0.45
if random.random() > 0.5:
v = -v
v = v * img.size[1]
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v))
def TranslateYabs(img, v): # [-150, 150] => percentage: [-0.45, 0.45]
assert 0 <= v
if random.random() > 0.5:
v = -v
return img.transform(img.size, PIL.Image.AFFINE, (1, 0, 0, 0, 1, v))
def Rotate(img, v): # [-30, 30]
assert -30 <= v <= 30
if random.random() > 0.5:
v = -v
return img.rotate(v)
def AutoContrast(img, _):
return PIL.ImageOps.autocontrast(img)
def Invert(img, _):
return PIL.ImageOps.invert(img)
def Equalize(img, _):
return PIL.ImageOps.equalize(img)
def Flip(img, _): # not from the paper
return PIL.ImageOps.mirror(img)
def Solarize(img, v): # [0, 256]
assert 0 <= v <= 256
return PIL.ImageOps.solarize(img, v)
def SolarizeAdd(img, addition=0, threshold=128):
img_np = np.array(img).astype(np.int)
img_np = img_np + addition
img_np = np.clip(img_np, 0, 255)
img_np = img_np.astype(np.uint8)
img = Image.fromarray(img_np)
return PIL.ImageOps.solarize(img, threshold)
def Posterize(img, v): # [4, 8]
v = int(v)
v = max(1, v)
return PIL.ImageOps.posterize(img, v)
def Contrast(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Contrast(img).enhance(v)
def Color(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Color(img).enhance(v)
def Brightness(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Brightness(img).enhance(v)
def Sharpness(img, v): # [0.1,1.9]
assert 0.1 <= v <= 1.9
return PIL.ImageEnhance.Sharpness(img).enhance(v)
def Cutout(img, v): # [0, 60] => percentage: [0, 0.2]
assert 0.0 <= v <= 0.2
if v <= 0.:
return img
v = v * img.size[0]
return CutoutAbs(img, v)
def CutoutAbs(img, v): # [0, 60] => percentage: [0, 0.2]
# assert 0 <= v <= 20
if v < 0:
return img
w, h = img.size
x0 = np.random.uniform(w)
y0 = np.random.uniform(h)
x0 = int(max(0, x0 - v / 2.))
y0 = int(max(0, y0 - v / 2.))
x1 = min(w, x0 + v)
y1 = min(h, y0 + v)
xy = (x0, y0, x1, y1)
color = (125, 123, 114)
# color = (0, 0, 0)
img = img.copy()
PIL.ImageDraw.Draw(img).rectangle(xy, color)
return img
def SamplePairing(imgs): # [0, 0.4]
def f(img1, v):
i = np.random.choice(len(imgs))
img2 = PIL.Image.fromarray(imgs[i])
return PIL.Image.blend(img1, img2, v)
return f
def Identity(img, v):
return img
def augment_list(): # 16 oeprations and their ranges
# https://github.com/google-research/uda/blob/master/image/randaugment/policies.py#L57
# l = [
# (Identity, 0., 1.0),
# (ShearX, 0., 0.3), # 0
# (ShearY, 0., 0.3), # 1
# (TranslateX, 0., 0.33), # 2
# (TranslateY, 0., 0.33), # 3
# (Rotate, 0, 30), # 4
# (AutoContrast, 0, 1), # 5
# (Invert, 0, 1), # 6
# (Equalize, 0, 1), # 7
# (Solarize, 0, 110), # 8
# (Posterize, 4, 8), # 9
# # (Contrast, 0.1, 1.9), # 10
# (Color, 0.1, 1.9), # 11
# (Brightness, 0.1, 1.9), # 12
# (Sharpness, 0.1, 1.9), # 13
# # (Cutout, 0, 0.2), # 14
# # (SamplePairing(imgs), 0, 0.4), # 15
# ]
# https://github.com/tensorflow/tpu/blob/8462d083dd89489a79e3200bcc8d4063bf362186/models/official/efficientnet/autoaugment.py#L505
l = [
(Identity, 0., 1.0),
(AutoContrast, 0, 1),
(Equalize, 0, 1),
(Invert, 0, 1),
# (Rotate, 0, 30),
(Posterize, 0, 4),
(Solarize, 0, 256),
(SolarizeAdd, 0, 110),
(Color, 0.1, 1.9),
(Contrast, 0.1, 1.9),
(Brightness, 0.1, 1.9),
(Sharpness, 0.1, 1.9),
# (ShearX, 0., 0.3), # x方向的扭曲
# (ShearY, 0., 0.3), # y方向的扭曲
(CutoutAbs, 0, 40),
# (TranslateXabs, 0., 100), # x方向移动
# (TranslateYabs, 0., 100),
]
return l
class Lighting(object):
"""Lighting noise(AlexNet - style PCA - based noise)"""
def __init__(self, alphastd, eigval, eigvec):
self.alphastd = alphastd
self.eigval = torch.Tensor(eigval)
self.eigvec = torch.Tensor(eigvec)
def __call__(self, img):
if self.alphastd == 0:
return img
alpha = img.new().resize_(3).normal_(0, self.alphastd)
rgb = self.eigvec.type_as(img).clone() \
.mul(alpha.view(1, 3).expand(3, 3)) \
.mul(self.eigval.view(1, 3).expand(3, 3)) \
.sum(1).squeeze()
return img.add(rgb.view(3, 1, 1).expand_as(img))
def lighting_(data_rng, image, alphastd, eigval, eigvec):
alpha = data_rng.normal(scale=alphastd, size=(3, ))
image += np.dot(eigvec, eigval * alpha)
class CutoutDefault(object):
"""
Reference : https://github.com/quark0/darts/blob/master/cnn/utils.py
"""
def __init__(self, length):
self.length = length
def __call__(self, img):
h, w = img.size(1), img.size(2)
mask = np.ones((h, w), np.float32)
y = np.random.randint(h)
x = np.random.randint(w)
y1 = np.clip(y - self.length // 2, 0, h)
y2 = np.clip(y + self.length // 2, 0, h)
x1 = np.clip(x - self.length // 2, 0, w)
x2 = np.clip(x + self.length // 2, 0, w)
mask[y1: y2, x1: x2] = 0.
mask = torch.from_numpy(mask)
mask = mask.expand_as(img)
img *= mask
return img
class RandAugment:
'''
n:每次选择几种增强方式
'''
def __init__(self, n, m=30):
self.n = n
self.m = m # [0, 30]
self.augment_list = augment_list()
def __call__(self, img):
ops = random.choices(self.augment_list, k=self.n)
for op, minval, maxval in ops:
aug_level = random.randint(0, self.m)
val = (float(aug_level) / 30) * float(maxval - minval) + minval
img = op(img, val)
return img
def Randaugment(data_rng, inp, eig_val, eig_vec):
img = Image.fromarray(inp)
augment = RandAugment(5, 30)
img = augment(img)
cv_img = np.asarray(img)
cv_img = (cv_img.astype(np.float32) / 255.)
lighting_(data_rng, cv_img, 0.1, eig_val, eig_vec)
return cv_img
if __name__ == '__main__':
img = Image.open("/path/0_Parade_marchingband_1_35.jpg")
# randaugment = RandAugment(5, 20)
randaugment = TranslateXabs
img1 = randaugment(img, 100)
img1.show()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import cv2
def compute_box_3d(dim, location, rotation_y):
# dim: 3
# location: 3
# rotation_y: 1
# return: 8 x 3
c, s = np.cos(rotation_y), np.sin(rotation_y)
R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32)
l, w, h = dim[2], dim[1], dim[0]
x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2]
y_corners = [0,0,0,0,-h,-h,-h,-h]
z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2]
corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32)
corners_3d = np.dot(R, corners)
corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(3, 1)
return corners_3d.transpose(1, 0)
def project_to_image(pts_3d, P):
# pts_3d: n x 3
# P: 3 x 4
# return: n x 2
pts_3d_homo = np.concatenate(
[pts_3d, np.ones((pts_3d.shape[0], 1), dtype=np.float32)], axis=1)
pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0)
pts_2d = pts_2d[:, :2] / pts_2d[:, 2:]
# import pdb; pdb.set_trace()
return pts_2d
def compute_orientation_3d(dim, location, rotation_y):
# dim: 3
# location: 3
# rotation_y: 1
# return: 2 x 3
c, s = np.cos(rotation_y), np.sin(rotation_y)
R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32)
orientation_3d = np.array([[0, dim[2]], [0, 0], [0, 0]], dtype=np.float32)
orientation_3d = np.dot(R, orientation_3d)
orientation_3d = orientation_3d + \
np.array(location, dtype=np.float32).reshape(3, 1)
return orientation_3d.transpose(1, 0)
def draw_box_3d(image, corners, c=(0, 0, 255)):
face_idx = [[0,1,5,4],
[1,2,6, 5],
[2,3,7,6],
[3,0,4,7]]
for ind_f in range(3, -1, -1):
f = face_idx[ind_f]
for j in range(4):
cv2.line(image, (corners[f[j], 0], corners[f[j], 1]),
(corners[f[(j+1)%4], 0], corners[f[(j+1)%4], 1]), c, 2, lineType=cv2.LINE_AA)
if ind_f == 0:
cv2.line(image, (corners[f[0], 0], corners[f[0], 1]),
(corners[f[2], 0], corners[f[2], 1]), c, 1, lineType=cv2.LINE_AA)
cv2.line(image, (corners[f[1], 0], corners[f[1], 1]),
(corners[f[3], 0], corners[f[3], 1]), c, 1, lineType=cv2.LINE_AA)
return image
def unproject_2d_to_3d(pt_2d, depth, P):
# pts_2d: 2
# depth: 1
# P: 3 x 4
# return: 3
z = depth - P[2, 3]
x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0]
y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1]
pt_3d = np.array([x, y, z], dtype=np.float32)
return pt_3d
def alpha2rot_y(alpha, x, cx, fx):
"""
Get rotation_y by alpha + theta - 180
alpha : Observation angle of object, ranging [-pi..pi]
x : Object center x to the camera center (x-W/2), in pixels
rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi]
"""
rot_y = alpha + np.arctan2(x - cx, fx)
if rot_y > np.pi:
rot_y -= 2 * np.pi
if rot_y < -np.pi:
rot_y += 2 * np.pi
return rot_y
def rot_y2alpha(rot_y, x, cx, fx):
"""
Get rotation_y by alpha + theta - 180
alpha : Observation angle of object, ranging [-pi..pi]
x : Object center x to the camera center (x-W/2), in pixels
rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi]
"""
alpha = rot_y - np.arctan2(x - cx, fx)
if alpha > np.pi:
alpha -= 2 * np.pi
if alpha < -np.pi:
alpha += 2 * np.pi
return alpha
def ddd2locrot(center, alpha, dim, depth, calib):
# single image
locations = unproject_2d_to_3d(center, depth, calib)
locations[1] += dim[0] / 2
rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0])
return locations, rotation_y
def project_3d_bbox(location, dim, rotation_y, calib):
box_3d = compute_box_3d(dim, location, rotation_y)
box_2d = project_to_image(box_3d, calib)
return box_2d
if __name__ == '__main__':
calib = np.array(
[[7.070493000000e+02, 0.000000000000e+00, 6.040814000000e+02, 4.575831000000e+01],
[0.000000000000e+00, 7.070493000000e+02, 1.805066000000e+02, -3.454157000000e-01],
[0.000000000000e+00, 0.000000000000e+00, 1.000000000000e+00, 4.981016000000e-03]],
dtype=np.float32)
alpha = -0.20
tl = np.array([712.40, 143.00], dtype=np.float32)
br = np.array([810.73, 307.92], dtype=np.float32)
ct = (tl + br) / 2
rotation_y = 0.01
print('alpha2rot_y', alpha2rot_y(alpha, ct[0], calib[0, 2], calib[0, 0]))
print('rotation_y', rotation_y)
\ No newline at end of file
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
import cv2
from .ddd_utils import compute_box_3d, project_to_image, draw_box_3d
class Debugger(object):
def __init__(self, ipynb=False, theme='black',
num_classes=-1, dataset=None, down_ratio=4):
self.ipynb = ipynb
if not self.ipynb:
import matplotlib.pyplot as plt
self.plt = plt
self.imgs = {}
self.theme = theme
colors = [(color_list[_]).astype(np.uint8) \
for _ in range(len(color_list))]
self.colors = np.array(colors, dtype=np.uint8).reshape(len(colors), 1, 1, 3)
if self.theme == 'white':
self.colors = self.colors.reshape(-1)[::-1].reshape(len(colors), 1, 1, 3)
self.colors = np.clip(self.colors, 0., 0.6 * 255).astype(np.uint8)
self.dim_scale = 1
if dataset == 'facehp':
self.names = ['face']
self.num_class = 1
self.num_joints = 5
self.edges = [[0, 1], [0, 2], [1, 3], [2, 4]]
self.ec = [(0, 0, 255), (255, 0, 0), (0, 0, 255), (127, 255, 0),
(127, 255, 0), (0, 0, 255), (255, 0, 255),
(255, 0, 0), (255, 0, 0), (0, 0, 255), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 255),
(255, 0, 0), (255, 0, 0), (0, 0, 255), (0, 0, 255)]
self.colors_hp = [(0, 0, 255), (255, 0, 0), (0, 0, 255),
(127, 255, 0), (127, 255, 0), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255)]
if dataset == 'coco_hp':
self.names = ['p']
self.num_class = 1
self.num_joints = 17
self.edges = [[0, 1], [0, 2], [1, 3], [2, 4],
[3, 5], [4, 6], [5, 6],
[5, 7], [7, 9], [6, 8], [8, 10],
[5, 11], [6, 12], [11, 12],
[11, 13], [13, 15], [12, 14], [14, 16]]
self.ec = [(255, 0, 0), (0, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 255),
(255, 0, 0), (255, 0, 0), (0, 0, 255), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 255),
(255, 0, 0), (255, 0, 0), (0, 0, 255), (0, 0, 255)]
self.colors_hp = [(255, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255), (255, 0, 0), (0, 0, 255),
(255, 0, 0), (0, 0, 255)]
elif num_classes == 80 or dataset == 'coco':
self.names = coco_class_name
elif num_classes == 20 or dataset == 'pascal':
self.names = pascal_class_name
elif dataset == 'gta':
self.names = gta_class_name
self.focal_length = 935.3074360871937
self.W = 1920
self.H = 1080
self.dim_scale = 3
elif dataset == 'viper':
self.names = gta_class_name
self.focal_length = 1158
self.W = 1920
self.H = 1080
self.dim_scale = 3
elif num_classes == 3 or dataset == 'kitti':
self.names = kitti_class_name
self.focal_length = 721.5377
self.W = 1242
self.H = 375
elif num_classes == 1 or dataset == 'pig': # 自己的数据集
self.names = pig_class_name
elif num_classes == 1 or dataset == 'facehp':
self.names = face_class_name
num_classes = len(self.names)
self.down_ratio=down_ratio
# for bird view
self.world_size = 64
self.out_size = 384
def add_img(self, img, img_id='default', revert_color=False):
if revert_color:
img = 255 - img
self.imgs[img_id] = img.copy()
def add_mask(self, mask, bg, imgId = 'default', trans = 0.8):
self.imgs[imgId] = (mask.reshape(
mask.shape[0], mask.shape[1], 1) * 255 * trans + \
bg * (1 - trans)).astype(np.uint8)
def show_img(self, pause = False, imgId = 'default'):
cv2.imshow('{}'.format(imgId), self.imgs[imgId])
if pause:
cv2.waitKey()
def add_blend_img(self, back, fore, img_id='blend', trans=0.7):
if self.theme == 'white':
fore = 255 - fore
if fore.shape[0] != back.shape[0] or fore.shape[0] != back.shape[1]:
fore = cv2.resize(fore, (back.shape[1], back.shape[0]))
if len(fore.shape) == 2:
fore = fore.reshape(fore.shape[0], fore.shape[1], 1)
self.imgs[img_id] = (back * (1. - trans) + fore * trans)
self.imgs[img_id][self.imgs[img_id] > 255] = 255
self.imgs[img_id][self.imgs[img_id] < 0] = 0
self.imgs[img_id] = self.imgs[img_id].astype(np.uint8).copy()
'''
# slow version
def gen_colormap(self, img, output_res=None):
# num_classes = len(self.colors)
img[img < 0] = 0
h, w = img.shape[1], img.shape[2]
if output_res is None:
output_res = (h * self.down_ratio, w * self.down_ratio)
color_map = np.zeros((output_res[0], output_res[1], 3), dtype=np.uint8)
for i in range(img.shape[0]):
resized = cv2.resize(img[i], (output_res[1], output_res[0]))
resized = resized.reshape(output_res[0], output_res[1], 1)
cl = self.colors[i] if not (self.theme == 'white') \
else 255 - self.colors[i]
color_map = np.maximum(color_map, (resized * cl).astype(np.uint8))
return color_map
'''
def gen_colormap(self, img, output_res=None):
img = img.copy()
c, h, w = img.shape[0], img.shape[1], img.shape[2]
if output_res is None:
output_res = (h * self.down_ratio, w * self.down_ratio)
img = img.transpose(1, 2, 0).reshape(h, w, c, 1).astype(np.float32)
colors = np.array(
self.colors, dtype=np.float32).reshape(-1, 3)[:c].reshape(1, 1, c, 3)
if self.theme == 'white':
colors = 255 - colors
color_map = (img * colors).max(axis=2).astype(np.uint8)
color_map = cv2.resize(color_map, (output_res[0], output_res[1]))
return color_map
'''
# slow
def gen_colormap_hp(self, img, output_res=None):
# num_classes = len(self.colors)
# img[img < 0] = 0
h, w = img.shape[1], img.shape[2]
if output_res is None:
output_res = (h * self.down_ratio, w * self.down_ratio)
color_map = np.zeros((output_res[0], output_res[1], 3), dtype=np.uint8)
for i in range(img.shape[0]):
resized = cv2.resize(img[i], (output_res[1], output_res[0]))
resized = resized.reshape(output_res[0], output_res[1], 1)
cl = self.colors_hp[i] if not (self.theme == 'white') else \
(255 - np.array(self.colors_hp[i]))
color_map = np.maximum(color_map, (resized * cl).astype(np.uint8))
return color_map
'''
def gen_colormap_hp(self, img, output_res=None):
c, h, w = img.shape[0], img.shape[1], img.shape[2]
if output_res is None:
output_res = (h * self.down_ratio, w * self.down_ratio)
img = img.transpose(1, 2, 0).reshape(h, w, c, 1).astype(np.float32)
colors = np.array(
self.colors_hp, dtype=np.float32).reshape(-1, 3)[:c].reshape(1, 1, c, 3)
if self.theme == 'white':
colors = 255 - colors
color_map = (img * colors).max(axis=2).astype(np.uint8)
color_map = cv2.resize(color_map, (output_res[0], output_res[1]))
return color_map
def add_rect(self, rect1, rect2, c, conf=1, img_id='default'):
cv2.rectangle(
self.imgs[img_id], (rect1[0], rect1[1]), (rect2[0], rect2[1]), c, 2)
if conf < 1:
cv2.circle(self.imgs[img_id], (rect1[0], rect1[1]), int(10 * conf), c, 1)
cv2.circle(self.imgs[img_id], (rect2[0], rect2[1]), int(10 * conf), c, 1)
cv2.circle(self.imgs[img_id], (rect1[0], rect2[1]), int(10 * conf), c, 1)
cv2.circle(self.imgs[img_id], (rect2[0], rect1[1]), int(10 * conf), c, 1)
def add_coco_bbox(self, bbox, cat, conf=1, show_txt=True, img_id='default'):
bbox = np.array(bbox, dtype=np.int32)
# cat = (int(cat) + 1) % 80
cat = int(cat)
# print('cat', cat, self.names[cat])
c = self.colors[cat][0][0].tolist()
if self.theme == 'white':
c = (255 - np.array(c)).tolist()
txt = '{}{:.1f}'.format(self.names[cat], conf)
font = cv2.FONT_HERSHEY_SIMPLEX
cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0]
cv2.rectangle(
self.imgs[img_id], (bbox[0], bbox[1]), (bbox[2], bbox[3]), c, 2)
if show_txt:
cv2.rectangle(self.imgs[img_id],
(bbox[0], bbox[1] - cat_size[1] - 2),
(bbox[0] + cat_size[0], bbox[1] - 2), c, -1)
cv2.putText(self.imgs[img_id], txt, (bbox[0], bbox[1] - 2),
font, 0.5, (0, 0, 0), thickness=1, lineType=cv2.LINE_AA)
def add_coco_hp(self, points, img_id='default'):
points = np.array(points, dtype=np.int32).reshape(self.num_joints, 2)
for j in range(self.num_joints):
cv2.circle(self.imgs[img_id],
(points[j, 0], points[j, 1]), 3, self.colors_hp[j], -1)
# for person pose edege show
# for j, e in enumerate(self.edges):
# if points[e].min() > 0:
# cv2.line(self.imgs[img_id], (points[e[0], 0], points[e[0], 1]),
# (points[e[1], 0], points[e[1], 1]), self.ec[j], 2,
# lineType=cv2.LINE_AA)
def add_points(self, points, img_id='default'):
num_classes = len(points)
# assert num_classes == len(self.colors)
for i in range(num_classes):
for j in range(len(points[i])):
c = self.colors[i, 0, 0]
cv2.circle(self.imgs[img_id], (points[i][j][0] * self.down_ratio,
points[i][j][1] * self.down_ratio),
5, (255, 255, 255), -1)
cv2.circle(self.imgs[img_id], (points[i][j][0] * self.down_ratio,
points[i][j][1] * self.down_ratio),
3, (int(c[0]), int(c[1]), int(c[2])), -1)
def show_all_imgs(self, pause=False, time=0):
if not self.ipynb:
for i, v in self.imgs.items():
cv2.imshow('{}'.format(i), v)
if cv2.waitKey(0 if pause else 2000) == 27:
import sys
sys.exit(0)
else:
self.ax = None
nImgs = len(self.imgs)
fig=self.plt.figure(figsize=(nImgs * 10,10))
nCols = nImgs
nRows = nImgs // nCols
for i, (k, v) in enumerate(self.imgs.items()):
fig.add_subplot(1, nImgs, i + 1)
if len(v.shape) == 3:
self.plt.imshow(cv2.cvtColor(v, cv2.COLOR_BGR2RGB))
else:
self.plt.imshow(v)
self.plt.show()
def return_img(self, img_id='multi_pose'):
return self.imgs[img_id]
def save_img(self, imgId='default', path='./cache/debug/'):
cv2.imwrite(path + '{}.png'.format(imgId), self.imgs[imgId])
def save_all_imgs(self, path='./cache/debug/', prefix='', genID=False):
if genID:
try:
idx = int(np.loadtxt(path + '/id.txt'))
except:
idx = 0
prefix=idx
np.savetxt(path + '/id.txt', np.ones(1) * (idx + 1), fmt='%d')
for i, v in self.imgs.items():
cv2.imwrite(path + '/{}{}.png'.format(prefix, i), v)
def remove_side(self, img_id, img):
if not (img_id in self.imgs):
return
ws = img.sum(axis=2).sum(axis=0)
l = 0
while ws[l] == 0 and l < len(ws):
l+= 1
r = ws.shape[0] - 1
while ws[r] == 0 and r > 0:
r -= 1
hs = img.sum(axis=2).sum(axis=1)
t = 0
while hs[t] == 0 and t < len(hs):
t += 1
b = hs.shape[0] - 1
while hs[b] == 0 and b > 0:
b -= 1
self.imgs[img_id] = self.imgs[img_id][t:b+1, l:r+1].copy()
def project_3d_to_bird(self, pt):
pt[0] += self.world_size / 2
pt[1] = self.world_size - pt[1]
pt = pt * self.out_size / self.world_size
return pt.astype(np.int32)
def add_ct_detection(
self, img, dets, show_box=False, show_txt=True,
center_thresh=0.5, img_id='det'):
# dets: max_preds x 5
self.imgs[img_id] = img.copy()
if type(dets) == type({}):
for cat in dets:
for i in range(len(dets[cat])):
if dets[cat][i, 2] > center_thresh:
cl = (self.colors[cat, 0, 0]).tolist()
ct = dets[cat][i, :2].astype(np.int32)
if show_box:
w, h = dets[cat][i, -2], dets[cat][i, -1]
x, y = dets[cat][i, 0], dets[cat][i, 1]
bbox = np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2],
dtype=np.float32)
self.add_coco_bbox(
bbox, cat - 1, dets[cat][i, 2],
show_txt=show_txt, img_id=img_id)
else:
for i in range(len(dets)):
if dets[i, 2] > center_thresh:
# print('dets', dets[i])
cat = int(dets[i, -1])
cl = (self.colors[cat, 0, 0] if self.theme == 'black' else \
255 - self.colors[cat, 0, 0]).tolist()
ct = dets[i, :2].astype(np.int32) * self.down_ratio
cv2.circle(self.imgs[img_id], (ct[0], ct[1]), 3, cl, -1)
if show_box:
w, h = dets[i, -3] * self.down_ratio, dets[i, -2] * self.down_ratio
x, y = dets[i, 0] * self.down_ratio, dets[i, 1] * self.down_ratio
bbox = np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2],
dtype=np.float32)
self.add_coco_bbox(bbox, dets[i, -1], dets[i, 2], img_id=img_id)
def add_3d_detection(
self, image_or_path, dets, calib, show_txt=False,
center_thresh=0.5, img_id='det'):
if isinstance(image_or_path, np.ndarray):
self.imgs[img_id] = image_or_path
else:
self.imgs[img_id] = cv2.imread(image_or_path)
for cat in dets:
for i in range(len(dets[cat])):
cl = (self.colors[cat - 1, 0, 0]).tolist()
if dets[cat][i, -1] > center_thresh:
dim = dets[cat][i, 5:8]
loc = dets[cat][i, 8:11]
rot_y = dets[cat][i, 11]
# loc[1] = loc[1] - dim[0] / 2 + dim[0] / 2 / self.dim_scale
# dim = dim / self.dim_scale
if loc[2] > 1:
box_3d = compute_box_3d(dim, loc, rot_y)
box_2d = project_to_image(box_3d, calib)
self.imgs[img_id] = draw_box_3d(self.imgs[img_id], box_2d, cl)
def compose_vis_add(
self, img_path, dets, calib,
center_thresh, pred, bev, img_id='out'):
self.imgs[img_id] = cv2.imread(img_path)
# h, w = self.imgs[img_id].shape[:2]
# pred = cv2.resize(pred, (h, w))
h, w = pred.shape[:2]
hs, ws = self.imgs[img_id].shape[0] / h, self.imgs[img_id].shape[1] / w
self.imgs[img_id] = cv2.resize(self.imgs[img_id], (w, h))
self.add_blend_img(self.imgs[img_id], pred, img_id)
for cat in dets:
for i in range(len(dets[cat])):
cl = (self.colors[cat - 1, 0, 0]).tolist()
if dets[cat][i, -1] > center_thresh:
dim = dets[cat][i, 5:8]
loc = dets[cat][i, 8:11]
rot_y = dets[cat][i, 11]
# loc[1] = loc[1] - dim[0] / 2 + dim[0] / 2 / self.dim_scale
# dim = dim / self.dim_scale
if loc[2] > 1:
box_3d = compute_box_3d(dim, loc, rot_y)
box_2d = project_to_image(box_3d, calib)
box_2d[:, 0] /= hs
box_2d[:, 1] /= ws
self.imgs[img_id] = draw_box_3d(self.imgs[img_id], box_2d, cl)
self.imgs[img_id] = np.concatenate(
[self.imgs[img_id], self.imgs[bev]], axis=1)
def add_2d_detection(
self, img, dets, show_box=False, show_txt=True,
center_thresh=0.5, img_id='det'):
self.imgs[img_id] = img
for cat in dets:
for i in range(len(dets[cat])):
cl = (self.colors[cat - 1, 0, 0]).tolist()
if dets[cat][i, -1] > center_thresh:
bbox = dets[cat][i, 1:5]
self.add_coco_bbox(
bbox, cat - 1, dets[cat][i, -1],
show_txt=show_txt, img_id=img_id)
def add_bird_view(self, dets, center_thresh=0.3, img_id='bird'):
bird_view = np.ones((self.out_size, self.out_size, 3), dtype=np.uint8) * 230
for cat in dets:
cl = (self.colors[cat - 1, 0, 0]).tolist()
lc = (250, 152, 12)
for i in range(len(dets[cat])):
if dets[cat][i, -1] > center_thresh:
dim = dets[cat][i, 5:8]
loc = dets[cat][i, 8:11]
rot_y = dets[cat][i, 11]
rect = compute_box_3d(dim, loc, rot_y)[:4, [0, 2]]
for k in range(4):
rect[k] = self.project_3d_to_bird(rect[k])
# cv2.circle(bird_view, (rect[k][0], rect[k][1]), 2, lc, -1)
cv2.polylines(
bird_view,[rect.reshape(-1, 1, 2).astype(np.int32)],
True,lc,2,lineType=cv2.LINE_AA)
for e in [[0, 1]]:
t = 4 if e == [0, 1] else 1
cv2.line(bird_view, (rect[e[0]][0], rect[e[0]][1]),
(rect[e[1]][0], rect[e[1]][1]), lc, t,
lineType=cv2.LINE_AA)
self.imgs[img_id] = bird_view
def add_bird_views(self, dets_dt, dets_gt, center_thresh=0.3, img_id='bird'):
alpha = 0.5
bird_view = np.ones((self.out_size, self.out_size, 3), dtype=np.uint8) * 230
for ii, (dets, lc, cc) in enumerate(
[(dets_gt, (12, 49, 250), (0, 0, 255)),
(dets_dt, (250, 152, 12), (255, 0, 0))]):
# cc = np.array(lc, dtype=np.uint8).reshape(1, 1, 3)
for cat in dets:
cl = (self.colors[cat - 1, 0, 0]).tolist()
for i in range(len(dets[cat])):
if dets[cat][i, -1] > center_thresh:
dim = dets[cat][i, 5:8]
loc = dets[cat][i, 8:11]
rot_y = dets[cat][i, 11]
rect = compute_box_3d(dim, loc, rot_y)[:4, [0, 2]]
for k in range(4):
rect[k] = self.project_3d_to_bird(rect[k])
if ii == 0:
cv2.fillPoly(
bird_view,[rect.reshape(-1, 1, 2).astype(np.int32)],
lc,lineType=cv2.LINE_AA)
else:
cv2.polylines(
bird_view,[rect.reshape(-1, 1, 2).astype(np.int32)],
True,lc,2,lineType=cv2.LINE_AA)
# for e in [[0, 1], [1, 2], [2, 3], [3, 0]]:
for e in [[0, 1]]:
t = 4 if e == [0, 1] else 1
cv2.line(bird_view, (rect[e[0]][0], rect[e[0]][1]),
(rect[e[1]][0], rect[e[1]][1]), lc, t,
lineType=cv2.LINE_AA)
self.imgs[img_id] = bird_view
kitti_class_name = [
'p', 'v', 'b'
]
gta_class_name = [
'p', 'v'
]
pascal_class_name = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
"car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
"person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
coco_class_name = [
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
pig_class_name = ['pig']
face_class_name = ['face']
color_list = np.array(
[
1.000, 1.000, 1.000,
0.850, 0.325, 0.098,
0.929, 0.694, 0.125,
0.494, 0.184, 0.556,
0.466, 0.674, 0.188,
0.301, 0.745, 0.933,
0.635, 0.078, 0.184,
0.300, 0.300, 0.300,
0.600, 0.600, 0.600,
1.000, 0.000, 0.000,
1.000, 0.500, 0.000,
0.749, 0.749, 0.000,
0.000, 1.000, 0.000,
0.000, 0.000, 1.000,
0.667, 0.000, 1.000,
0.333, 0.333, 0.000,
0.333, 0.667, 0.000,
0.333, 1.000, 0.000,
0.667, 0.333, 0.000,
0.667, 0.667, 0.000,
0.667, 1.000, 0.000,
1.000, 0.333, 0.000,
1.000, 0.667, 0.000,
1.000, 1.000, 0.000,
0.000, 0.333, 0.500,
0.000, 0.667, 0.500,
0.000, 1.000, 0.500,
0.333, 0.000, 0.500,
0.333, 0.333, 0.500,
0.333, 0.667, 0.500,
0.333, 1.000, 0.500,
0.667, 0.000, 0.500,
0.667, 0.333, 0.500,
0.667, 0.667, 0.500,
0.667, 1.000, 0.500,
1.000, 0.000, 0.500,
1.000, 0.333, 0.500,
1.000, 0.667, 0.500,
1.000, 1.000, 0.500,
0.000, 0.333, 1.000,
0.000, 0.667, 1.000,
0.000, 1.000, 1.000,
0.333, 0.000, 1.000,
0.333, 0.333, 1.000,
0.333, 0.667, 1.000,
0.333, 1.000, 1.000,
0.667, 0.000, 1.000,
0.667, 0.333, 1.000,
0.667, 0.667, 1.000,
0.667, 1.000, 1.000,
1.000, 0.000, 1.000,
1.000, 0.333, 1.000,
1.000, 0.667, 1.000,
0.167, 0.000, 0.000,
0.333, 0.000, 0.000,
0.500, 0.000, 0.000,
0.667, 0.000, 0.000,
0.833, 0.000, 0.000,
1.000, 0.000, 0.000,
0.000, 0.167, 0.000,
0.000, 0.333, 0.000,
0.000, 0.500, 0.000,
0.000, 0.667, 0.000,
0.000, 0.833, 0.000,
0.000, 1.000, 0.000,
0.000, 0.000, 0.167,
0.000, 0.000, 0.333,
0.000, 0.000, 0.500,
0.000, 0.000, 0.667,
0.000, 0.000, 0.833,
0.000, 0.000, 1.000,
0.000, 0.000, 0.000,
0.143, 0.143, 0.143,
0.286, 0.286, 0.286,
0.429, 0.429, 0.429,
0.571, 0.571, 0.571,
0.714, 0.714, 0.714,
0.857, 0.857, 0.857,
0.000, 0.447, 0.741,
0.50, 0.5, 0
]
).astype(np.float32)
color_list = color_list.reshape((-1, 3)) * 255
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment