"torchvision/csrc/ops/cuda/nms_kernel.cu" did not exist on "5066d7153373d9aae902c32bce1298944505edc8"
Commit 62b7582f authored by myhloli's avatar myhloli
Browse files

Merge remote-tracking branch 'origin/dev' into dev

parents 978ef41c 41f1fb8a
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import torch
import torch.nn.functional as F
from torch import nn
from ..common import Activation
# Per-stage block specs for PP-LCNetV3.
# Each entry: [kernel_size, in_channels, out_channels, stride, use_se].
# Detection variant: integer strides downsample height and width equally.
NET_CONFIG_det = {
    "blocks2":
    # k, in_c, out_c, s, use_se
    [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
    "blocks5": [
        [3, 128, 256, 2, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    "blocks6": [
        [5, 256, 512, 2, True],
        [5, 512, 512, 1, True],
        [5, 512, 512, 1, False],
        [5, 512, 512, 1, False],
    ],
}
# Recognition variant: tuple strides (h, w) downsample height while keeping
# horizontal resolution for the character sequence.
NET_CONFIG_rec = {
    "blocks2":
    # k, in_c, out_c, s, use_se
    [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]],
    "blocks5": [
        [3, 128, 256, (1, 2), False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    "blocks6": [
        [5, 256, 512, (2, 1), True],
        [5, 512, 512, 1, True],
        [5, 512, 512, (2, 1), False],
        [5, 512, 512, 1, False],
    ],
}
def make_divisible(v, divisor=16, min_value=None):
    """Round *v* to the nearest multiple of *divisor*.

    The result never drops below ``min_value`` (defaults to ``divisor``) and
    never rounds down by more than 10% of the original value.
    """
    floor = divisor if min_value is None else min_value
    rounded = int(v + divisor / 2) // divisor * divisor
    result = max(floor, rounded)
    # Guard against shrinking the channel count by more than 10%.
    if result < 0.9 * v:
        result += divisor
    return result
class LearnableAffineBlock(nn.Module):
    """Learnable scalar affine transform ``y = scale * x + bias``.

    ``lr_mult`` and ``lab_lr`` are accepted for config compatibility with the
    paddle original but are not used here.
    """

    def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        self.scale = nn.Parameter(torch.Tensor([scale_value]))
        self.bias = nn.Parameter(torch.Tensor([bias_value]))

    def forward(self, x):
        return x * self.scale + self.bias
class ConvBNLayer(nn.Module):
    """Bias-free Conv2d followed by BatchNorm2d ('same' padding for odd kernels).

    ``lr_mult`` is accepted for config compatibility but unused in the torch port.
    """

    def __init__(
        self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0
    ):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        return self.bn(self.conv(x))
class Act(nn.Module):
    """Activation ('hswish' or 'relu') followed by a LearnableAffineBlock."""

    def __init__(self, act="hswish", lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        if act == "hswish":
            chosen = nn.Hardswish(inplace=True)
        else:
            # Only the two activations used by PP-LCNetV3 are supported.
            assert act == "relu"
            chosen = Activation(act)
        self.act = chosen
        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)

    def forward(self, x):
        activated = self.act(x)
        return self.lab(activated)
class LearnableRepLayer(nn.Module):
    """Re-parameterizable conv block from PP-LCNetV3.

    At train time it runs parallel branches: ``num_conv_branches`` kxk
    Conv+BN branches, an optional 1x1 Conv+BN branch (when kernel_size > 1),
    and an optional pure-BN identity branch (when shapes permit a residual).
    ``rep()`` fuses all branches into a single conv for inference/export.

    Fix vs. the paddle port: ``nn.BatchNorm2d`` exposes ``running_mean`` /
    ``running_var`` / ``eps`` — the paddle attribute names ``_mean`` /
    ``_variance`` / ``_epsilon`` raised AttributeError in ``_fuse_bn_tensor``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        groups=1,
        num_conv_branches=1,
        lr_mult=1.0,
        lab_lr=0.1,
    ):
        super().__init__()
        self.is_repped = False
        self.groups = groups
        self.stride = stride
        self.kernel_size = kernel_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_conv_branches = num_conv_branches
        self.padding = (kernel_size - 1) // 2
        # Identity (pure-BN) branch exists only when a residual is shape-legal.
        self.identity = (
            nn.BatchNorm2d(num_features=in_channels)
            if out_channels == in_channels and stride == 1
            else None
        )
        self.conv_kxk = nn.ModuleList(
            [
                ConvBNLayer(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride,
                    groups=groups,
                    lr_mult=lr_mult,
                )
                for _ in range(self.num_conv_branches)
            ]
        )
        # A separate 1x1 branch is redundant when the main kernel is 1x1.
        self.conv_1x1 = (
            ConvBNLayer(
                in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult
            )
            if kernel_size > 1
            else None
        )
        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
        self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr)

    def forward(self, x):
        # Fast path after rep(): a single fused conv (for export/inference).
        if self.is_repped:
            out = self.lab(self.reparam_conv(x))
            if self.stride != 2:
                out = self.act(out)
            return out
        out = 0
        if self.identity is not None:
            out += self.identity(x)
        if self.conv_1x1 is not None:
            out += self.conv_1x1(x)
        for conv in self.conv_kxk:
            out += conv(x)
        out = self.lab(out)
        # Down-sampling blocks skip the activation (matches the paddle model).
        if self.stride != 2:
            out = self.act(out)
        return out

    def rep(self):
        """Fuse all branches into ``self.reparam_conv``; idempotent."""
        if self.is_repped:
            return
        kernel, bias = self._get_kernel_bias()
        self.reparam_conv = nn.Conv2d(
            in_channels=self.in_channels,
            out_channels=self.out_channels,
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
            groups=self.groups,
        )
        self.reparam_conv.weight.data = kernel
        self.reparam_conv.bias.data = bias
        self.is_repped = True

    def _pad_kernel_1x1_to_kxk(self, kernel1x1, pad):
        # A non-tensor (the scalar 0 placeholder) means "branch absent".
        if not isinstance(kernel1x1, torch.Tensor):
            return 0
        return nn.functional.pad(kernel1x1, [pad, pad, pad, pad])

    def _get_kernel_bias(self):
        """Sum the fused (kernel, bias) contributions of every branch."""
        kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1)
        kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(
            kernel_conv_1x1, self.kernel_size // 2
        )
        kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)
        kernel_conv_kxk = 0
        bias_conv_kxk = 0
        for conv in self.conv_kxk:
            kernel, bias = self._fuse_bn_tensor(conv)
            kernel_conv_kxk += kernel
            bias_conv_kxk += bias
        kernel_reparam = kernel_conv_kxk + kernel_conv_1x1 + kernel_identity
        bias_reparam = bias_conv_kxk + bias_conv_1x1 + bias_identity
        return kernel_reparam, bias_reparam

    def _fuse_bn_tensor(self, branch):
        """Fold a Conv+BN branch (or identity BN) into equivalent conv weights.

        Returns ``(0, 0)`` for an absent branch so sums stay well-defined.
        """
        if not branch:
            return 0, 0
        elif isinstance(branch, ConvBNLayer):
            kernel = branch.conv.weight
            # torch attribute names (paddle used _mean/_variance/_epsilon).
            running_mean = branch.bn.running_mean
            running_var = branch.bn.running_var
            gamma = branch.bn.weight
            beta = branch.bn.bias
            eps = branch.bn.eps
        else:
            assert isinstance(branch, nn.BatchNorm2d)
            if not hasattr(self, "id_tensor"):
                # Build a one-hot "identity" kernel once, on the BN's device.
                input_dim = self.in_channels // self.groups
                kernel_value = torch.zeros(
                    (self.in_channels, input_dim, self.kernel_size, self.kernel_size),
                    dtype=branch.weight.dtype,
                    device=branch.weight.device,
                )
                for i in range(self.in_channels):
                    kernel_value[
                        i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2
                    ] = 1
                self.id_tensor = kernel_value
            kernel = self.id_tensor
            running_mean = branch.running_mean
            running_var = branch.running_var
            gamma = branch.weight
            beta = branch.bias
            eps = branch.eps
        # Standard BN folding: W' = W * gamma/std, b' = beta - mean*gamma/std.
        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape((-1, 1, 1, 1))
        return kernel * t, beta - running_mean * gamma / std
class SELayer(nn.Module):
    """Squeeze-and-excitation channel gate (GAP -> 1x1 -> ReLU -> 1x1 -> hardsigmoid).

    ``lr_mult`` is accepted for config compatibility but unused here.
    """

    def __init__(self, channel, reduction=4, lr_mult=1.0):
        super().__init__()
        squeezed = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(
            in_channels=channel,
            out_channels=squeezed,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(
            in_channels=squeezed,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.hardsigmoid = nn.Hardsigmoid(inplace=True)

    def forward(self, x):
        gate = self.avg_pool(x)
        gate = self.relu(self.conv1(gate))
        gate = self.hardsigmoid(self.conv2(gate))
        # Per-channel rescaling of the input.
        return x * gate
class LCNetV3Block(nn.Module):
    """Depthwise LearnableRepLayer (optionally SE-gated) + pointwise LearnableRepLayer."""

    def __init__(
        self,
        in_channels,
        out_channels,
        stride,
        dw_size,
        use_se=False,
        conv_kxk_num=4,
        lr_mult=1.0,
        lab_lr=0.1,
    ):
        super().__init__()
        self.use_se = use_se
        # Depthwise: groups == channels, spatial kernel dw_size.
        self.dw_conv = LearnableRepLayer(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=dw_size,
            stride=stride,
            groups=in_channels,
            num_conv_branches=conv_kxk_num,
            lr_mult=lr_mult,
            lab_lr=lab_lr,
        )
        if use_se:
            self.se = SELayer(in_channels, lr_mult=lr_mult)
        # Pointwise: 1x1 projection to out_channels.
        self.pw_conv = LearnableRepLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            num_conv_branches=conv_kxk_num,
            lr_mult=lr_mult,
            lab_lr=lab_lr,
        )

    def forward(self, x):
        y = self.dw_conv(x)
        if self.use_se:
            y = self.se(y)
        return self.pw_conv(y)
class PPLCNetV3(nn.Module):
    """PP-LCNetV3 backbone (det and rec variants share the code path).

    Args:
        scale: channel-width multiplier applied through make_divisible.
        conv_kxk_num: number of parallel kxk branches per LearnableRepLayer.
        lr_mult_list: six per-stage lr multipliers; None means all 1.0.
            (Changed from a mutable list default to None — same effective value.)
        lab_lr: LearnableAffineBlock lr factor (config passthrough).
        det: True selects NET_CONFIG_det and multi-scale outputs.
    """

    def __init__(
        self,
        scale=1.0,
        conv_kxk_num=4,
        lr_mult_list=None,
        lab_lr=0.1,
        det=False,
        **kwargs
    ):
        super().__init__()
        self.scale = scale
        # Avoid the shared-mutable-default pitfall; None means "all stages 1.0".
        self.lr_mult_list = [1.0] * 6 if lr_mult_list is None else lr_mult_list
        self.det = det
        self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec
        assert isinstance(
            self.lr_mult_list, (list, tuple)
        ), "lr_mult_list should be in (list, tuple) but got {}".format(
            type(self.lr_mult_list)
        )
        assert (
            len(self.lr_mult_list) == 6
        ), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list))
        self.conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=make_divisible(16 * scale),
            kernel_size=3,
            stride=2,
            lr_mult=self.lr_mult_list[0],
        )
        # The five stages differ only in their config table and lr multiplier.
        self.blocks2 = self._make_stage("blocks2", conv_kxk_num, self.lr_mult_list[1], lab_lr)
        self.blocks3 = self._make_stage("blocks3", conv_kxk_num, self.lr_mult_list[2], lab_lr)
        self.blocks4 = self._make_stage("blocks4", conv_kxk_num, self.lr_mult_list[3], lab_lr)
        self.blocks5 = self._make_stage("blocks5", conv_kxk_num, self.lr_mult_list[4], lab_lr)
        self.blocks6 = self._make_stage("blocks6", conv_kxk_num, self.lr_mult_list[5], lab_lr)
        self.out_channels = make_divisible(512 * scale)
        if self.det:
            mv_c = [16, 24, 56, 480]
            self.out_channels = [
                make_divisible(self.net_config["blocks3"][-1][2] * scale),
                make_divisible(self.net_config["blocks4"][-1][2] * scale),
                make_divisible(self.net_config["blocks5"][-1][2] * scale),
                make_divisible(self.net_config["blocks6"][-1][2] * scale),
            ]
            # 1x1 projections shrinking each tap to the detector's widths.
            self.layer_list = nn.ModuleList(
                [
                    nn.Conv2d(in_c, int(c * scale), 1, 1, 0)
                    for in_c, c in zip(self.out_channels, mv_c)
                ]
            )
            self.out_channels = [int(c * scale) for c in mv_c]

    def _make_stage(self, name, conv_kxk_num, lr_mult, lab_lr):
        """Build one nn.Sequential stage from self.net_config[name]."""
        return nn.Sequential(
            *[
                LCNetV3Block(
                    in_channels=make_divisible(in_c * self.scale),
                    out_channels=make_divisible(out_c * self.scale),
                    dw_size=k,
                    stride=s,
                    use_se=se,
                    conv_kxk_num=conv_kxk_num,
                    lr_mult=lr_mult,
                    lab_lr=lab_lr,
                )
                for k, in_c, out_c, s, se in self.net_config[name]
            ]
        )

    def forward(self, x):
        out_list = []
        x = self.conv1(x)
        x = self.blocks2(x)
        x = self.blocks3(x)
        out_list.append(x)
        x = self.blocks4(x)
        out_list.append(x)
        x = self.blocks5(x)
        out_list.append(x)
        x = self.blocks6(x)
        out_list.append(x)
        if self.det:
            # Project the four taps for the detection FPN.
            out_list[0] = self.layer_list[0](out_list[0])
            out_list[1] = self.layer_list[1](out_list[1])
            out_list[2] = self.layer_list[2](out_list[2])
            out_list[3] = self.layer_list[3](out_list[3])
            return out_list
        if self.training:
            x = F.adaptive_avg_pool2d(x, [1, 40])
        else:
            x = F.avg_pool2d(x, [3, 2])
        return x
from torch import nn
from .det_mobilenet_v3 import ConvBNLayer, ResidualUnit, make_divisible
class MobileNetV3(nn.Module):
    """MobileNetV3 backbone for text recognition.

    Strides in the per-block config are tuples (h, 1) so width (the character
    axis) is preserved while height is reduced.
    """

    def __init__(
        self,
        in_channels=3,
        model_name="small",
        scale=0.5,
        large_stride=None,
        small_stride=None,
        **kwargs
    ):
        super(MobileNetV3, self).__init__()
        small_stride = [2, 2, 2, 2] if small_stride is None else small_stride
        large_stride = [1, 2, 2, 2] if large_stride is None else large_stride
        assert isinstance(large_stride, list), (
            "large_stride type must be list but got {}".format(type(large_stride))
        )
        assert isinstance(small_stride, list), (
            "small_stride type must be list but got {}".format(type(small_stride))
        )
        assert len(large_stride) == 4, (
            "large_stride length must be 4 but got {}".format(len(large_stride))
        )
        assert len(small_stride) == 4, (
            "small_stride length must be 4 but got {}".format(len(small_stride))
        )
        if model_name == "large":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, "relu", large_stride[0]],
                [3, 64, 24, False, "relu", (large_stride[1], 1)],
                [3, 72, 24, False, "relu", 1],
                [5, 72, 40, True, "relu", (large_stride[2], 1)],
                [5, 120, 40, True, "relu", 1],
                [5, 120, 40, True, "relu", 1],
                [3, 240, 80, False, "hard_swish", 1],
                [3, 200, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 480, 112, True, "hard_swish", 1],
                [3, 672, 112, True, "hard_swish", 1],
                [5, 672, 160, True, "hard_swish", (large_stride[3], 1)],
                [5, 960, 160, True, "hard_swish", 1],
                [5, 960, 160, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, "relu", (small_stride[0], 1)],
                [3, 72, 24, False, "relu", (small_stride[1], 1)],
                [3, 88, 24, False, "relu", 1],
                [5, 96, 40, True, "hard_swish", (small_stride[2], 1)],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 120, 48, True, "hard_swish", 1],
                [5, 144, 48, True, "hard_swish", 1],
                [5, 288, 96, True, "hard_swish", (small_stride[3], 1)],
                [5, 576, 96, True, "hard_swish", 1],
                [5, 576, 96, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 576
        else:
            raise NotImplementedError(
                "mode[" + model_name + "_model] is not implemented!"
            )
        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert scale in supported_scale, (
            "supported scales are {} but input scale is {}".format(
                supported_scale, scale
            )
        )
        # Stem conv.
        inplanes = make_divisible(16 * scale)
        self.conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=inplanes,
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            if_act=True,
            act="hard_swish",
            name="conv1",
        )
        blocks = []
        for idx, (k, exp, c, se, nl, s) in enumerate(cfg):
            blocks.append(
                ResidualUnit(
                    in_channels=inplanes,
                    mid_channels=make_divisible(scale * exp),
                    out_channels=make_divisible(scale * c),
                    kernel_size=k,
                    stride=s,
                    use_se=se,
                    act=nl,
                    name="conv" + str(idx + 2),
                )
            )
            inplanes = make_divisible(scale * c)
        self.blocks = nn.Sequential(*blocks)
        self.conv2 = ConvBNLayer(
            in_channels=inplanes,
            out_channels=make_divisible(scale * cls_ch_squeeze),
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            if_act=True,
            act="hard_swish",
            name="conv_last",
        )
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(scale * cls_ch_squeeze)

    def forward(self, x):
        x = self.conv1(x)
        x = self.blocks(x)
        x = self.conv2(x)
        return self.pool(x)
import os, sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..common import Activation
class ConvBNLayer(nn.Module):
    """Conv2d -> BatchNorm2d -> optional activation (paddle-style argument names).

    ``channels`` is accepted for config compatibility but unused.
    """

    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 padding,
                 channels=None,
                 num_groups=1,
                 act='hard_swish'):
        super(ConvBNLayer, self).__init__()
        self.act = act
        self._conv = nn.Conv2d(
            num_channels,
            num_filters,
            filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            bias=False)
        self._batch_norm = nn.BatchNorm2d(num_filters)
        if act is not None:
            self._act = Activation(act_type=act, inplace=True)

    def forward(self, inputs):
        out = self._batch_norm(self._conv(inputs))
        if self.act is not None:
            out = self._act(out)
        return out
class DepthwiseSeparable(nn.Module):
    """Depthwise conv (optionally SE-gated) followed by a 1x1 pointwise conv."""

    def __init__(self,
                 num_channels,
                 num_filters1,
                 num_filters2,
                 num_groups,
                 stride,
                 scale,
                 dw_size=3,
                 padding=1,
                 use_se=False):
        super(DepthwiseSeparable, self).__init__()
        self.use_se = use_se
        dw_out = int(num_filters1 * scale)
        self._depthwise_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=dw_out,
            filter_size=dw_size,
            stride=stride,
            padding=padding,
            num_groups=int(num_groups * scale))
        if use_se:
            self._se = SEModule(dw_out)
        self._pointwise_conv = ConvBNLayer(
            num_channels=dw_out,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0)

    def forward(self, inputs):
        out = self._depthwise_conv(inputs)
        if self.use_se:
            out = self._se(out)
        return self._pointwise_conv(out)
class MobileNetV1Enhance(nn.Module):
    """Enhanced MobileNetV1 backbone for recognition.

    Later stages use (2, 1) strides so only height is reduced; the last two
    stages are SE-gated 5x5 depthwise blocks.
    """

    def __init__(self,
                 in_channels=3,
                 scale=0.5,
                 last_conv_stride=1,
                 last_pool_type='max',
                 **kwargs):
        super().__init__()
        self.scale = scale
        self.conv1 = ConvBNLayer(
            num_channels=in_channels,
            filter_size=3,
            channels=3,
            num_filters=int(32 * scale),
            stride=2,
            padding=1)
        # (in_c, filters1, filters2, groups, stride, dw_size, padding, use_se)
        stage_cfg = [
            (32, 32, 64, 32, 1, 3, 1, False),
            (64, 64, 128, 64, 1, 3, 1, False),
            (128, 128, 128, 128, 1, 3, 1, False),
            (128, 128, 256, 128, (2, 1), 3, 1, False),
            (256, 256, 256, 256, 1, 3, 1, False),
            (256, 256, 512, 256, (2, 1), 3, 1, False),
        ]
        stage_cfg += [(512, 512, 512, 512, 1, 5, 2, False)] * 5
        stage_cfg += [
            (512, 512, 1024, 512, (2, 1), 5, 2, True),
            (1024, 1024, 1024, 1024, last_conv_stride, 5, 2, True),
        ]
        blocks = []
        for in_c, f1, f2, groups, stride, dw, pad, se in stage_cfg:
            blocks.append(
                DepthwiseSeparable(
                    num_channels=int(in_c * scale),
                    num_filters1=f1,
                    num_filters2=f2,
                    num_groups=groups,
                    stride=stride,
                    dw_size=dw,
                    padding=pad,
                    scale=scale,
                    use_se=se))
        self.block_list = nn.Sequential(*blocks)
        if last_pool_type == 'avg':
            self.pool = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
        else:
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.out_channels = int(1024 * scale)

    def forward(self, inputs):
        out = self.conv1(inputs)
        out = self.block_list(out)
        return self.pool(out)
def hardsigmoid(x):
    """Piecewise-linear sigmoid: clamp(x + 3, 0, 6) / 6 (same values as relu6 form)."""
    return torch.clamp(x + 3.0, min=0.0, max=6.0) / 6.0
class SEModule(nn.Module):
    """Squeeze-and-excitation gate using the module-level hardsigmoid()."""

    def __init__(self, channel, reduction=4):
        super(SEModule, self).__init__()
        mid = channel // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(
            in_channels=channel,
            out_channels=mid,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True)
        self.conv2 = nn.Conv2d(
            in_channels=mid,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True)

    def forward(self, inputs):
        gate = self.avg_pool(inputs)
        gate = F.relu(self.conv1(gate))
        gate = hardsigmoid(self.conv2(gate))
        # Channel-wise rescaling of the input.
        return torch.mul(inputs, gate)
import torch
import torch.nn.functional as F
from torch import nn
class Hswish(nn.Module):
    """Hard-swish: x * relu6(x + 3) / 6."""

    def __init__(self, inplace=True):
        super(Hswish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        gate = F.relu6(x + 3.0, inplace=self.inplace) / 6.0
        return x * gate
# out = max(0, min(1, slop*x+offset))
# paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None)
class Hsigmoid(nn.Module):
    """Paddle-flavored hard sigmoid: relu6(1.2 * x + 3) / 6.

    Note the 1.2 slope factor — this deliberately matches paddle's
    hard_sigmoid, not torch's nn.Hardsigmoid.
    """

    def __init__(self, inplace=True):
        super(Hsigmoid, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0
class GELU(nn.Module):
    """GELU wrapper; ``inplace`` is accepted for API symmetry but has no effect."""

    def __init__(self, inplace=True):
        super(GELU, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return F.gelu(x)
class Swish(nn.Module):
    """Swish / SiLU: x * sigmoid(x).

    With ``inplace=True`` the input tensor is mutated in place and returned.
    """

    def __init__(self, inplace=True):
        super(Swish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        if not self.inplace:
            return x * torch.sigmoid(x)
        x.mul_(torch.sigmoid(x))
        return x
class Activation(nn.Module):
    """Factory wrapper mapping a lowercase act-type string to a module.

    'sigmoid' is intentionally unsupported (matches the paddle port); unknown
    names also raise NotImplementedError.
    """

    def __init__(self, act_type, inplace=True):
        super(Activation, self).__init__()
        act_type = act_type.lower()
        factories = {
            "relu": lambda: nn.ReLU(inplace=inplace),
            "relu6": lambda: nn.ReLU6(inplace=inplace),
            # Hsigmoid kept over nn.Hardsigmoid for paddle-compatible slope.
            "hard_sigmoid": lambda: Hsigmoid(inplace),
            "hard_swish": lambda: Hswish(inplace=inplace),
            "hswish": lambda: Hswish(inplace=inplace),
            "leakyrelu": lambda: nn.LeakyReLU(inplace=inplace),
            "gelu": lambda: GELU(inplace=inplace),
            "swish": lambda: Swish(inplace=inplace),
        }
        if act_type == "sigmoid" or act_type not in factories:
            raise NotImplementedError
        self.act = factories[act_type]()

    def forward(self, inputs):
        return self.act(inputs)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ["build_head"]
def build_head(config, **kwargs):
# det head
from .det_db_head import DBHead, PFHeadLocal
# rec head
from .rec_ctc_head import CTCHead
from .rec_multi_head import MultiHead
# cls head
from .cls_head import ClsHead
support_dict = [
"DBHead",
"CTCHead",
"ClsHead",
"MultiHead",
"PFHeadLocal",
]
module_name = config.pop("name")
char_num = config.pop("char_num", 6625)
assert module_name in support_dict, Exception(
"head only support {}".format(support_dict)
)
module_class = eval(module_name)(**config, **kwargs)
return module_class
import torch
import torch.nn.functional as F
from torch import nn
class ClsHead(nn.Module):
    """
    Class orientation
    Args:
        params(dict): super parameters for build Class network
    """

    def __init__(self, in_channels, class_dim, **kwargs):
        super(ClsHead, self).__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(in_channels, class_dim, bias=True)

    def forward(self, x):
        # Global average pool then flatten to (B, C).
        pooled = self.pool(x)
        flat = torch.reshape(pooled, shape=[pooled.shape[0], pooled.shape[1]])
        logits = self.fc(flat)
        return F.softmax(logits, dim=1)
import torch
import torch.nn as nn
import torch.nn.functional as F
from ..common import Activation
from ..backbones.det_mobilenet_v3 import ConvBNLayer
class Head(nn.Module):
    """DB prediction tower: conv-BN-ReLU, two stride-2 deconv-BN-ReLU blocks,
    then a final deconv + sigmoid producing a single-channel map.

    With ``return_f=True`` the pre-final feature map is returned as well.
    """

    def __init__(self, in_channels, **kwargs):
        super(Head, self).__init__()
        mid = in_channels // 4
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=mid,
            kernel_size=3,
            padding=1,
            bias=False)
        self.conv_bn1 = nn.BatchNorm2d(mid)
        self.relu1 = Activation(act_type='relu')
        self.conv2 = nn.ConvTranspose2d(
            in_channels=mid,
            out_channels=mid,
            kernel_size=2,
            stride=2)
        self.conv_bn2 = nn.BatchNorm2d(mid)
        self.relu2 = Activation(act_type='relu')
        self.conv3 = nn.ConvTranspose2d(
            in_channels=mid,
            out_channels=1,
            kernel_size=2,
            stride=2)

    def forward(self, x, return_f=False):
        x = self.relu1(self.conv_bn1(self.conv1(x)))
        x = self.relu2(self.conv_bn2(self.conv2(x)))
        feat = x if return_f is True else None
        x = torch.sigmoid(self.conv3(x))
        if return_f is True:
            return x, feat
        return x
class DBHead(nn.Module):
    """
    Differentiable Binarization (DB) for text detection:
    see https://arxiv.org/abs/1911.08947
    args:
        params(dict): super parameters for build DB network
    """

    def __init__(self, in_channels, k=50, **kwargs):
        super(DBHead, self).__init__()
        self.k = k
        # NOTE: the paddle parameter-name lists (conv2d_56, batch_norm_47, ...)
        # were dead locals in the port and have been removed.
        self.binarize = Head(in_channels, **kwargs)
        self.thresh = Head(in_channels, **kwargs)

    def step_function(self, x, y):
        # Differentiable step approximation: 1 / (1 + exp(-k * (x - y))).
        return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))

    def forward(self, x):
        # Inference path only needs the shrink (probability) map.
        shrink_maps = self.binarize(x)
        return {'maps': shrink_maps}
class LocalModule(nn.Module):
    """Refinement branch that fuses an upsampled feature map with the initial
    shrink map via conv-BN-ReLU + 1x1 conv.

    ``use_distance`` and the forward's ``distance_map`` are accepted for
    interface compatibility but are unused here.
    """

    def __init__(self, in_c, mid_c, use_distance=True):
        # Zero-argument super() is subclass-safe; the original
        # super(self.__class__, self) recurses infinitely when subclassed.
        super().__init__()
        self.last_3 = ConvBNLayer(in_c + 1, mid_c, 3, 1, 1, act='relu')
        self.last_1 = nn.Conv2d(mid_c, 1, 1, 1, 0)

    def forward(self, x, init_map, distance_map):
        # Concatenate the probability map as an extra input channel.
        fused = torch.cat([init_map, x], dim=1)
        return self.last_1(self.last_3(fused))
class PFHeadLocal(DBHead):
    """DB head variant that refines the shrink map with a local (CBN) branch
    and averages the two maps.
    """

    def __init__(self, in_channels, k=50, mode='small', **kwargs):
        super(PFHeadLocal, self).__init__(in_channels, k, **kwargs)
        self.mode = mode
        self.up_conv = nn.Upsample(scale_factor=2, mode="nearest")
        if self.mode == 'large':
            self.cbn_layer = LocalModule(in_channels // 4, in_channels // 4)
        elif self.mode == 'small':
            self.cbn_layer = LocalModule(in_channels // 4, in_channels // 8)

    def forward(self, x, targets=None):
        shrink_maps, f = self.binarize(x, return_f=True)
        base_maps = shrink_maps
        cbn_maps = self.cbn_layer(self.up_conv(f), shrink_maps, None)
        # torch.sigmoid replaces the deprecated F.sigmoid alias (same math).
        cbn_maps = torch.sigmoid(cbn_maps)
        return {'maps': 0.5 * (base_maps + cbn_maps), 'cbn_maps': cbn_maps}
import torch.nn.functional as F
from torch import nn
class CTCHead(nn.Module):
    """CTC classification head: one FC (or FC->FC via ``mid_channels``).

    In eval mode the logits are softmaxed and returned alone; in train mode
    ``return_feats=True`` yields ``(features, logits)``.
    ``fc_decay`` is accepted for config compatibility but unused here.
    """

    def __init__(
        self,
        in_channels,
        out_channels=6625,
        fc_decay=0.0004,
        mid_channels=None,
        return_feats=False,
        **kwargs
    ):
        super(CTCHead, self).__init__()
        if mid_channels is None:
            self.fc = nn.Linear(in_channels, out_channels, bias=True)
        else:
            self.fc1 = nn.Linear(in_channels, mid_channels, bias=True)
            self.fc2 = nn.Linear(mid_channels, out_channels, bias=True)
        self.out_channels = out_channels
        self.mid_channels = mid_channels
        self.return_feats = return_feats

    def forward(self, x, labels=None):
        if self.mid_channels is None:
            predicts = self.fc(x)
        else:
            x = self.fc1(x)
            predicts = self.fc2(x)
        result = (x, predicts) if self.return_feats else predicts
        if not self.training:
            # Inference: probabilities only (features are dropped).
            predicts = F.softmax(predicts, dim=2)
            result = predicts
        return result
from torch import nn
from ..necks.rnn import Im2Seq, SequenceEncoder
from .rec_ctc_head import CTCHead
class FCTranspose(nn.Module):
    """Permute (B, C, T) -> (B, T, C), optionally followed by a bias-free Linear."""

    def __init__(self, in_channels, out_channels, only_transpose=False):
        super().__init__()
        self.only_transpose = only_transpose
        if not self.only_transpose:
            self.fc = nn.Linear(in_channels, out_channels, bias=False)

    def forward(self, x):
        transposed = x.permute([0, 2, 1])
        return transposed if self.only_transpose else self.fc(transposed)
class MultiHead(nn.Module):
    """CTC-centric multi-head wrapper.

    Only the CTC branch is materialized; SAR/NRTR entries in ``head_list``
    are tolerated but not instantiated, and forward runs the CTC path only.
    """

    def __init__(self, in_channels, out_channels_list, **kwargs):
        super().__init__()
        self.head_list = kwargs.pop("head_list")
        self.gtc_head = "sar"
        assert len(self.head_list) >= 2
        for idx, head_cfg in enumerate(self.head_list):
            name = list(head_cfg)[0]
            if name in ("SARHead", "NRTRHead"):
                # GTC branches appear in configs but are not built here.
                continue
            if name != "CTCHead":
                raise NotImplementedError(f"{name} is not supported in MultiHead yet")
            # --- CTC neck ---
            self.encoder_reshape = Im2Seq(in_channels)
            neck_args = self.head_list[idx][name]["Neck"]
            encoder_type = neck_args.pop("name")
            self.ctc_encoder = SequenceEncoder(
                in_channels=in_channels, encoder_type=encoder_type, **neck_args
            )
            # --- CTC head ---
            head_args = self.head_list[idx][name].get("Head", {}) or {}
            self.ctc_head = CTCHead(
                in_channels=self.ctc_encoder.out_channels,
                out_channels=out_channels_list["CTCLabelDecode"],
                **head_args,
            )

    def forward(self, x, data=None):
        encoded = self.ctc_encoder(x)
        return self.ctc_head(encoded)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ["build_neck"]
def build_neck(config):
from .db_fpn import DBFPN, LKPAN, RSEFPN
from .rnn import SequenceEncoder
support_dict = ["DBFPN", "SequenceEncoder", "RSEFPN", "LKPAN"]
module_name = config.pop("name")
assert module_name in support_dict, Exception(
"neck only support {}".format(support_dict)
)
module_class = eval(module_name)(**config)
return module_class
from torch import nn
class IntraCLBlock(nn.Module):
    """Residual refinement block: channels are reduced by ``reduce_factor``,
    passed through cascaded 7/5/3 stages — each summing a square (kxk),
    vertical (kx1) and horizontal (1xk) conv — then restored and added back.
    """

    def __init__(self, in_channels=96, reduce_factor=4):
        super(IntraCLBlock, self).__init__()
        self.channels = in_channels
        self.rf = reduce_factor
        reduced = self.channels // self.rf

        def conv(kernel_size, padding):
            # All inner convs share the reduced width and unit stride.
            return nn.Conv2d(
                reduced,
                reduced,
                kernel_size=kernel_size,
                stride=(1, 1),
                padding=padding,
            )

        self.conv1x1_reduce_channel = nn.Conv2d(
            self.channels, reduced, kernel_size=1, stride=1, padding=0
        )
        self.conv1x1_return_channel = nn.Conv2d(
            reduced, self.channels, kernel_size=1, stride=1, padding=0
        )
        # Vertical (kx1) branches.
        self.v_layer_7x1 = conv((7, 1), (3, 0))
        self.v_layer_5x1 = conv((5, 1), (2, 0))
        self.v_layer_3x1 = conv((3, 1), (1, 0))
        # Horizontal (1xk) branches.
        self.q_layer_1x7 = conv((1, 7), (0, 3))
        self.q_layer_1x5 = conv((1, 5), (0, 2))
        self.q_layer_1x3 = conv((1, 3), (0, 1))
        # Square (kxk) base branches.
        self.c_layer_7x7 = conv((7, 7), (3, 3))
        self.c_layer_5x5 = conv((5, 5), (2, 2))
        self.c_layer_3x3 = conv((3, 3), (1, 1))
        self.bn = nn.BatchNorm2d(self.channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        reduced = self.conv1x1_reduce_channel(x)
        y = self.c_layer_7x7(reduced) + self.v_layer_7x1(reduced) + self.q_layer_1x7(reduced)
        y = self.c_layer_5x5(y) + self.v_layer_5x1(y) + self.q_layer_1x5(y)
        y = self.c_layer_3x3(y) + self.v_layer_3x1(y) + self.q_layer_1x3(y)
        relation = self.relu(self.bn(self.conv1x1_return_channel(y)))
        # Residual connection back onto the input.
        return x + relation
def build_intraclblock_list(num_block):
    """Return an nn.ModuleList of *num_block* default-configured IntraCLBlocks."""
    return nn.ModuleList(IntraCLBlock() for _ in range(num_block))
import torch
from torch import nn
from ..backbones.rec_svtrnet import Block, ConvBNLayer
class Im2Seq(nn.Module):
    """Collapse a height-1 feature map (B, C, 1, W) into a sequence (B, W, C)."""

    def __init__(self, in_channels, **kwargs):
        super().__init__()
        self.out_channels = in_channels

    def forward(self, x):
        # (B, C, 1, W) -> (B, C, W) -> (B, W, C); dim=2 only squeezes when H == 1.
        return x.squeeze(dim=2).permute(0, 2, 1)
class EncoderWithRNN_(nn.Module):
    """Bidirectional-by-hand encoder: two unidirectional 2-layer LSTMs, the
    second consuming (and re-flipping) the time-reversed sequence.
    """

    def __init__(self, in_channels, hidden_size):
        super(EncoderWithRNN_, self).__init__()
        self.out_channels = hidden_size * 2

        def make_lstm():
            return nn.LSTM(
                in_channels,
                hidden_size,
                bidirectional=False,
                batch_first=True,
                num_layers=2,
            )

        self.rnn1 = make_lstm()
        self.rnn2 = make_lstm()

    def forward(self, x):
        self.rnn1.flatten_parameters()
        self.rnn2.flatten_parameters()
        fwd, _ = self.rnn1(x)
        bwd, _ = self.rnn2(torch.flip(x, [1]))
        # Re-align the backward pass before concatenating along channels.
        return torch.cat([fwd, torch.flip(bwd, [1])], 2)
class EncoderWithRNN(nn.Module):
    """Sequence encoder: a 2-layer bidirectional, batch-first LSTM."""

    def __init__(self, in_channels, hidden_size):
        super(EncoderWithRNN, self).__init__()
        # Bidirectional output concatenates both directions per step.
        self.out_channels = hidden_size * 2
        self.lstm = nn.LSTM(
            in_channels,
            hidden_size,
            num_layers=2,
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, x):
        encoded, _state = self.lstm(x)
        return encoded
class EncoderWithFC(nn.Module):
    """Sequence encoder: a single fully-connected projection per time step."""

    def __init__(self, in_channels, hidden_size):
        super(EncoderWithFC, self).__init__()
        self.out_channels = hidden_size
        # nn.Linear applies to the last axis, i.e. per time step on (B, T, C).
        self.fc = nn.Linear(in_channels, hidden_size, bias=True)

    def forward(self, x):
        return self.fc(x)
class EncoderWithSVTR(nn.Module):
    """Encode a 4-D feature map with a small SVTR global-attention stack.

    The map is channel-reduced (conv1/conv2), run through ``depth``
    global-mixing transformer ``Block``s on the flattened spatial axis,
    projected back (conv3), concatenated with a shortcut of the input,
    and squeezed to ``dims`` channels (conv4 + conv1x1).

    Args:
        in_channels: channels of the incoming feature map.
        dims: final output channel count (exposed as ``out_channels``).
        depth: number of SVTR mixing blocks.
        hidden_dims: embedding width inside the mixing blocks.
        use_guide: when True, the attention branch is detached from the
            autograd graph so gradients only flow through the shortcut.
        num_heads, qkv_bias, mlp_ratio, drop_rate, attn_drop_rate,
        drop_path, qk_scale: forwarded to each ``Block``.
        kernel_size: kernel of the first reduction convolution.
    """

    def __init__(
        self,
        in_channels,
        dims=64,  # XS
        depth=2,
        hidden_dims=120,
        use_guide=False,
        num_heads=8,
        qkv_bias=True,
        mlp_ratio=2.0,
        drop_rate=0.1,
        kernel_size=[3, 3],
        attn_drop_rate=0.1,
        drop_path=0.0,
        qk_scale=None,
    ):
        super(EncoderWithSVTR, self).__init__()
        self.depth = depth
        self.use_guide = use_guide
        # Reduce channels before the (quadratic-cost) attention blocks.
        self.conv1 = ConvBNLayer(
            in_channels,
            in_channels // 8,
            kernel_size=kernel_size,
            padding=[kernel_size[0] // 2, kernel_size[1] // 2],
            act="swish",
        )
        self.conv2 = ConvBNLayer(
            in_channels // 8, hidden_dims, kernel_size=1, act="swish"
        )
        self.svtr_block = nn.ModuleList(
            [
                Block(
                    dim=hidden_dims,
                    num_heads=num_heads,
                    mixer="Global",
                    HW=None,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    drop=drop_rate,
                    act_layer="swish",
                    attn_drop=attn_drop_rate,
                    drop_path=drop_path,
                    norm_layer="nn.LayerNorm",
                    epsilon=1e-05,
                    prenorm=False,
                )
                for _ in range(depth)
            ]
        )
        self.norm = nn.LayerNorm(hidden_dims, eps=1e-6)
        self.conv3 = ConvBNLayer(hidden_dims, in_channels, kernel_size=1, act="swish")
        # last conv-nxn, the input is concat of input tensor and conv3 output tensor
        self.conv4 = ConvBNLayer(
            2 * in_channels, in_channels // 8, padding=1, act="swish"
        )
        self.conv1x1 = ConvBNLayer(in_channels // 8, dims, kernel_size=1, act="swish")
        self.out_channels = dims
        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Kaiming for convs, N(0, 0.01) for linears, ones/zeros for norms."""
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode="fan_out")
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.ConvTranspose2d):
            nn.init.kaiming_normal_(m.weight, mode="fan_out")
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)

    def forward(self, x):
        # BUGFIX: the original port set ``z.stop_gradient = True`` — that is
        # Paddle API; on a torch tensor it merely attaches an inert
        # attribute, so gradients were NOT blocked. ``detach()`` is the
        # torch equivalent.
        if self.use_guide:
            z = x.clone().detach()
        else:
            z = x
        # Shortcut keeps the full-channel input for the late concat.
        h = z
        # Reduce dim, then embed to hidden_dims.
        z = self.conv1(z)
        z = self.conv2(z)
        # SVTR global block: flatten spatial dims into a token sequence.
        B, C, H, W = z.shape
        z = z.flatten(2).permute(0, 2, 1)
        for blk in self.svtr_block:
            z = blk(z)
        z = self.norm(z)
        # Back to NCHW for the closing convolutions.
        z = z.reshape([-1, H, W, C]).permute(0, 3, 1, 2)
        z = self.conv3(z)
        # Fuse with the shortcut, then squeeze to ``dims`` channels.
        z = torch.cat((h, z), dim=1)
        z = self.conv1x1(self.conv4(z))
        return z
class SequenceEncoder(nn.Module):
    """Turn a backbone feature map into an encoded (B, T, C) sequence.

    ``encoder_type`` selects the encoder: "reshape" (no trainable encoder,
    just ``Im2Seq``), "fc", "rnn", or "svtr". The SVTR encoder runs on the
    4-D map first and is reshaped afterwards; all others run on the
    already-reshaped sequence.
    """

    def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs):
        super(SequenceEncoder, self).__init__()
        self.encoder_reshape = Im2Seq(in_channels)
        self.out_channels = self.encoder_reshape.out_channels
        self.encoder_type = encoder_type
        if encoder_type == "reshape":
            # Pure reshape: no trainable encoder follows.
            self.only_reshape = True
        else:
            support_encoder_dict = {
                "reshape": Im2Seq,
                "fc": EncoderWithFC,
                "rnn": EncoderWithRNN,
                "svtr": EncoderWithSVTR,
            }
            assert encoder_type in support_encoder_dict, "{} must in {}".format(
                encoder_type, support_encoder_dict.keys()
            )
            encoder_cls = support_encoder_dict[encoder_type]
            # SVTR takes its own keyword configuration; the rest only need
            # a hidden size.
            if encoder_type == "svtr":
                self.encoder = encoder_cls(
                    self.encoder_reshape.out_channels, **kwargs
                )
            else:
                self.encoder = encoder_cls(
                    self.encoder_reshape.out_channels, hidden_size
                )
            self.out_channels = self.encoder.out_channels
            self.only_reshape = False

    def forward(self, x):
        if self.encoder_type == "svtr":
            # SVTR consumes the 4-D map directly; flatten afterwards.
            encoded = self.encoder(x)
            return self.encoder_reshape(encoded)
        sequence = self.encoder_reshape(x)
        if self.only_reshape:
            return sequence
        return self.encoder(sequence)
import torch
class ClsPostProcess(object):
    """Convert classification logits to (label, score) pairs.

    ``label_list`` maps a class index to its human-readable label
    (e.g. rotation angles for a direction classifier).
    """

    def __init__(self, label_list, **kwargs):
        super(ClsPostProcess, self).__init__()
        self.label_list = label_list

    def __call__(self, preds, label=None, *args, **kwargs):
        # Accept either a torch tensor or an ndarray-like input.
        if isinstance(preds, torch.Tensor):
            preds = preds.cpu().numpy()
        best = preds.argmax(axis=1)
        # Pair each sample's best label with its raw score.
        decode_out = [
            (self.label_list[cls_idx], preds[row, cls_idx])
            for row, cls_idx in enumerate(best)
        ]
        if label is None:
            return decode_out
        # Ground-truth labels get a fixed confidence of 1.0.
        gt_out = [(self.label_list[cls_idx], 1.0) for cls_idx in label]
        return decode_out, gt_out
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment