Commit aad3093a authored by WenmuZhou's avatar WenmuZhou
Browse files

dygraph first commit

parent 10f7e519
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import numpy as np
class AttentionLoss(object):
def __init__(self, params):
super(AttentionLoss, self).__init__()
self.char_num = params['char_num']
def __call__(self, predicts, labels):
predict = predicts['predict']
label_out = labels['label_out']
label_out = fluid.layers.cast(x=label_out, dtype='int64')
cost = fluid.layers.cross_entropy(input=predict, label=label_out)
sum_cost = fluid.layers.reduce_sum(cost)
return sum_cost
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
from paddle import nn
class CTCLoss(object):
def __init__(self, params):
class CTCLoss(nn.Layer):
def __init__(self, **kwargs):
super(CTCLoss, self).__init__()
self.char_num = params['char_num']
self.loss_func = nn.CTCLoss(blank=0, reduction='none')
def __call__(self, predicts, labels):
predict = predicts['predict']
label = labels['label']
cost = fluid.layers.warpctc(
input=predict, label=label, blank=self.char_num, norm_by_times=True)
sum_cost = fluid.layers.reduce_sum(cost)
return sum_cost
def __call__(self, predicts, batch):
predicts = predicts.transpose((1, 0, 2))
N, B, _ = predicts.shape
preds_lengths = paddle.to_tensor([N] * B, dtype='int64')
labels = batch[1].astype("int32")
label_lengths = batch[2].astype('int64')
loss = self.loss_func(predicts, labels, preds_lengths, label_lengths)
loss = loss.mean()
return {'loss': loss}
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
class SRNLoss(object):
def __init__(self, params):
super(SRNLoss, self).__init__()
self.char_num = params['char_num']
def __call__(self, predicts, others):
predict = predicts['predict']
word_predict = predicts['word_out']
gsrm_predict = predicts['gsrm_out']
label = others['label']
lbl_weight = others['lbl_weight']
casted_label = fluid.layers.cast(x=label, dtype='int64')
cost_word = fluid.layers.cross_entropy(
input=word_predict, label=casted_label)
cost_gsrm = fluid.layers.cross_entropy(
input=gsrm_predict, label=casted_label)
cost_vsfd = fluid.layers.cross_entropy(
input=predict, label=casted_label)
cost_word = fluid.layers.reshape(
x=fluid.layers.reduce_sum(cost_word), shape=[1])
cost_gsrm = fluid.layers.reshape(
x=fluid.layers.reduce_sum(cost_gsrm), shape=[1])
cost_vsfd = fluid.layers.reshape(
x=fluid.layers.reduce_sum(cost_vsfd), shape=[1])
sum_cost = fluid.layers.sum(
[cost_word, cost_vsfd * 2.0, cost_gsrm * 0.15])
return [sum_cost, cost_vsfd, cost_word]
......@@ -11,3 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['build_neck']
def build_neck(config):
from .fpn import FPN
from .rnn import SequenceEncoder
support_dict = ['FPN', 'SequenceEncoder']
module_name = config.pop('name')
assert module_name in support_dict, Exception('neck only support {}'.format(
support_dict))
module_class = eval(module_name)(**config)
return module_class
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
class FPN(nn.Layer):
def __init__(self, in_channels, out_channels, **kwargs):
super(FPN, self).__init__()
self.out_channels = out_channels
weight_attr = paddle.nn.initializer.MSRA(uniform=False)
self.in2_conv = nn.Conv2d(
in_channels=in_channels[0],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_51.w_0', initializer=weight_attr),
bias_attr=False)
self.in3_conv = nn.Conv2d(
in_channels=in_channels[1],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_50.w_0', initializer=weight_attr),
bias_attr=False)
self.in4_conv = nn.Conv2d(
in_channels=in_channels[2],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_49.w_0', initializer=weight_attr),
bias_attr=False)
self.in5_conv = nn.Conv2d(
in_channels=in_channels[3],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_48.w_0', initializer=weight_attr),
bias_attr=False)
self.p5_conv = nn.Conv2d(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(
name='conv2d_52.w_0', initializer=weight_attr),
bias_attr=False)
self.p4_conv = nn.Conv2d(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(
name='conv2d_53.w_0', initializer=weight_attr),
bias_attr=False)
self.p3_conv = nn.Conv2d(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(
name='conv2d_54.w_0', initializer=weight_attr),
bias_attr=False)
self.p2_conv = nn.Conv2d(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(
name='conv2d_55.w_0', initializer=weight_attr),
bias_attr=False)
def forward(self, x):
c2, c3, c4, c5 = x
in5 = self.in5_conv(c5)
in4 = self.in4_conv(c4)
in3 = self.in3_conv(c3)
in2 = self.in2_conv(c2)
out4 = in4 + F.resize_nearest(in5, scale=2) # 1/16
out3 = in3 + F.resize_nearest(out4, scale=2) # 1/8
out2 = in2 + F.resize_nearest(out3, scale=2) # 1/4
p5 = self.p5_conv(in5)
p4 = self.p4_conv(out4)
p3 = self.p3_conv(out3)
p2 = self.p2_conv(out2)
p5 = F.resize_nearest(p5, scale=8)
p4 = F.resize_nearest(p4, scale=4)
p3 = F.resize_nearest(p3, scale=2)
fuse = paddle.concat([p5, p4, p3, p2], axis=1)
return fuse
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import nn
from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr
class EncoderWithReshape(nn.Layer):
def __init__(self, in_channels, **kwargs):
super().__init__()
self.out_channels = in_channels
def forward(self, x):
B, C, H, W = x.shape
x = x.reshape((B, C, -1))
x = x.transpose([0, 2, 1]) # (NTC)(batch, width, channels)
return x
class Im2Seq(nn.Layer):
def __init__(self, in_channels, **kwargs):
super().__init__()
self.out_channels = in_channels
def forward(self, x):
B, C, H, W = x.shape
assert H == 1
x = x.transpose((0, 2, 3, 1))
x = x.reshape((-1, C))
return x
class EncoderWithRNN(nn.Layer):
def __init__(self, in_channels, hidden_size):
super(EncoderWithRNN, self).__init__()
self.out_channels = hidden_size * 2
# self.lstm1_fw = nn.LSTMCell(
# in_channels,
# hidden_size,
# weight_ih_attr=ParamAttr(name='lstm_st1_fc1_w'),
# bias_ih_attr=ParamAttr(name='lstm_st1_fc1_b'),
# weight_hh_attr=ParamAttr(name='lstm_st1_out1_w'),
# bias_hh_attr=ParamAttr(name='lstm_st1_out1_b'),
# )
# self.lstm1_bw = nn.LSTMCell(
# in_channels,
# hidden_size,
# weight_ih_attr=ParamAttr(name='lstm_st1_fc2_w'),
# bias_ih_attr=ParamAttr(name='lstm_st1_fc2_b'),
# weight_hh_attr=ParamAttr(name='lstm_st1_out2_w'),
# bias_hh_attr=ParamAttr(name='lstm_st1_out2_b'),
# )
# self.lstm2_fw = nn.LSTMCell(
# hidden_size,
# hidden_size,
# weight_ih_attr=ParamAttr(name='lstm_st2_fc1_w'),
# bias_ih_attr=ParamAttr(name='lstm_st2_fc1_b'),
# weight_hh_attr=ParamAttr(name='lstm_st2_out1_w'),
# bias_hh_attr=ParamAttr(name='lstm_st2_out1_b'),
# )
# self.lstm2_bw = nn.LSTMCell(
# hidden_size,
# hidden_size,
# weight_ih_attr=ParamAttr(name='lstm_st2_fc2_w'),
# bias_ih_attr=ParamAttr(name='lstm_st2_fc2_b'),
# weight_hh_attr=ParamAttr(name='lstm_st2_out2_w'),
# bias_hh_attr=ParamAttr(name='lstm_st2_out2_b'),
# )
self.lstm = nn.LSTM(
in_channels, hidden_size, direction='bidirectional', num_layers=2)
def forward(self, x):
# fw_x, _ = self.lstm1_fw(x)
# fw_x, _ = self.lstm2_fw(fw_x)
#
# # bw
# bw_x, _ = self.lstm1_bw(x)
# bw_x, _ = self.lstm2_bw(bw_x)
# x = paddle.concat([fw_x, bw_x], axis=2)
x, _ = self.lstm(x)
return x
class EncoderWithFC(nn.Layer):
def __init__(self, in_channels, hidden_size):
super(EncoderWithFC, self).__init__()
self.out_channels = hidden_size
weight_attr, bias_attr = get_para_bias_attr(
l2_decay=0.00001, k=in_channels, name='reduce_encoder_fea')
self.fc = nn.Linear(
in_channels,
hidden_size,
weight_attr=weight_attr,
bias_attr=bias_attr,
name='reduce_encoder_fea')
def forward(self, x):
x = self.fc(x)
return x
class SequenceEncoder(nn.Layer):
def __init__(self, in_channels, encoder_type, hidden_size, **kwargs):
super(SequenceEncoder, self).__init__()
self.encoder_reshape = EncoderWithReshape(in_channels)
self.out_channels = self.encoder_reshape.out_channels
if encoder_type == 'reshape':
self.only_reshape = True
else:
support_encoder_dict = {
'reshape': EncoderWithReshape,
'fc': EncoderWithFC,
'rnn': EncoderWithRNN
}
assert encoder_type in support_encoder_dict, '{} must in {}'.format(
encoder_type, support_encoder_dict.keys())
self.encoder = support_encoder_dict[encoder_type](
self.encoder_reshape.out_channels, hidden_size)
self.out_channels = self.encoder.out_channels
self.only_reshape = False
def forward(self, x):
x = self.encoder_reshape(x)
if not self.only_reshape:
x = self.encoder(x)
return x
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.param_attr import ParamAttr
import numpy as np
class LocalizationNetwork(object):
def __init__(self, params):
super(LocalizationNetwork, self).__init__()
self.F = params['num_fiducial']
self.loc_lr = params['loc_lr']
self.model_name = params['model_name']
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
conv = layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
bn_name = "bn_" + name
return layers.batch_norm(
input=conv,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def get_initial_fiducials(self):
""" see RARE paper Fig. 6 (a) """
F = self.F
ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
return initial_bias
def __call__(self, image):
F = self.F
loc_lr = self.loc_lr
if self.model_name == "large":
num_filters_list = [64, 128, 256, 512]
fc_dim = 256
else:
num_filters_list = [16, 32, 64, 128]
fc_dim = 64
for fno in range(len(num_filters_list)):
num_filters = num_filters_list[fno]
name = "loc_conv%d" % fno
if fno == 0:
conv = self.conv_bn_layer(
image, num_filters, 3, act='relu', name=name)
else:
conv = self.conv_bn_layer(
pool, num_filters, 3, act='relu', name=name)
if fno == len(num_filters_list) - 1:
pool = layers.adaptive_pool2d(
input=conv, pool_size=[1, 1], pool_type='avg')
else:
pool = layers.pool2d(
input=conv,
pool_size=2,
pool_stride=2,
pool_padding=0,
pool_type='max')
name = "loc_fc1"
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
fc1 = layers.fc(input=pool,
size=fc_dim,
param_attr=fluid.param_attr.ParamAttr(
learning_rate=loc_lr,
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + "_w"),
act='relu',
name=name)
initial_bias = self.get_initial_fiducials()
initial_bias = initial_bias.reshape(-1)
name = "loc_fc2"
param_attr = fluid.param_attr.ParamAttr(
learning_rate=loc_lr,
initializer=fluid.initializer.NumpyArrayInitializer(
np.zeros([fc_dim, F * 2])),
name=name + "_w")
bias_attr = fluid.param_attr.ParamAttr(
learning_rate=loc_lr,
initializer=fluid.initializer.NumpyArrayInitializer(initial_bias),
name=name + "_b")
fc2 = layers.fc(input=fc1,
size=F * 2,
param_attr=param_attr,
bias_attr=bias_attr,
name=name)
batch_C_prime = layers.reshape(x=fc2, shape=[-1, F, 2], inplace=False)
return batch_C_prime
class GridGenerator(object):
def __init__(self, params):
super(GridGenerator, self).__init__()
self.eps = 1e-6
self.F = params['num_fiducial']
def build_C(self):
""" Return coordinates of fiducial points in I_r; C """
F = self.F
ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
ctrl_pts_y_top = -1 * np.ones(int(F / 2))
ctrl_pts_y_bottom = np.ones(int(F / 2))
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
return C # F x 2
def build_P(self, I_r_size):
I_r_width, I_r_height = I_r_size
I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0)\
/ I_r_width # self.I_r_width
I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0)\
/ I_r_height # self.I_r_height
# P: self.I_r_width x self.I_r_height x 2
P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
# n (= self.I_r_width x self.I_r_height) x 2
return P.reshape([-1, 2])
def build_inv_delta_C(self, C):
""" Return inv_delta_C which is needed to calculate T """
F = self.F
hat_C = np.zeros((F, F), dtype=float) # F x F
for i in range(0, F):
for j in range(i, F):
r = np.linalg.norm(C[i] - C[j])
hat_C[i, j] = r
hat_C[j, i] = r
np.fill_diagonal(hat_C, 1)
hat_C = (hat_C**2) * np.log(hat_C)
# print(C.shape, hat_C.shape)
delta_C = np.concatenate( # F+3 x F+3
[
np.concatenate(
[np.ones((F, 1)), C, hat_C], axis=1), # F x F+3
np.concatenate(
[np.zeros((2, 3)), np.transpose(C)], axis=1), # 2 x F+3
np.concatenate(
[np.zeros((1, 3)), np.ones((1, F))], axis=1) # 1 x F+3
],
axis=0)
inv_delta_C = np.linalg.inv(delta_C)
return inv_delta_C # F+3 x F+3
def build_P_hat(self, C, P):
F = self.F
eps = self.eps
n = P.shape[0] # n (= self.I_r_width x self.I_r_height)
#P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
C_tile = np.expand_dims(C, axis=0) # 1 x F x 2
P_diff = P_tile - C_tile # n x F x 2
#rbf_norm: n x F
rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
#rbf: n x F
rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
return P_hat # n x F+3
def get_expand_tensor(self, batch_C_prime):
name = "ex_fc"
initializer = fluid.initializer.ConstantInitializer(value=0.0)
param_attr = fluid.param_attr.ParamAttr(
learning_rate=0.0, initializer=initializer, name=name + "_w")
bias_attr = fluid.param_attr.ParamAttr(
learning_rate=0.0, initializer=initializer, name=name + "_b")
batch_C_ex_part_tensor = fluid.layers.fc(input=batch_C_prime,
size=6,
param_attr=param_attr,
bias_attr=bias_attr,
name=name)
batch_C_ex_part_tensor = fluid.layers.reshape(
x=batch_C_ex_part_tensor, shape=[-1, 3, 2])
return batch_C_ex_part_tensor
def __call__(self, batch_C_prime, I_r_size):
C = self.build_C()
P = self.build_P(I_r_size)
inv_delta_C = self.build_inv_delta_C(C).astype('float32')
P_hat = self.build_P_hat(C, P).astype('float32')
inv_delta_C_tensor = layers.create_tensor(dtype='float32')
layers.assign(inv_delta_C, inv_delta_C_tensor)
inv_delta_C_tensor.stop_gradient = True
P_hat_tensor = layers.create_tensor(dtype='float32')
layers.assign(P_hat, P_hat_tensor)
P_hat_tensor.stop_gradient = True
batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
# batch_C_ex_part_tensor = create_tmp_var(
# fluid.default_main_program(),
# name='batch_C_ex_part_tensor',
# dtype='float32', shape=[-1, 3, 2])
# layers.py_func(func=get_batch_C_expand,
# x=[batch_C_prime], out=[batch_C_ex_part_tensor])
batch_C_ex_part_tensor.stop_gradient = True
batch_C_prime_with_zeros = layers.concat(
[batch_C_prime, batch_C_ex_part_tensor], axis=1)
batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
return batch_P_prime
class TPS(object):
def __init__(self, params):
super(TPS, self).__init__()
self.loc_net = LocalizationNetwork(params)
self.grid_generator = GridGenerator(params)
def __call__(self, image):
batch_C_prime = self.loc_net(image)
I_r_size = [image.shape[3], image.shape[2]]
batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
batch_P_prime = layers.reshape(
x=batch_P_prime, shape=[-1, image.shape[2], image.shape[3], 2])
batch_I_r = layers.grid_sampler(x=image, grid=batch_P_prime)
image.stop_gradient = False
return batch_I_r
......@@ -11,3 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['build_transform']
def build_transform(config):
support_dict = ['']
module_name = config.pop('name')
assert module_name in support_dict, Exception(
'transform only support {}'.format(support_dict))
module_class = eval(module_name)(**config)
return module_class
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
import paddle.fluid.layers.ops as ops
from ppocr.utils.utility import initial_logger
logger = initial_logger()
def cosine_decay_with_warmup(learning_rate,
step_each_epoch,
epochs=500,
warmup_minibatch=1000):
"""Applies cosine decay to the learning rate.
lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
decrease lr for every mini-batch and start with warmup.
"""
global_step = _decay_step_counter()
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="learning_rate")
warmup_minibatch = fluid.layers.fill_constant(
shape=[1],
dtype='float32',
value=float(warmup_minibatch),
force_cpu=True)
with fluid.layers.control_flow.Switch() as switch:
with switch.case(global_step < warmup_minibatch):
decayed_lr = learning_rate * (1.0 * global_step / warmup_minibatch)
fluid.layers.tensor.assign(input=decayed_lr, output=lr)
with switch.default():
decayed_lr = learning_rate * \
(ops.cos((global_step - warmup_minibatch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
fluid.layers.tensor.assign(input=decayed_lr, output=lr)
return lr
def AdamDecay(params, parameter_list=None):
"""
define optimizer function
args:
params(dict): the super parameters
parameter_list (list): list of Variable names to update to minimize loss
return:
"""
base_lr = params['base_lr']
beta1 = params['beta1']
beta2 = params['beta2']
l2_decay = params.get("l2_decay", 0.0)
if 'decay' in params:
supported_decay_mode = [
"cosine_decay", "cosine_decay_warmup", "piecewise_decay"
]
params = params['decay']
decay_mode = params['function']
assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
supported_decay_mode, decay_mode)
if decay_mode == "cosine_decay":
step_each_epoch = params['step_each_epoch']
total_epoch = params['total_epoch']
base_lr = fluid.layers.cosine_decay(
learning_rate=base_lr,
step_each_epoch=step_each_epoch,
epochs=total_epoch)
elif decay_mode == "cosine_decay_warmup":
step_each_epoch = params['step_each_epoch']
total_epoch = params['total_epoch']
warmup_minibatch = params.get("warmup_minibatch", 1000)
base_lr = cosine_decay_with_warmup(
learning_rate=base_lr,
step_each_epoch=step_each_epoch,
epochs=total_epoch,
warmup_minibatch=warmup_minibatch)
elif decay_mode == "piecewise_decay":
boundaries = params["boundaries"]
decay_rate = params["decay_rate"]
values = [
base_lr * decay_rate**idx
for idx in range(len(boundaries) + 1)
]
base_lr = fluid.layers.piecewise_decay(boundaries, values)
optimizer = fluid.optimizer.Adam(
learning_rate=base_lr,
beta1=beta1,
beta2=beta2,
regularization=L2Decay(regularization_coeff=l2_decay),
parameter_list=parameter_list)
return optimizer
def RMSProp(params, parameter_list=None):
"""
define optimizer function
args:
params(dict): the super parameters
parameter_list (list): list of Variable names to update to minimize loss
return:
"""
base_lr = params.get("base_lr", 0.001)
l2_decay = params.get("l2_decay", 0.00005)
if 'decay' in params:
supported_decay_mode = ["cosine_decay", "piecewise_decay"]
params = params['decay']
decay_mode = params['function']
assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
supported_decay_mode, decay_mode)
if decay_mode == "cosine_decay":
step_each_epoch = params['step_each_epoch']
total_epoch = params['total_epoch']
base_lr = fluid.layers.cosine_decay(
learning_rate=base_lr,
step_each_epoch=step_each_epoch,
epochs=total_epoch)
elif decay_mode == "piecewise_decay":
boundaries = params["boundaries"]
decay_rate = params["decay_rate"]
values = [
base_lr * decay_rate**idx
for idx in range(len(boundaries) + 1)
]
base_lr = fluid.layers.piecewise_decay(boundaries, values)
optimizer = fluid.optimizer.RMSProp(
learning_rate=base_lr,
regularization=fluid.regularizer.L2Decay(regularization_coeff=l2_decay))
return optimizer
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
__all__ = ['build_optimizer']
def build_lr_scheduler(lr_config, epochs, step_each_epoch):
from . import learning_rate
lr_config.update({'epochs': epochs, 'step_each_epoch': step_each_epoch})
if 'name' in lr_config:
lr_name = lr_config.pop('name')
lr = getattr(learning_rate, lr_name)(**lr_config)()
else:
lr = lr_config['lr']
return lr
def build_optimizer(config, epochs, step_each_epoch, parameters):
from . import regularizer, optimizer
config = copy.deepcopy(config)
# step1 build lr
lr = build_lr_scheduler(
config.pop('learning_rate'), epochs, step_each_epoch)
# step2 build regularization
if 'regularizer' in config and config['regularizer'] is not None:
reg_config = config.pop('regularizer')
reg_name = reg_config.pop('name') + 'Decay'
reg = getattr(regularizer, reg_name)(**reg_config)()
else:
reg = None
# step3 build optimizer
optim_name = config.pop('name')
optim = getattr(optimizer, optim_name)(learning_rate=lr,
regularization=reg,
**config)
return optim(parameters), lr
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle.optimizer import lr_scheduler
class Linear(object):
"""
Linear learning rate decay
Args:
lr (float): The initial learning rate. It is a python float number.
epochs(int): The decay step size. It determines the decay cycle.
end_lr(float, optional): The minimum final learning rate. Default: 0.0001.
power(float, optional): Power of polynomial. Default: 1.0.
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
"""
def __init__(self,
lr,
epochs,
step_each_epoch,
end_lr=0.0,
power=1.0,
warmup_epoch=0,
last_epoch=-1,
**kwargs):
super(Linear, self).__init__()
self.lr = lr
self.epochs = epochs * step_each_epoch
self.end_lr = end_lr
self.power = power
self.last_epoch = last_epoch
self.warmup_epoch = warmup_epoch * step_each_epoch
def __call__(self):
learning_rate = lr_scheduler.PolynomialLR(
learning_rate=self.lr,
decay_steps=self.epochs,
end_lr=self.end_lr,
power=self.power,
last_epoch=self.last_epoch)
if self.warmup_epoch > 0:
learning_rate = lr_scheduler.LinearLrWarmup(
learning_rate=learning_rate,
warmup_steps=self.warmup_epoch,
start_lr=0.0,
end_lr=self.lr,
last_epoch=self.last_epoch)
return learning_rate
class Cosine(object):
"""
Cosine learning rate decay
lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
Args:
lr(float): initial learning rate
step_each_epoch(int): steps each epoch
epochs(int): total training epochs
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
"""
def __init__(self,
lr,
step_each_epoch,
epochs,
warmup_epoch=0,
last_epoch=-1,
**kwargs):
super(Cosine, self).__init__()
self.lr = lr
self.T_max = step_each_epoch * epochs
self.last_epoch = last_epoch
self.warmup_epoch = warmup_epoch * step_each_epoch
def __call__(self):
learning_rate = lr_scheduler.CosineAnnealingLR(
learning_rate=self.lr, T_max=self.T_max, last_epoch=self.last_epoch)
if self.warmup_epoch > 0:
learning_rate = lr_scheduler.LinearLrWarmup(
learning_rate=learning_rate,
warmup_steps=self.warmup_epoch,
start_lr=0.0,
end_lr=self.lr,
last_epoch=self.last_epoch)
return learning_rate
class Step(object):
"""
Piecewise learning rate decay
Args:
step_each_epoch(int): steps each epoch
learning_rate (float): The initial learning rate. It is a python float number.
step_size (int): the interval to update.
gamma (float, optional): The Ratio that the learning rate will be reduced. ``new_lr = origin_lr * gamma`` .
It should be less than 1.0. Default: 0.1.
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
"""
def __init__(self,
lr,
step_size,
step_each_epoch,
gamma,
warmup_epoch=0,
last_epoch=-1,
**kwargs):
super(Step, self).__init__()
self.step_size = step_each_epoch * step_size
self.lr = lr
self.gamma = gamma
self.last_epoch = last_epoch
self.warmup_epoch = warmup_epoch * step_each_epoch
def __call__(self):
learning_rate = lr_scheduler.StepLR(
learning_rate=self.lr,
step_size=self.step_size,
gamma=self.gamma,
last_epoch=self.last_epoch)
if self.warmup_epoch > 0:
learning_rate = lr_scheduler.LinearLrWarmup(
learning_rate=learning_rate,
warmup_steps=self.warmup_epoch,
start_lr=0.0,
end_lr=self.lr,
last_epoch=self.last_epoch)
return learning_rate
class Piecewise(object):
"""
Piecewise learning rate decay
Args:
boundaries(list): A list of steps numbers. The type of element in the list is python int.
values(list): A list of learning rate values that will be picked during different epoch boundaries.
The type of element in the list is python float.
last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
"""
def __init__(self,
step_each_epoch,
decay_epochs,
values,
warmup_epoch=0,
last_epoch=-1,
**kwargs):
super(Piecewise, self).__init__()
self.boundaries = [step_each_epoch * e for e in decay_epochs]
self.values = values
self.last_epoch = last_epoch
self.warmup_epoch = warmup_epoch * step_each_epoch
def __call__(self):
learning_rate = lr_scheduler.PiecewiseLR(
boundaries=self.boundaries,
values=self.values,
last_epoch=self.last_epoch)
if self.warmup_epoch > 0:
learning_rate = lr_scheduler.LinearLrWarmup(
learning_rate=learning_rate,
warmup_steps=self.warmup_epoch,
start_lr=0.0,
end_lr=self.values[0],
last_epoch=self.last_epoch)
return learning_rate
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle import optimizer as optim
class Momentum(object):
"""
Simple Momentum optimizer with velocity state.
Args:
learning_rate (float|Variable) - The learning rate used to update parameters.
Can be a float value or a Variable with one float value as data element.
momentum (float) - Momentum factor.
regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
"""
def __init__(self, learning_rate, momentum, weight_decay=None, **args):
super(Momentum, self).__init__()
self.learning_rate = learning_rate
self.momentum = momentum
self.weight_decay = weight_decay
def __call__(self, parameters):
opt = optim.Momentum(
learning_rate=self.learning_rate,
momentum=self.momentum,
parameters=self.weight_decay,
weight_decay=parameters)
return opt
class Adam(object):
def __init__(self,
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-08,
parameter_list=None,
weight_decay=None,
grad_clip=None,
name=None,
lazy_mode=False,
**kwargs):
self.learning_rate = learning_rate
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
self.parameter_list = parameter_list
self.learning_rate = learning_rate
self.weight_decay = weight_decay
self.grad_clip = grad_clip
self.name = name
self.lazy_mode = lazy_mode
def __call__(self, parameters):
opt = optim.Adam(
learning_rate=self.learning_rate,
beta1=self.beta1,
beta2=self.beta2,
epsilon=self.epsilon,
weight_decay=self.weight_decay,
grad_clip=self.grad_clip,
name=self.name,
lazy_mode=self.lazy_mode,
parameters=parameters)
return opt
class RMSProp(object):
"""
Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.
Args:
learning_rate (float|Variable) - The learning rate used to update parameters.
Can be a float value or a Variable with one float value as data element.
momentum (float) - Momentum factor.
rho (float) - rho value in equation.
epsilon (float) - avoid division by zero, default is 1e-6.
regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
"""
def __init__(self,
learning_rate,
momentum,
rho=0.95,
epsilon=1e-6,
weight_decay=None,
**args):
super(RMSProp, self).__init__()
self.learning_rate = learning_rate
self.momentum = momentum
self.rho = rho
self.epsilon = epsilon
self.weight_decay = weight_decay
def __call__(self, parameters):
opt = optim.RMSProp(
learning_rate=self.learning_rate,
momentum=self.momentum,
rho=self.rho,
epsilon=self.epsilon,
weight_decay=self.weight_decay,
parameters=parameters)
return opt
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle import fluid
class L1Decay(object):
"""
L1 Weight Decay Regularization, which encourages the weights to be sparse.
Args:
factor(float): regularization coeff. Default:0.0.
"""
def __init__(self, factor=0.0):
super(L1Decay, self).__init__()
self.regularization_coeff = factor
def __call__(self):
reg = fluid.regularizer.L1Decay(
regularization_coeff=self.regularization_coeff)
return reg
class L2Decay(object):
"""
L2 Weight Decay Regularization, which encourages the weights to be sparse.
Args:
factor(float): regularization coeff. Default:0.0.
"""
def __init__(self, factor=0.0):
super(L2Decay, self).__init__()
self.regularization_coeff = factor
def __call__(self):
reg = fluid.regularizer.L2Decay(
regularization_coeff=self.regularization_coeff)
return reg
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
__all__ = ['build_post_process']
def build_post_process(config, global_config=None):
from .db_postprocess import DBPostProcess
from .rec_postprocess import CTCLabelDecode, AttnLabelDecode
support_dict = ['DBPostProcess', 'CTCLabelDecode', 'AttnLabelDecode']
config = copy.deepcopy(config)
module_name = config.pop('name')
if global_config is not None:
config.update(global_config)
assert module_name in support_dict, Exception(
'post process only support {}'.format(support_dict))
module_class = eval(module_name)(**config)
return module_class
......@@ -16,11 +16,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import numpy as np
import string
import cv2
from shapely.geometry import Polygon
import pyclipper
......@@ -31,11 +27,16 @@ class DBPostProcess(object):
The post process for Differentiable Binarization (DB).
"""
def __init__(self, params):
self.thresh = params['thresh']
self.box_thresh = params['box_thresh']
self.max_candidates = params['max_candidates']
self.unclip_ratio = params['unclip_ratio']
def __init__(self,
thresh=0.3,
box_thresh=0.7,
max_candidates=1000,
unclip_ratio=2.0,
**kwargs):
self.thresh = thresh
self.box_thresh = box_thresh
self.max_candidates = max_candidates
self.unclip_ratio = unclip_ratio
self.min_size = 3
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
......@@ -55,9 +56,9 @@ class DBPostProcess(object):
contours, _ = outs[0], outs[1]
num_contours = min(len(contours), self.max_candidates)
boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
scores = np.zeros((num_contours, ), dtype=np.float32)
boxes = []
scores = []
for index in range(num_contours):
contour = contours[index]
points, sside = self.get_mini_boxes(contour)
......@@ -73,17 +74,14 @@ class DBPostProcess(object):
if sside < self.min_size + 2:
continue
box = np.array(box)
if not isinstance(dest_width, int):
dest_width = dest_width.item()
dest_height = dest_height.item()
box[:, 0] = np.clip(
np.round(box[:, 0] / width * dest_width), 0, dest_width)
box[:, 1] = np.clip(
np.round(box[:, 1] / height * dest_height), 0, dest_height)
boxes[index, :, :] = box.astype(np.int16)
scores[index] = score
return boxes, scores
boxes.append(box.astype(np.int16))
scores.append(score)
return np.array(boxes, dtype=np.int16), scores
def unclip(self, box):
unclip_ratio = self.unclip_ratio
......@@ -131,28 +129,15 @@ class DBPostProcess(object):
cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
def __call__(self, outs_dict, ratio_list):
pred = outs_dict['maps']
pred = pred[:, 0, :, :]
def __call__(self, pred, shape_list):
pred = pred.numpy()[:, 0, :, :]
segmentation = pred > self.thresh
boxes_batch = []
for batch_index in range(pred.shape[0]):
height, width = pred.shape[-2:]
tmp_boxes, tmp_scores = self.boxes_from_bitmap(
height, width = shape_list[batch_index]
boxes, scores = self.boxes_from_bitmap(
pred[batch_index], segmentation[batch_index], width, height)
boxes = []
for k in range(len(tmp_boxes)):
if tmp_scores[k] > self.box_thresh:
boxes.append(tmp_boxes[k])
if len(boxes) > 0:
boxes = np.array(boxes)
ratio_h, ratio_w = ratio_list[batch_index]
boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
boxes[:, :, 1] = boxes[:, :, 1] / ratio_h
boxes_batch.append(boxes)
boxes_batch.append({'points': boxes})
return boxes_batch
import cv2
import numpy as np
import pyclipper
from shapely.geometry import Polygon
class DBPostProcess():
def __init__(self,
thresh=0.3,
box_thresh=0.7,
max_candidates=1000,
unclip_ratio=1.5):
self.min_size = 3
self.thresh = thresh
self.box_thresh = box_thresh
self.max_candidates = max_candidates
self.unclip_ratio = unclip_ratio
def __call__(self, pred, shape_list, is_output_polygon=False):
'''
batch: (image, polygons, ignore_tags
h_w_list: 包含[h,w]的数组
pred:
binary: text region segmentation map, with shape (N, 1,H, W)
'''
pred = pred.numpy()[:, 0, :, :]
segmentation = self.binarize(pred)
batch_out = []
for batch_index in range(pred.shape[0]):
height, width = shape_list[batch_index]
boxes, scores = self.post_p(
pred[batch_index],
segmentation[batch_index],
width,
height,
is_output_polygon=is_output_polygon)
batch_out.append({"points": boxes})
return batch_out
def binarize(self, pred):
return pred > self.thresh
def post_p(self,
pred,
bitmap,
dest_width,
dest_height,
is_output_polygon=True):
'''
_bitmap: single map with shape (H, W),
whose values are binarized as {0, 1}
'''
height, width = pred.shape
boxes = []
new_scores = []
contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
for contour in contours[:self.max_candidates]:
epsilon = 0.005 * cv2.arcLength(contour, True)
approx = cv2.approxPolyDP(contour, epsilon, True)
points = approx.reshape((-1, 2))
if points.shape[0] < 4:
continue
score = self.box_score_fast(pred, points.reshape(-1, 2))
if self.box_thresh > score:
continue
if points.shape[0] > 2:
box = self.unclip(points, unclip_ratio=self.unclip_ratio)
if len(box) > 1 or len(box) == 0:
continue
else:
continue
four_point_box, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
if sside < self.min_size + 2:
continue
if not is_output_polygon:
box = np.array(four_point_box)
else:
box = box.reshape(-1, 2)
box[:, 0] = np.clip(
np.round(box[:, 0] / width * dest_width), 0, dest_width)
box[:, 1] = np.clip(
np.round(box[:, 1] / height * dest_height), 0, dest_height)
boxes.append(box)
new_scores.append(score)
return boxes, new_scores
def unclip(self, box, unclip_ratio=1.5):
poly = Polygon(box)
distance = poly.area * unclip_ratio / poly.length
offset = pyclipper.PyclipperOffset()
offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
expanded = np.array(offset.Execute(distance))
return expanded
def get_mini_boxes(self, contour):
bounding_box = cv2.minAreaRect(contour)
points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
index_1, index_2, index_3, index_4 = 0, 1, 2, 3
if points[1][1] > points[0][1]:
index_1 = 0
index_4 = 1
else:
index_1 = 1
index_4 = 0
if points[3][1] > points[2][1]:
index_2 = 2
index_3 = 3
else:
index_2 = 3
index_3 = 2
box = [
points[index_1], points[index_2], points[index_3], points[index_4]
]
return box, min(bounding_box[1])
def box_score_fast(self, bitmap, _box):
h, w = bitmap.shape[:2]
box = _box.copy()
xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)
ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1)
mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
box[:, 0] = box[:, 0] - xmin
box[:, 1] = box[:, 1] - ymin
cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from .locality_aware_nms import nms_locality
import cv2
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
class EASTPostPocess(object):
"""
The post process for EAST.
"""
def __init__(self, params):
self.score_thresh = params['score_thresh']
self.cover_thresh = params['cover_thresh']
self.nms_thresh = params['nms_thresh']
# c++ la-nms is faster, but only support python 3.5
self.is_python35 = False
if sys.version_info.major == 3 and sys.version_info.minor == 5:
self.is_python35 = True
def restore_rectangle_quad(self, origin, geometry):
"""
Restore rectangle from quadrangle.
"""
# quad
origin_concat = np.concatenate(
(origin, origin, origin, origin), axis=1) # (n, 8)
pred_quads = origin_concat - geometry
pred_quads = pred_quads.reshape((-1, 4, 2)) # (n, 4, 2)
return pred_quads
def detect(self,
score_map,
geo_map,
score_thresh=0.8,
cover_thresh=0.1,
nms_thresh=0.2):
"""
restore text boxes from score map and geo map
"""
score_map = score_map[0]
geo_map = np.swapaxes(geo_map, 1, 0)
geo_map = np.swapaxes(geo_map, 1, 2)
# filter the score map
xy_text = np.argwhere(score_map > score_thresh)
if len(xy_text) == 0:
return []
# sort the text boxes via the y axis
xy_text = xy_text[np.argsort(xy_text[:, 0])]
#restore quad proposals
text_box_restored = self.restore_rectangle_quad(
xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
boxes[:, :8] = text_box_restored.reshape((-1, 8))
boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
if self.is_python35:
import lanms
boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
else:
boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
if boxes.shape[0] == 0:
return []
# Here we filter some low score boxes by the average score map,
# this is different from the orginal paper.
for i, box in enumerate(boxes):
mask = np.zeros_like(score_map, dtype=np.uint8)
cv2.fillPoly(mask, box[:8].reshape(
(-1, 4, 2)).astype(np.int32) // 4, 1)
boxes[i, 8] = cv2.mean(score_map, mask)[0]
boxes = boxes[boxes[:, 8] > cover_thresh]
return boxes
def sort_poly(self, p):
"""
Sort polygons.
"""
min_axis = np.argmin(np.sum(p, axis=1))
p = p[[min_axis, (min_axis + 1) % 4,\
(min_axis + 2) % 4, (min_axis + 3) % 4]]
if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
return p
else:
return p[[0, 3, 2, 1]]
def __call__(self, outs_dict, ratio_list):
score_list = outs_dict['f_score']
geo_list = outs_dict['f_geo']
img_num = len(ratio_list)
dt_boxes_list = []
for ino in range(img_num):
score = score_list[ino]
geo = geo_list[ino]
boxes = self.detect(
score_map=score,
geo_map=geo,
score_thresh=self.score_thresh,
cover_thresh=self.cover_thresh,
nms_thresh=self.nms_thresh)
boxes_norm = []
if len(boxes) > 0:
ratio_h, ratio_w = ratio_list[ino]
boxes = boxes[:, :8].reshape((-1, 4, 2))
boxes[:, :, 0] /= ratio_w
boxes[:, :, 1] /= ratio_h
for i_box, box in enumerate(boxes):
box = self.sort_poly(box.astype(np.int32))
if np.linalg.norm(box[0] - box[1]) < 5 \
or np.linalg.norm(box[3] - box[0]) < 5:
continue
boxes_norm.append(box)
dt_boxes_list.append(np.array(boxes_norm))
return dt_boxes_list
#!/usr/bin/env python
#
# Copyright (C) 2014 Google Inc.
#
# This file is part of YouCompleteMe.
#
# YouCompleteMe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# YouCompleteMe is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import glob
import ycm_core
# These are the compilation flags that will be used in case there's no
# compilation database set (by default, one is not set).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
BASE_DIR = os.path.dirname(os.path.realpath(__file__))
from plumbum.cmd import python_config
flags = [
'-Wall',
'-Wextra',
'-Wnon-virtual-dtor',
'-Winvalid-pch',
'-Wno-unused-local-typedefs',
'-std=c++11',
'-x', 'c++',
'-Iinclude',
] + python_config('--cflags').split()
# Set this to the absolute path to the folder (NOT the file!) containing the
# compile_commands.json file to use that instead of 'flags'. See here for
# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags.
compilation_database_folder = ''
if os.path.exists( compilation_database_folder ):
database = ycm_core.CompilationDatabase( compilation_database_folder )
else:
database = None
SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]
def DirectoryOfThisScript():
return os.path.dirname( os.path.abspath( __file__ ) )
def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
if not working_directory:
return list( flags )
new_flags = []
make_next_absolute = False
path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
for flag in flags:
new_flag = flag
if make_next_absolute:
make_next_absolute = False
if not flag.startswith( '/' ):
new_flag = os.path.join( working_directory, flag )
for path_flag in path_flags:
if flag == path_flag:
make_next_absolute = True
break
if flag.startswith( path_flag ):
path = flag[ len( path_flag ): ]
new_flag = path_flag + os.path.join( working_directory, path )
break
if new_flag:
new_flags.append( new_flag )
return new_flags
def IsHeaderFile( filename ):
extension = os.path.splitext( filename )[ 1 ]
return extension in [ '.h', '.hxx', '.hpp', '.hh' ]
def GetCompilationInfoForFile( filename ):
# The compilation_commands.json file generated by CMake does not have entries
# for header files. So we do our best by asking the db for flags for a
# corresponding source file, if any. If one exists, the flags for that file
# should be good enough.
if IsHeaderFile( filename ):
basename = os.path.splitext( filename )[ 0 ]
for extension in SOURCE_EXTENSIONS:
replacement_file = basename + extension
if os.path.exists( replacement_file ):
compilation_info = database.GetCompilationInfoForFile(
replacement_file )
if compilation_info.compiler_flags_:
return compilation_info
return None
return database.GetCompilationInfoForFile( filename )
# This is the entry point; this function is called by ycmd to produce flags for
# a file.
def FlagsForFile( filename, **kwargs ):
if database:
# Bear in mind that compilation_info.compiler_flags_ does NOT return a
# python list, but a "list-like" StringVec object
compilation_info = GetCompilationInfoForFile( filename )
if not compilation_info:
return None
final_flags = MakeRelativePathsInFlagsAbsolute(
compilation_info.compiler_flags_,
compilation_info.compiler_working_dir_ )
else:
relative_to = DirectoryOfThisScript()
final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )
return {
'flags': final_flags,
'do_cache': True
}
CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags)
LDFLAGS = $(shell python3-config --ldflags)
DEPS = lanms.h $(shell find include -xtype f)
CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp
LIB_SO = adaptor.so
$(LIB_SO): $(CXX_SOURCES) $(DEPS)
$(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC
clean:
rm -rf $(LIB_SO)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment