Commit aad3093a authored by WenmuZhou

dygraph first commit

parent 10f7e519
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import numpy as np
class AttentionLoss(object):
    def __init__(self, params):
        super(AttentionLoss, self).__init__()
        self.char_num = params['char_num']

    def __call__(self, predicts, labels):
        predict = predicts['predict']
        label_out = labels['label_out']
        label_out = fluid.layers.cast(x=label_out, dtype='int64')
        cost = fluid.layers.cross_entropy(input=predict, label=label_out)
        sum_cost = fluid.layers.reduce_sum(cost)
        return sum_cost
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn


class CTCLoss(nn.Layer):
    def __init__(self, **kwargs):
        super(CTCLoss, self).__init__()
        self.loss_func = nn.CTCLoss(blank=0, reduction='none')

    def __call__(self, predicts, batch):
        predicts = predicts.transpose((1, 0, 2))  # (B, T, C) -> (T, B, C)
        N, B, _ = predicts.shape
        preds_lengths = paddle.to_tensor([N] * B, dtype='int64')
        labels = batch[1].astype("int32")
        label_lengths = batch[2].astype('int64')
        loss = self.loss_func(predicts, labels, preds_lengths, label_lengths)
        loss = loss.mean()
        return {'loss': loss}
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
class SRNLoss(object):
    def __init__(self, params):
        super(SRNLoss, self).__init__()
        self.char_num = params['char_num']

    def __call__(self, predicts, others):
        predict = predicts['predict']
        word_predict = predicts['word_out']
        gsrm_predict = predicts['gsrm_out']
        label = others['label']
        lbl_weight = others['lbl_weight']

        casted_label = fluid.layers.cast(x=label, dtype='int64')
        cost_word = fluid.layers.cross_entropy(
            input=word_predict, label=casted_label)
        cost_gsrm = fluid.layers.cross_entropy(
            input=gsrm_predict, label=casted_label)
        cost_vsfd = fluid.layers.cross_entropy(
            input=predict, label=casted_label)

        cost_word = fluid.layers.reshape(
            x=fluid.layers.reduce_sum(cost_word), shape=[1])
        cost_gsrm = fluid.layers.reshape(
            x=fluid.layers.reduce_sum(cost_gsrm), shape=[1])
        cost_vsfd = fluid.layers.reshape(
            x=fluid.layers.reduce_sum(cost_vsfd), shape=[1])

        # weighted sum of the three branch losses
        sum_cost = fluid.layers.sum(
            [cost_word, cost_vsfd * 2.0, cost_gsrm * 0.15])
        return [sum_cost, cost_vsfd, cost_word]
@@ -11,3 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['build_neck']


def build_neck(config):
    from .fpn import FPN
    from .rnn import SequenceEncoder
    support_dict = ['FPN', 'SequenceEncoder']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('neck only support {}'.format(
        support_dict))
    module_class = eval(module_name)(**config)
    return module_class
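A hedged usage sketch for this factory: the channel numbers below are illustrative only, and the call assumes it runs inside the package so the relative imports resolve.

# Hypothetical config; 'name' is popped by build_neck itself.
config = {'name': 'FPN', 'in_channels': [16, 24, 56, 480], 'out_channels': 256}
neck = build_neck(config)
print(neck.out_channels)  # 256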
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
class FPN(nn.Layer):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(FPN, self).__init__()
        self.out_channels = out_channels
        weight_attr = paddle.nn.initializer.MSRA(uniform=False)

        self.in2_conv = nn.Conv2d(
            in_channels=in_channels[0],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(
                name='conv2d_51.w_0', initializer=weight_attr),
            bias_attr=False)
        self.in3_conv = nn.Conv2d(
            in_channels=in_channels[1],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(
                name='conv2d_50.w_0', initializer=weight_attr),
            bias_attr=False)
        self.in4_conv = nn.Conv2d(
            in_channels=in_channels[2],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(
                name='conv2d_49.w_0', initializer=weight_attr),
            bias_attr=False)
        self.in5_conv = nn.Conv2d(
            in_channels=in_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(
                name='conv2d_48.w_0', initializer=weight_attr),
            bias_attr=False)
        self.p5_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(
                name='conv2d_52.w_0', initializer=weight_attr),
            bias_attr=False)
        self.p4_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(
                name='conv2d_53.w_0', initializer=weight_attr),
            bias_attr=False)
        self.p3_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(
                name='conv2d_54.w_0', initializer=weight_attr),
            bias_attr=False)
        self.p2_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(
                name='conv2d_55.w_0', initializer=weight_attr),
            bias_attr=False)

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.in5_conv(c5)
        in4 = self.in4_conv(c4)
        in3 = self.in3_conv(c3)
        in2 = self.in2_conv(c2)

        out4 = in4 + F.resize_nearest(in5, scale=2)  # 1/16
        out3 = in3 + F.resize_nearest(out4, scale=2)  # 1/8
        out2 = in2 + F.resize_nearest(out3, scale=2)  # 1/4

        p5 = self.p5_conv(in5)
        p4 = self.p4_conv(out4)
        p3 = self.p3_conv(out3)
        p2 = self.p2_conv(out2)

        p5 = F.resize_nearest(p5, scale=8)
        p4 = F.resize_nearest(p4, scale=4)
        p3 = F.resize_nearest(p3, scale=2)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import nn
from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr
class EncoderWithReshape(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super().__init__()
        self.out_channels = in_channels

    def forward(self, x):
        B, C, H, W = x.shape
        x = x.reshape((B, C, -1))
        x = x.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
        return x


class Im2Seq(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super().__init__()
        self.out_channels = in_channels

    def forward(self, x):
        B, C, H, W = x.shape
        assert H == 1
        x = x.transpose((0, 2, 3, 1))
        x = x.reshape((-1, C))
        return x


class EncoderWithRNN(nn.Layer):
    def __init__(self, in_channels, hidden_size):
        super(EncoderWithRNN, self).__init__()
        self.out_channels = hidden_size * 2
        # self.lstm1_fw = nn.LSTMCell(
        #     in_channels,
        #     hidden_size,
        #     weight_ih_attr=ParamAttr(name='lstm_st1_fc1_w'),
        #     bias_ih_attr=ParamAttr(name='lstm_st1_fc1_b'),
        #     weight_hh_attr=ParamAttr(name='lstm_st1_out1_w'),
        #     bias_hh_attr=ParamAttr(name='lstm_st1_out1_b'),
        # )
        # self.lstm1_bw = nn.LSTMCell(
        #     in_channels,
        #     hidden_size,
        #     weight_ih_attr=ParamAttr(name='lstm_st1_fc2_w'),
        #     bias_ih_attr=ParamAttr(name='lstm_st1_fc2_b'),
        #     weight_hh_attr=ParamAttr(name='lstm_st1_out2_w'),
        #     bias_hh_attr=ParamAttr(name='lstm_st1_out2_b'),
        # )
        # self.lstm2_fw = nn.LSTMCell(
        #     hidden_size,
        #     hidden_size,
        #     weight_ih_attr=ParamAttr(name='lstm_st2_fc1_w'),
        #     bias_ih_attr=ParamAttr(name='lstm_st2_fc1_b'),
        #     weight_hh_attr=ParamAttr(name='lstm_st2_out1_w'),
        #     bias_hh_attr=ParamAttr(name='lstm_st2_out1_b'),
        # )
        # self.lstm2_bw = nn.LSTMCell(
        #     hidden_size,
        #     hidden_size,
        #     weight_ih_attr=ParamAttr(name='lstm_st2_fc2_w'),
        #     bias_ih_attr=ParamAttr(name='lstm_st2_fc2_b'),
        #     weight_hh_attr=ParamAttr(name='lstm_st2_out2_w'),
        #     bias_hh_attr=ParamAttr(name='lstm_st2_out2_b'),
        # )
        self.lstm = nn.LSTM(
            in_channels, hidden_size, direction='bidirectional', num_layers=2)

    def forward(self, x):
        # fw_x, _ = self.lstm1_fw(x)
        # fw_x, _ = self.lstm2_fw(fw_x)
        #
        # # bw
        # bw_x, _ = self.lstm1_bw(x)
        # bw_x, _ = self.lstm2_bw(bw_x)
        # x = paddle.concat([fw_x, bw_x], axis=2)
        x, _ = self.lstm(x)
        return x


class EncoderWithFC(nn.Layer):
    def __init__(self, in_channels, hidden_size):
        super(EncoderWithFC, self).__init__()
        self.out_channels = hidden_size
        weight_attr, bias_attr = get_para_bias_attr(
            l2_decay=0.00001, k=in_channels, name='reduce_encoder_fea')
        self.fc = nn.Linear(
            in_channels,
            hidden_size,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            name='reduce_encoder_fea')

    def forward(self, x):
        x = self.fc(x)
        return x


class SequenceEncoder(nn.Layer):
    def __init__(self, in_channels, encoder_type, hidden_size, **kwargs):
        super(SequenceEncoder, self).__init__()
        self.encoder_reshape = EncoderWithReshape(in_channels)
        self.out_channels = self.encoder_reshape.out_channels
        if encoder_type == 'reshape':
            self.only_reshape = True
        else:
            support_encoder_dict = {
                'reshape': EncoderWithReshape,
                'fc': EncoderWithFC,
                'rnn': EncoderWithRNN
            }
            assert encoder_type in support_encoder_dict, '{} must in {}'.format(
                encoder_type, support_encoder_dict.keys())

            self.encoder = support_encoder_dict[encoder_type](
                self.encoder_reshape.out_channels, hidden_size)
            self.out_channels = self.encoder.out_channels
            self.only_reshape = False

    def forward(self, x):
        x = self.encoder_reshape(x)
        if not self.only_reshape:
            x = self.encoder(x)
        return x
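A minimal sketch of the shape flow through SequenceEncoder, with sizes chosen purely for illustration:

import paddle

# Hypothetical CRNN-style feature map: batch 4, 288 channels, height 1, width 80.
x = paddle.rand([4, 288, 1, 80])
encoder = SequenceEncoder(in_channels=288, encoder_type='rnn', hidden_size=48)
y = encoder(x)
# EncoderWithReshape folds H*W into the time axis, so y is (4, 80, 96),
# where 96 = hidden_size * 2 from the bidirectional LSTM.
print(y.shape)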
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.param_attr import ParamAttr
import numpy as np
class LocalizationNetwork(object):
    def __init__(self, params):
        super(LocalizationNetwork, self).__init__()
        self.F = params['num_fiducial']
        self.loc_lr = params['loc_lr']
        self.model_name = params['model_name']

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        conv = layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        bn_name = "bn_" + name
        return layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def get_initial_fiducials(self):
        """ see RARE paper Fig. 6 (a) """
        F = self.F
        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
        ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
        ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
        initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
        return initial_bias

    def __call__(self, image):
        F = self.F
        loc_lr = self.loc_lr
        if self.model_name == "large":
            num_filters_list = [64, 128, 256, 512]
            fc_dim = 256
        else:
            num_filters_list = [16, 32, 64, 128]
            fc_dim = 64

        for fno in range(len(num_filters_list)):
            num_filters = num_filters_list[fno]
            name = "loc_conv%d" % fno
            if fno == 0:
                conv = self.conv_bn_layer(
                    image, num_filters, 3, act='relu', name=name)
            else:
                conv = self.conv_bn_layer(
                    pool, num_filters, 3, act='relu', name=name)

            if fno == len(num_filters_list) - 1:
                pool = layers.adaptive_pool2d(
                    input=conv, pool_size=[1, 1], pool_type='avg')
            else:
                pool = layers.pool2d(
                    input=conv,
                    pool_size=2,
                    pool_stride=2,
                    pool_padding=0,
                    pool_type='max')

        name = "loc_fc1"
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        fc1 = layers.fc(input=pool,
                        size=fc_dim,
                        param_attr=fluid.param_attr.ParamAttr(
                            learning_rate=loc_lr,
                            initializer=fluid.initializer.Uniform(-stdv, stdv),
                            name=name + "_w"),
                        act='relu',
                        name=name)

        initial_bias = self.get_initial_fiducials()
        initial_bias = initial_bias.reshape(-1)
        name = "loc_fc2"
        param_attr = fluid.param_attr.ParamAttr(
            learning_rate=loc_lr,
            initializer=fluid.initializer.NumpyArrayInitializer(
                np.zeros([fc_dim, F * 2])),
            name=name + "_w")
        bias_attr = fluid.param_attr.ParamAttr(
            learning_rate=loc_lr,
            initializer=fluid.initializer.NumpyArrayInitializer(initial_bias),
            name=name + "_b")
        fc2 = layers.fc(input=fc1,
                        size=F * 2,
                        param_attr=param_attr,
                        bias_attr=bias_attr,
                        name=name)
        batch_C_prime = layers.reshape(x=fc2, shape=[-1, F, 2], inplace=False)
        return batch_C_prime
class GridGenerator(object):
    def __init__(self, params):
        super(GridGenerator, self).__init__()
        self.eps = 1e-6
        self.F = params['num_fiducial']

    def build_C(self):
        """ Return coordinates of fiducial points in I_r; C """
        F = self.F
        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
        ctrl_pts_y_top = -1 * np.ones(int(F / 2))
        ctrl_pts_y_bottom = np.ones(int(F / 2))
        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
        C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
        return C  # F x 2

    def build_P(self, I_r_size):
        I_r_width, I_r_height = I_r_size
        I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0) \
            / I_r_width  # self.I_r_width
        I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0) \
            / I_r_height  # self.I_r_height
        # P: self.I_r_width x self.I_r_height x 2
        P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
        # n (= self.I_r_width x self.I_r_height) x 2
        return P.reshape([-1, 2])

    def build_inv_delta_C(self, C):
        """ Return inv_delta_C which is needed to calculate T """
        F = self.F
        hat_C = np.zeros((F, F), dtype=float)  # F x F
        for i in range(0, F):
            for j in range(i, F):
                r = np.linalg.norm(C[i] - C[j])
                hat_C[i, j] = r
                hat_C[j, i] = r
        np.fill_diagonal(hat_C, 1)
        hat_C = (hat_C**2) * np.log(hat_C)
        # print(C.shape, hat_C.shape)
        delta_C = np.concatenate(  # F+3 x F+3
            [
                np.concatenate(
                    [np.ones((F, 1)), C, hat_C], axis=1),  # F x F+3
                np.concatenate(
                    [np.zeros((2, 3)), np.transpose(C)], axis=1),  # 2 x F+3
                np.concatenate(
                    [np.zeros((1, 3)), np.ones((1, F))], axis=1)  # 1 x F+3
            ],
            axis=0)
        inv_delta_C = np.linalg.inv(delta_C)
        return inv_delta_C  # F+3 x F+3

    def build_P_hat(self, C, P):
        F = self.F
        eps = self.eps
        n = P.shape[0]  # n (= self.I_r_width x self.I_r_height)
        # P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
        P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
        C_tile = np.expand_dims(C, axis=0)  # 1 x F x 2
        P_diff = P_tile - C_tile  # n x F x 2
        # rbf_norm: n x F
        rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
        # rbf: n x F
        rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
        P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
        return P_hat  # n x F+3

    def get_expand_tensor(self, batch_C_prime):
        name = "ex_fc"
        initializer = fluid.initializer.ConstantInitializer(value=0.0)
        param_attr = fluid.param_attr.ParamAttr(
            learning_rate=0.0, initializer=initializer, name=name + "_w")
        bias_attr = fluid.param_attr.ParamAttr(
            learning_rate=0.0, initializer=initializer, name=name + "_b")
        batch_C_ex_part_tensor = fluid.layers.fc(input=batch_C_prime,
                                                 size=6,
                                                 param_attr=param_attr,
                                                 bias_attr=bias_attr,
                                                 name=name)
        batch_C_ex_part_tensor = fluid.layers.reshape(
            x=batch_C_ex_part_tensor, shape=[-1, 3, 2])
        return batch_C_ex_part_tensor

    def __call__(self, batch_C_prime, I_r_size):
        C = self.build_C()
        P = self.build_P(I_r_size)
        inv_delta_C = self.build_inv_delta_C(C).astype('float32')
        P_hat = self.build_P_hat(C, P).astype('float32')

        inv_delta_C_tensor = layers.create_tensor(dtype='float32')
        layers.assign(inv_delta_C, inv_delta_C_tensor)
        inv_delta_C_tensor.stop_gradient = True
        P_hat_tensor = layers.create_tensor(dtype='float32')
        layers.assign(P_hat, P_hat_tensor)
        P_hat_tensor.stop_gradient = True

        batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
        # batch_C_ex_part_tensor = create_tmp_var(
        #     fluid.default_main_program(),
        #     name='batch_C_ex_part_tensor',
        #     dtype='float32', shape=[-1, 3, 2])
        # layers.py_func(func=get_batch_C_expand,
        #     x=[batch_C_prime], out=[batch_C_ex_part_tensor])
        batch_C_ex_part_tensor.stop_gradient = True

        batch_C_prime_with_zeros = layers.concat(
            [batch_C_prime, batch_C_ex_part_tensor], axis=1)
        batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
        batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
        return batch_P_prime
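Since build_C, build_P, build_inv_delta_C and build_P_hat are pure numpy, their shapes can be sanity-checked outside a Paddle program; a hedged example with F = 20 fiducials and an illustrative 32x100 rectified image:

gg = GridGenerator({'num_fiducial': 20})
C = gg.build_C()                        # (20, 2) fiducial points on the edges
P = gg.build_P((100, 32))               # (3200, 2) normalized sampling grid
inv_delta_C = gg.build_inv_delta_C(C)   # (23, 23), i.e. (F+3) x (F+3)
P_hat = gg.build_P_hat(C, P)            # (3200, 23), i.e. n x (F+3)
print(C.shape, P.shape, inv_delta_C.shape, P_hat.shape)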
class TPS(object):
    def __init__(self, params):
        super(TPS, self).__init__()
        self.loc_net = LocalizationNetwork(params)
        self.grid_generator = GridGenerator(params)

    def __call__(self, image):
        batch_C_prime = self.loc_net(image)
        I_r_size = [image.shape[3], image.shape[2]]
        batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
        batch_P_prime = layers.reshape(
            x=batch_P_prime, shape=[-1, image.shape[2], image.shape[3], 2])
        batch_I_r = layers.grid_sampler(x=image, grid=batch_P_prime)
        image.stop_gradient = False
        return batch_I_r
@@ -11,3 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['build_transform']


def build_transform(config):
    support_dict = ['']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception(
        'transform only support {}'.format(support_dict))
    module_class = eval(module_name)(**config)
    return module_class
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
import paddle.fluid.layers.ops as ops
from ppocr.utils.utility import initial_logger
logger = initial_logger()
def cosine_decay_with_warmup(learning_rate,
                             step_each_epoch,
                             epochs=500,
                             warmup_minibatch=1000):
    """Applies cosine decay to the learning rate.
    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
    decrease lr for every mini-batch and start with warmup.
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_minibatch = fluid.layers.fill_constant(
        shape=[1],
        dtype='float32',
        value=float(warmup_minibatch),
        force_cpu=True)

    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(global_step < warmup_minibatch):
            decayed_lr = learning_rate * (1.0 * global_step / warmup_minibatch)
            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
        with switch.default():
            decayed_lr = learning_rate * \
                (ops.cos((global_step - warmup_minibatch) * (math.pi / (epochs * step_each_epoch))) + 1) / 2
            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr


def AdamDecay(params, parameter_list=None):
    """
    define optimizer function
    args:
        params(dict): the hyperparameters
        parameter_list (list): list of Variable names to update to minimize loss
    return:
    """
    base_lr = params['base_lr']
    beta1 = params['beta1']
    beta2 = params['beta2']
    l2_decay = params.get("l2_decay", 0.0)

    if 'decay' in params:
        supported_decay_mode = [
            "cosine_decay", "cosine_decay_warmup", "piecewise_decay"
        ]
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
            supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "cosine_decay_warmup":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            warmup_minibatch = params.get("warmup_minibatch", 1000)
            base_lr = cosine_decay_with_warmup(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch,
                warmup_minibatch=warmup_minibatch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    optimizer = fluid.optimizer.Adam(
        learning_rate=base_lr,
        beta1=beta1,
        beta2=beta2,
        regularization=L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)
    return optimizer


def RMSProp(params, parameter_list=None):
    """
    define optimizer function
    args:
        params(dict): the hyperparameters
        parameter_list (list): list of Variable names to update to minimize loss
    return:
    """
    base_lr = params.get("base_lr", 0.001)
    l2_decay = params.get("l2_decay", 0.00005)

    if 'decay' in params:
        supported_decay_mode = ["cosine_decay", "piecewise_decay"]
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
            supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    optimizer = fluid.optimizer.RMSProp(
        learning_rate=base_lr,
        regularization=fluid.regularizer.L2Decay(regularization_coeff=l2_decay))
    return optimizer
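For reference, a hedged sketch of the params dict AdamDecay consumes; the values are illustrative, and the call only makes sense inside a fluid static-graph program:

# Hypothetical hyperparameters; keys mirror what AdamDecay reads above.
params = {
    'base_lr': 0.001,
    'beta1': 0.9,
    'beta2': 0.999,
    'l2_decay': 1e-5,
    'decay': {
        'function': 'piecewise_decay',
        'boundaries': [10000, 20000],
        'decay_rate': 0.1,
    },
}
optimizer = AdamDecay(params)  # a fluid.optimizer.Adam with piecewise lr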
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
__all__ = ['build_optimizer']
def build_lr_scheduler(lr_config, epochs, step_each_epoch):
    from . import learning_rate
    lr_config.update({'epochs': epochs, 'step_each_epoch': step_each_epoch})
    if 'name' in lr_config:
        lr_name = lr_config.pop('name')
        lr = getattr(learning_rate, lr_name)(**lr_config)()
    else:
        lr = lr_config['lr']
    return lr


def build_optimizer(config, epochs, step_each_epoch, parameters):
    from . import regularizer, optimizer
    config = copy.deepcopy(config)
    # step1 build lr
    lr = build_lr_scheduler(
        config.pop('learning_rate'), epochs, step_each_epoch)

    # step2 build regularization
    if 'regularizer' in config and config['regularizer'] is not None:
        reg_config = config.pop('regularizer')
        reg_name = reg_config.pop('name') + 'Decay'
        reg = getattr(regularizer, reg_name)(**reg_config)()
    else:
        reg = None

    # step3 build optimizer
    optim_name = config.pop('name')
    optim = getattr(optimizer, optim_name)(learning_rate=lr,
                                           regularization=reg,
                                           **config)
    return optim(parameters), lr
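A hedged example of the config this builder expects; the keys follow the code above, `model` is a hypothetical nn.Layer, and the class names must exist in the sibling learning_rate / regularizer / optimizer modules:

config = {
    'name': 'Adam',
    'beta1': 0.9,
    'beta2': 0.999,
    'learning_rate': {'name': 'Cosine', 'lr': 0.001, 'warmup_epoch': 2},
    'regularizer': {'name': 'L2', 'factor': 1e-5},
}
optim, lr = build_optimizer(
    config, epochs=100, step_each_epoch=500, parameters=model.parameters())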
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle.optimizer import lr_scheduler
class Linear(object):
    """
    Linear learning rate decay
    Args:
        lr (float): The initial learning rate. It is a python float number.
        epochs(int): The decay step size. It determines the decay cycle.
        end_lr(float, optional): The minimum final learning rate. Default: 0.0001.
        power(float, optional): Power of polynomial. Default: 1.0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 lr,
                 epochs,
                 step_each_epoch,
                 end_lr=0.0,
                 power=1.0,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Linear, self).__init__()
        self.lr = lr
        self.epochs = epochs * step_each_epoch
        self.end_lr = end_lr
        self.power = power
        self.last_epoch = last_epoch
        self.warmup_epoch = warmup_epoch * step_each_epoch

    def __call__(self):
        learning_rate = lr_scheduler.PolynomialLR(
            learning_rate=self.lr,
            decay_steps=self.epochs,
            end_lr=self.end_lr,
            power=self.power,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr_scheduler.LinearLrWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.lr,
                last_epoch=self.last_epoch)
        return learning_rate


class Cosine(object):
    """
    Cosine learning rate decay
    lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 lr,
                 step_each_epoch,
                 epochs,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Cosine, self).__init__()
        self.lr = lr
        self.T_max = step_each_epoch * epochs
        self.last_epoch = last_epoch
        self.warmup_epoch = warmup_epoch * step_each_epoch

    def __call__(self):
        learning_rate = lr_scheduler.CosineAnnealingLR(
            learning_rate=self.lr, T_max=self.T_max, last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr_scheduler.LinearLrWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.lr,
                last_epoch=self.last_epoch)
        return learning_rate


class Step(object):
    """
    Step learning rate decay
    Args:
        step_each_epoch(int): steps each epoch
        learning_rate (float): The initial learning rate. It is a python float number.
        step_size (int): the interval to update.
        gamma (float, optional): The ratio by which the learning rate is reduced. ``new_lr = origin_lr * gamma`` .
            It should be less than 1.0. Default: 0.1.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 lr,
                 step_size,
                 step_each_epoch,
                 gamma,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Step, self).__init__()
        self.step_size = step_each_epoch * step_size
        self.lr = lr
        self.gamma = gamma
        self.last_epoch = last_epoch
        self.warmup_epoch = warmup_epoch * step_each_epoch

    def __call__(self):
        learning_rate = lr_scheduler.StepLR(
            learning_rate=self.lr,
            step_size=self.step_size,
            gamma=self.gamma,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr_scheduler.LinearLrWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.lr,
                last_epoch=self.last_epoch)
        return learning_rate


class Piecewise(object):
    """
    Piecewise learning rate decay
    Args:
        boundaries(list): A list of steps numbers. The type of element in the list is python int.
        values(list): A list of learning rate values that will be picked during different epoch boundaries.
            The type of element in the list is python float.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 step_each_epoch,
                 decay_epochs,
                 values,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Piecewise, self).__init__()
        self.boundaries = [step_each_epoch * e for e in decay_epochs]
        self.values = values
        self.last_epoch = last_epoch
        self.warmup_epoch = warmup_epoch * step_each_epoch

    def __call__(self):
        learning_rate = lr_scheduler.PiecewiseLR(
            boundaries=self.boundaries,
            values=self.values,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr_scheduler.LinearLrWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.values[0],
                last_epoch=self.last_epoch)
        return learning_rate
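A small hedged example of composing one of these wrappers with warmup (all numbers illustrative):

# Cosine decay over 100 epochs of 500 steps, with 2 epochs of linear warmup.
sched = Cosine(lr=0.001, step_each_epoch=500, epochs=100, warmup_epoch=2)()
for _ in range(5):
    sched.step()          # advance one mini-batch
    print(sched.last_lr)  # ramps linearly from 0.0 toward 0.001 during warmup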
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle import optimizer as optim
class Momentum(object):
    """
    Simple Momentum optimizer with velocity state.
    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        weight_decay (WeightDecayRegularizer, optional) - The strategy of regularization.
    """

    def __init__(self, learning_rate, momentum, weight_decay=None, **args):
        super(Momentum, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay

    def __call__(self, parameters):
        # note: `parameters` and `weight_decay` must not be swapped here
        opt = optim.Momentum(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            parameters=parameters)
        return opt


class Adam(object):
    def __init__(self,
                 learning_rate=0.001,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-08,
                 parameter_list=None,
                 weight_decay=None,
                 grad_clip=None,
                 name=None,
                 lazy_mode=False,
                 **kwargs):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.parameter_list = parameter_list
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip
        self.name = name
        self.lazy_mode = lazy_mode

    def __call__(self, parameters):
        opt = optim.Adam(
            learning_rate=self.learning_rate,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            name=self.name,
            lazy_mode=self.lazy_mode,
            parameters=parameters)
        return opt


class RMSProp(object):
    """
    Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.
    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        rho (float) - rho value in equation.
        epsilon (float) - avoid division by zero, default is 1e-6.
        weight_decay (WeightDecayRegularizer, optional) - The strategy of regularization.
    """

    def __init__(self,
                 learning_rate,
                 momentum,
                 rho=0.95,
                 epsilon=1e-6,
                 weight_decay=None,
                 **args):
        super(RMSProp, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.rho = rho
        self.epsilon = epsilon
        self.weight_decay = weight_decay

    def __call__(self, parameters):
        opt = optim.RMSProp(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            rho=self.rho,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            parameters=parameters)
        return opt
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle import fluid
class L1Decay(object):
    """
    L1 Weight Decay Regularization, which encourages the weights to be sparse.
    Args:
        factor(float): regularization coeff. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L1Decay, self).__init__()
        self.regularization_coeff = factor

    def __call__(self):
        reg = fluid.regularizer.L1Decay(
            regularization_coeff=self.regularization_coeff)
        return reg


class L2Decay(object):
    """
    L2 Weight Decay Regularization, which encourages the weights to be small.
    Args:
        factor(float): regularization coeff. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L2Decay, self).__init__()
        self.regularization_coeff = factor

    def __call__(self):
        reg = fluid.regularizer.L2Decay(
            regularization_coeff=self.regularization_coeff)
        return reg
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
__all__ = ['build_post_process']
def build_post_process(config, global_config=None):
    from .db_postprocess import DBPostProcess
    from .rec_postprocess import CTCLabelDecode, AttnLabelDecode
    support_dict = ['DBPostProcess', 'CTCLabelDecode', 'AttnLabelDecode']

    config = copy.deepcopy(config)
    module_name = config.pop('name')
    if global_config is not None:
        config.update(global_config)
    assert module_name in support_dict, Exception(
        'post process only support {}'.format(support_dict))
    module_class = eval(module_name)(**config)
    return module_class
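A hedged invocation sketch: 'name' selects the class and the remaining keys go to its constructor, optionally merged with a global config first.

# Hypothetical DB config; values are illustrative.
post_process = build_post_process({
    'name': 'DBPostProcess',
    'thresh': 0.3,
    'box_thresh': 0.7,
    'max_candidates': 1000,
    'unclip_ratio': 2.0,
})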
@@ -16,11 +16,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import cv2
from shapely.geometry import Polygon
import pyclipper
@@ -31,11 +27,16 @@ class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB).
    """

    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=2.0,
                 **kwargs):
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
@@ -55,9 +56,9 @@ class DBPostProcess(object):
        contours, _ = outs[0], outs[1]
        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
@@ -73,17 +74,14 @@ class DBPostProcess(object):
            if sside < self.min_size + 2:
                continue
            box = np.array(box)
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.astype(np.int16))
            scores.append(score)
        return np.array(boxes, dtype=np.int16), scores

    def unclip(self, box):
        unclip_ratio = self.unclip_ratio
@@ -131,28 +129,15 @@ class DBPostProcess(object):
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, pred, shape_list):
        pred = pred.numpy()[:, 0, :, :]
        segmentation = pred > self.thresh
        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            height, width = shape_list[batch_index]
            boxes, scores = self.boxes_from_bitmap(
                pred[batch_index], segmentation[batch_index], width, height)
            boxes_batch.append({'points': boxes})
        return boxes_batch
import cv2
import numpy as np
import pyclipper
from shapely.geometry import Polygon


class DBPostProcess():
    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=1.5):
        self.min_size = 3
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio

    def __call__(self, pred, shape_list, is_output_polygon=False):
        '''
        batch: (image, polygons, ignore_tags)
        h_w_list: array of [h, w] pairs
        pred:
            binary: text region segmentation map, with shape (N, 1, H, W)
        '''
        pred = pred.numpy()[:, 0, :, :]
        segmentation = self.binarize(pred)
        batch_out = []
        for batch_index in range(pred.shape[0]):
            height, width = shape_list[batch_index]
            boxes, scores = self.post_p(
                pred[batch_index],
                segmentation[batch_index],
                width,
                height,
                is_output_polygon=is_output_polygon)
            batch_out.append({"points": boxes})
        return batch_out

    def binarize(self, pred):
        return pred > self.thresh

    def post_p(self,
               pred,
               bitmap,
               dest_width,
               dest_height,
               is_output_polygon=True):
        '''
        _bitmap: single map with shape (H, W),
            whose values are binarized as {0, 1}
        '''
        height, width = pred.shape
        boxes = []
        new_scores = []
        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours[:self.max_candidates]:
            epsilon = 0.005 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue
            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue
            if points.shape[0] > 2:
                box = self.unclip(points, unclip_ratio=self.unclip_ratio)
                if len(box) > 1 or len(box) == 0:
                    continue
            else:
                continue
            four_point_box, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if sside < self.min_size + 2:
                continue
            if not is_output_polygon:
                box = np.array(four_point_box)
            else:
                box = box.reshape(-1, 2)
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box)
            new_scores.append(score)
        return boxes, new_scores

    def unclip(self, box, unclip_ratio=1.5):
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        h, w = bitmap.shape[:2]
        box = _box.copy()
        # np.int is removed in recent numpy; use np.int32 explicitly
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
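The unclip step above implements the DB paper's offset rule, expanding each shrunk polygon outward by a distance proportional to area over perimeter. A worked check of that arithmetic with illustrative numbers:

from shapely.geometry import Polygon

# For a 100 x 20 box: area A = 2000, perimeter L = 240, so with ratio r = 1.5
# the offset distance is D = A * r / L = 2000 * 1.5 / 240 = 12.5 pixels.
poly = Polygon([(0, 0), (100, 0), (100, 20), (0, 20)])
print(poly.area * 1.5 / poly.length)  # 12.5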
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from .locality_aware_nms import nms_locality
import cv2
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
class EASTPostPocess(object):
    """
    The post process for EAST.
    """

    def __init__(self, params):
        self.score_thresh = params['score_thresh']
        self.cover_thresh = params['cover_thresh']
        self.nms_thresh = params['nms_thresh']

        # c++ la-nms is faster, but it only supports python 3.5
        self.is_python35 = False
        if sys.version_info.major == 3 and sys.version_info.minor == 5:
            self.is_python35 = True

    def restore_rectangle_quad(self, origin, geometry):
        """
        Restore rectangle from quadrangle.
        """
        # quad
        origin_concat = np.concatenate(
            (origin, origin, origin, origin), axis=1)  # (n, 8)
        pred_quads = origin_concat - geometry
        pred_quads = pred_quads.reshape((-1, 4, 2))  # (n, 4, 2)
        return pred_quads

    def detect(self,
               score_map,
               geo_map,
               score_thresh=0.8,
               cover_thresh=0.1,
               nms_thresh=0.2):
        """
        restore text boxes from score map and geo map
        """
        score_map = score_map[0]
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = np.swapaxes(geo_map, 1, 2)
        # filter the score map
        xy_text = np.argwhere(score_map > score_thresh)
        if len(xy_text) == 0:
            return []
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # restore quad proposals
        text_box_restored = self.restore_rectangle_quad(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
        if self.is_python35:
            import lanms
            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
        else:
            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter some low score boxes by the average score map,
        # this is different from the original paper.
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        boxes = boxes[boxes[:, 8] > cover_thresh]
        return boxes

    def sort_poly(self, p):
        """
        Sort polygons.
        """
        min_axis = np.argmin(np.sum(p, axis=1))
        p = p[[min_axis, (min_axis + 1) % 4,
               (min_axis + 2) % 4, (min_axis + 3) % 4]]
        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
            return p
        else:
            return p[[0, 3, 2, 1]]

    def __call__(self, outs_dict, ratio_list):
        score_list = outs_dict['f_score']
        geo_list = outs_dict['f_geo']
        img_num = len(ratio_list)
        dt_boxes_list = []
        for ino in range(img_num):
            score = score_list[ino]
            geo = geo_list[ino]
            boxes = self.detect(
                score_map=score,
                geo_map=geo,
                score_thresh=self.score_thresh,
                cover_thresh=self.cover_thresh,
                nms_thresh=self.nms_thresh)
            boxes_norm = []
            if len(boxes) > 0:
                ratio_h, ratio_w = ratio_list[ino]
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                for i_box, box in enumerate(boxes):
                    box = self.sort_poly(box.astype(np.int32))
                    if np.linalg.norm(box[0] - box[1]) < 5 \
                            or np.linalg.norm(box[3] - box[0]) < 5:
                        continue
                    boxes_norm.append(box)
            dt_boxes_list.append(np.array(boxes_norm))
        return dt_boxes_list
#!/usr/bin/env python
#
# Copyright (C) 2014 Google Inc.
#
# This file is part of YouCompleteMe.
#
# YouCompleteMe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# YouCompleteMe is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import glob
import ycm_core
# These are the compilation flags that will be used in case there's no
# compilation database set (by default, one is not set).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
BASE_DIR = os.path.dirname(os.path.realpath(__file__))
from plumbum.cmd import python_config
flags = [
    '-Wall',
    '-Wextra',
    '-Wnon-virtual-dtor',
    '-Winvalid-pch',
    '-Wno-unused-local-typedefs',
    '-std=c++11',
    '-x', 'c++',
    '-Iinclude',
] + python_config('--cflags').split()
# Set this to the absolute path to the folder (NOT the file!) containing the
# compile_commands.json file to use that instead of 'flags'. See here for
# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags.
compilation_database_folder = ''
if os.path.exists( compilation_database_folder ):
  database = ycm_core.CompilationDatabase( compilation_database_folder )
else:
  database = None

SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]

def DirectoryOfThisScript():
  return os.path.dirname( os.path.abspath( __file__ ) )


def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
  if not working_directory:
    return list( flags )
  new_flags = []
  make_next_absolute = False
  path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
  for flag in flags:
    new_flag = flag

    if make_next_absolute:
      make_next_absolute = False
      if not flag.startswith( '/' ):
        new_flag = os.path.join( working_directory, flag )

    for path_flag in path_flags:
      if flag == path_flag:
        make_next_absolute = True
        break

      if flag.startswith( path_flag ):
        path = flag[ len( path_flag ): ]
        new_flag = path_flag + os.path.join( working_directory, path )
        break

    if new_flag:
      new_flags.append( new_flag )
  return new_flags


def IsHeaderFile( filename ):
  extension = os.path.splitext( filename )[ 1 ]
  return extension in [ '.h', '.hxx', '.hpp', '.hh' ]


def GetCompilationInfoForFile( filename ):
  # The compilation_commands.json file generated by CMake does not have entries
  # for header files. So we do our best by asking the db for flags for a
  # corresponding source file, if any. If one exists, the flags for that file
  # should be good enough.
  if IsHeaderFile( filename ):
    basename = os.path.splitext( filename )[ 0 ]
    for extension in SOURCE_EXTENSIONS:
      replacement_file = basename + extension
      if os.path.exists( replacement_file ):
        compilation_info = database.GetCompilationInfoForFile(
          replacement_file )
        if compilation_info.compiler_flags_:
          return compilation_info
    return None
  return database.GetCompilationInfoForFile( filename )


# This is the entry point; this function is called by ycmd to produce flags for
# a file.
def FlagsForFile( filename, **kwargs ):
  if database:
    # Bear in mind that compilation_info.compiler_flags_ does NOT return a
    # python list, but a "list-like" StringVec object
    compilation_info = GetCompilationInfoForFile( filename )
    if not compilation_info:
      return None
    final_flags = MakeRelativePathsInFlagsAbsolute(
      compilation_info.compiler_flags_,
      compilation_info.compiler_working_dir_ )
  else:
    relative_to = DirectoryOfThisScript()
    final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )

  return {
    'flags': final_flags,
    'do_cache': True
  }
CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags)
LDFLAGS = $(shell python3-config --ldflags)

DEPS = lanms.h $(shell find include -xtype f)
CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp

LIB_SO = adaptor.so

$(LIB_SO): $(CXX_SOURCES) $(DEPS)
	$(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC

clean:
	rm -rf $(LIB_SO)