Merge pull request #1 from LDOUBLEV/upload

Upload PaddleOCR code
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
class DBHead(object):
"""
Differentiable Binarization (DB) for text detection:
see https://arxiv.org/abs/1911.08947
args:
        params(dict): hyper-parameters for building the DB network
"""
def __init__(self, params):
self.k = params['k']
self.inner_channels = params['inner_channels']
self.C, self.H, self.W = params['image_shape']
print(self.C, self.H, self.W)
def binarize(self, x):
conv1 = fluid.layers.conv2d(
input=x,
num_filters=self.inner_channels // 4,
filter_size=3,
padding=1,
param_attr=fluid.initializer.MSRAInitializer(uniform=False),
bias_attr=False)
conv_bn1 = fluid.layers.batch_norm(
input=conv1,
param_attr=fluid.initializer.ConstantInitializer(value=1.0),
bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
act="relu")
conv2 = fluid.layers.conv2d_transpose(
input=conv_bn1,
num_filters=self.inner_channels // 4,
filter_size=2,
stride=2,
param_attr=fluid.initializer.MSRAInitializer(uniform=False),
bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
act=None)
conv_bn2 = fluid.layers.batch_norm(
input=conv2,
param_attr=fluid.initializer.ConstantInitializer(value=1.0),
bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
act="relu")
conv3 = fluid.layers.conv2d_transpose(
input=conv_bn2,
num_filters=1,
filter_size=2,
stride=2,
param_attr=fluid.initializer.MSRAInitializer(uniform=False),
bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
act=None)
out = fluid.layers.sigmoid(conv3)
return out
def thresh(self, x):
conv1 = fluid.layers.conv2d(
input=x,
num_filters=self.inner_channels // 4,
filter_size=3,
padding=1,
param_attr=fluid.initializer.MSRAInitializer(uniform=False),
bias_attr=False)
conv_bn1 = fluid.layers.batch_norm(
input=conv1,
param_attr=fluid.initializer.ConstantInitializer(value=1.0),
bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
act="relu")
conv2 = fluid.layers.conv2d_transpose(
input=conv_bn1,
num_filters=self.inner_channels // 4,
filter_size=2,
stride=2,
param_attr=fluid.initializer.MSRAInitializer(uniform=False),
bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
act=None)
conv_bn2 = fluid.layers.batch_norm(
input=conv2,
param_attr=fluid.initializer.ConstantInitializer(value=1.0),
bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
act="relu")
conv3 = fluid.layers.conv2d_transpose(
input=conv_bn2,
num_filters=1,
filter_size=2,
stride=2,
param_attr=fluid.initializer.MSRAInitializer(uniform=False),
bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
act=None)
out = fluid.layers.sigmoid(conv3)
return out
def _get_bias_attr(self, l2_decay, k, name, gradient_clip=None):
regularizer = fluid.regularizer.L2Decay(l2_decay)
stdv = 1.0 / math.sqrt(k * 1.0)
initializer = fluid.initializer.Uniform(-stdv, stdv)
bias_attr = fluid.ParamAttr(
regularizer=regularizer,
gradient_clip=gradient_clip,
initializer=initializer,
name=name + "_b_attr")
return bias_attr
def step_function(self, x, y):
return fluid.layers.reciprocal(1 + fluid.layers.exp(-self.k * (x - y)))
def __call__(self, conv_features, mode="train"):
c2, c3, c4, c5 = conv_features
param_attr = fluid.initializer.MSRAInitializer(uniform=False)
in5 = fluid.layers.conv2d(
input=c5,
num_filters=self.inner_channels,
filter_size=1,
param_attr=param_attr,
bias_attr=False)
in4 = fluid.layers.conv2d(
input=c4,
num_filters=self.inner_channels,
filter_size=1,
param_attr=param_attr,
bias_attr=False)
in3 = fluid.layers.conv2d(
input=c3,
num_filters=self.inner_channels,
filter_size=1,
param_attr=param_attr,
bias_attr=False)
in2 = fluid.layers.conv2d(
input=c2,
num_filters=self.inner_channels,
filter_size=1,
param_attr=param_attr,
bias_attr=False)
out4 = fluid.layers.elementwise_add(
x=fluid.layers.resize_nearest(
input=in5, scale=2), y=in4) # 1/16
out3 = fluid.layers.elementwise_add(
x=fluid.layers.resize_nearest(
input=out4, scale=2), y=in3) # 1/8
out2 = fluid.layers.elementwise_add(
x=fluid.layers.resize_nearest(
input=out3, scale=2), y=in2) # 1/4
p5 = fluid.layers.conv2d(
input=in5,
num_filters=self.inner_channels // 4,
filter_size=3,
padding=1,
param_attr=param_attr,
bias_attr=False)
p5 = fluid.layers.resize_nearest(input=p5, scale=8)
p4 = fluid.layers.conv2d(
input=out4,
num_filters=self.inner_channels // 4,
filter_size=3,
padding=1,
param_attr=param_attr,
bias_attr=False)
p4 = fluid.layers.resize_nearest(input=p4, scale=4)
p3 = fluid.layers.conv2d(
input=out3,
num_filters=self.inner_channels // 4,
filter_size=3,
padding=1,
param_attr=param_attr,
bias_attr=False)
p3 = fluid.layers.resize_nearest(input=p3, scale=2)
p2 = fluid.layers.conv2d(
input=out2,
num_filters=self.inner_channels // 4,
filter_size=3,
padding=1,
param_attr=param_attr,
bias_attr=False)
fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
shrink_maps = self.binarize(fuse)
if mode != "train":
return shrink_maps
threshold_maps = self.thresh(fuse)
binary_maps = self.step_function(shrink_maps, threshold_maps)
y = fluid.layers.concat(
input=[shrink_maps, threshold_maps, binary_maps], axis=1)
predicts = {}
predicts['maps'] = y
return predicts
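if __name__ == '__main__':
    # Usage sketch, not part of the original file: wiring DBHead into a fluid
    # program. The hyper-parameter values and the feature-map shapes below are
    # illustrative assumptions, not a shipped config; c2..c5 stand in for
    # backbone features at 1/4, 1/8, 1/16 and 1/32 of the input resolution.
    head = DBHead({'k': 50, 'inner_channels': 96, 'image_shape': [3, 640, 640]})
    c2 = fluid.layers.data(name='c2', shape=[16, 160, 160], dtype='float32')
    c3 = fluid.layers.data(name='c3', shape=[24, 80, 80], dtype='float32')
    c4 = fluid.layers.data(name='c4', shape=[56, 40, 40], dtype='float32')
    c5 = fluid.layers.data(name='c5', shape=[480, 20, 20], dtype='float32')
    predicts = head([c2, c3, c4, c5], mode='train')
    # (-1, 3, 640, 640): shrink, threshold and binary maps stacked on axis 1
    print(predicts['maps'].shape)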
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from ..common_functions import conv_bn_layer, deconv_bn_layer
class EASTHead(object):
"""
EAST: An Efficient and Accurate Scene Text Detector
see arxiv: https://arxiv.org/abs/1704.03155
args:
        params(dict): hyper-parameters for building the network
"""
def __init__(self, params):
self.model_name = params['model_name']
def unet_fusion(self, inputs):
f = inputs[::-1]
if self.model_name == "large":
num_outputs = [128, 128, 128, 128]
else:
num_outputs = [64, 64, 64, 64]
g = [None, None, None, None]
h = [None, None, None, None]
for i in range(4):
if i == 0:
h[i] = f[i]
else:
h[i] = fluid.layers.concat([g[i - 1], f[i]], axis=1)
h[i] = conv_bn_layer(
input=h[i],
num_filters=num_outputs[i],
filter_size=3,
stride=1,
act='relu',
name="unet_h_%d" % (i))
if i <= 2:
#can be replaced with unpool
g[i] = deconv_bn_layer(
input=h[i],
num_filters=num_outputs[i],
name="unet_g_%d" % (i))
else:
g[i] = conv_bn_layer(
input=h[i],
num_filters=num_outputs[i],
filter_size=3,
stride=1,
act='relu',
name="unet_g_%d" % (i))
return g[3]
def detector_header(self, f_common):
if self.model_name == "large":
num_outputs = [128, 64, 1, 8]
else:
num_outputs = [64, 32, 1, 8]
f_det = conv_bn_layer(
input=f_common,
num_filters=num_outputs[0],
filter_size=3,
stride=1,
act='relu',
name="det_head1")
f_det = conv_bn_layer(
input=f_det,
num_filters=num_outputs[1],
filter_size=3,
stride=1,
act='relu',
name="det_head2")
#f_score
f_score = conv_bn_layer(
input=f_det,
num_filters=num_outputs[2],
filter_size=1,
stride=1,
act=None,
name="f_score")
f_score = fluid.layers.sigmoid(f_score)
#f_geo
f_geo = conv_bn_layer(
input=f_det,
num_filters=num_outputs[3],
filter_size=1,
stride=1,
act=None,
name="f_geo")
f_geo = (fluid.layers.sigmoid(f_geo) - 0.5) * 2 * 800
return f_score, f_geo
def __call__(self, inputs):
f_common = self.unet_fusion(inputs)
f_score, f_geo = self.detector_header(f_common)
predicts = {}
predicts['f_score'] = f_score
predicts['f_geo'] = f_geo
return predicts
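if __name__ == '__main__':
    # Usage sketch, not part of the original file (run as a module, e.g. with
    # python -m, because of the relative import above). The four inputs are
    # assumed backbone feature maps ordered shallow to deep (1/4 to 1/32
    # scale); the channel counts are illustrative. unet_fusion reverses the
    # list, so the deepest map is fused first.
    import paddle.fluid as fluid
    head = EASTHead({'model_name': 'large'})
    f4 = fluid.layers.data(name='f4', shape=[64, 160, 160], dtype='float32')
    f8 = fluid.layers.data(name='f8', shape=[128, 80, 80], dtype='float32')
    f16 = fluid.layers.data(name='f16', shape=[256, 40, 40], dtype='float32')
    f32 = fluid.layers.data(name='f32', shape=[512, 20, 20], dtype='float32')
    predicts = head([f4, f8, f16, f32])
    print(predicts['f_score'].shape, predicts['f_geo'].shape)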
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from .rec_seq_encoder import SequenceEncoder
import numpy as np
class AttentionPredict(object):
def __init__(self, params):
super(AttentionPredict, self).__init__()
self.char_num = params['char_num']
self.encoder = SequenceEncoder(params)
self.decoder_size = params['Attention']['decoder_size']
self.word_vector_dim = params['Attention']['word_vector_dim']
self.encoder_type = params['encoder_type']
self.max_length = params['max_text_length']
def simple_attention(self, encoder_vec, encoder_proj, decoder_state,
decoder_size):
decoder_state_proj = layers.fc(input=decoder_state,
size=decoder_size,
bias_attr=False,
name="decoder_state_proj_fc")
decoder_state_expand = layers.sequence_expand(
x=decoder_state_proj, y=encoder_proj)
concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
concated = layers.tanh(x=concated)
attention_weights = layers.fc(input=concated,
size=1,
act=None,
bias_attr=False,
name="attention_weights_fc")
attention_weights = layers.sequence_softmax(input=attention_weights)
weigths_reshape = layers.reshape(x=attention_weights, shape=[-1])
scaled = layers.elementwise_mul(
x=encoder_vec, y=weigths_reshape, axis=0)
context = layers.sequence_pool(input=scaled, pool_type='sum')
return context
def gru_decoder_with_attention(self, target_embedding, encoder_vec,
encoder_proj, decoder_boot, decoder_size,
char_num):
rnn = layers.DynamicRNN()
with rnn.block():
current_word = rnn.step_input(target_embedding)
encoder_vec = rnn.static_input(encoder_vec)
encoder_proj = rnn.static_input(encoder_proj)
hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
context = self.simple_attention(encoder_vec, encoder_proj,
hidden_mem, decoder_size)
fc_1 = layers.fc(input=context,
size=decoder_size * 3,
bias_attr=False,
name="rnn_fc1")
fc_2 = layers.fc(input=current_word,
size=decoder_size * 3,
bias_attr=False,
name="rnn_fc2")
decoder_inputs = fc_1 + fc_2
h, _, _ = layers.gru_unit(
input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
rnn.update_memory(hidden_mem, h)
out = layers.fc(input=h,
size=char_num,
bias_attr=True,
act='softmax',
name="rnn_out_fc")
rnn.output(out)
return rnn()
def gru_attention_infer(self, decoder_boot, max_length, char_num,
word_vector_dim, encoded_vector, encoded_proj,
decoder_size):
init_state = decoder_boot
beam_size = 1
array_len = layers.fill_constant(
shape=[1], dtype='int64', value=max_length)
counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)
# fill the first element with init_state
state_array = layers.create_array('float32')
layers.array_write(init_state, array=state_array, i=counter)
# ids, scores as memory
ids_array = layers.create_array('int64')
scores_array = layers.create_array('float32')
rois_shape = layers.shape(init_state)
batch_size = layers.slice(
rois_shape, axes=[0], starts=[0], ends=[1]) + 1
lod_level = layers.range(
start=0, end=batch_size, step=1, dtype=batch_size.dtype)
init_ids = layers.fill_constant_batch_size_like(
input=init_state, shape=[-1, 1], value=0, dtype='int64')
init_ids = layers.lod_reset(init_ids, lod_level)
init_ids = layers.lod_append(init_ids, lod_level)
init_scores = layers.fill_constant_batch_size_like(
input=init_state, shape=[-1, 1], value=1, dtype='float32')
init_scores = layers.lod_reset(init_scores, init_ids)
layers.array_write(init_ids, array=ids_array, i=counter)
layers.array_write(init_scores, array=scores_array, i=counter)
full_ids = fluid.layers.fill_constant_batch_size_like(
input=init_state, shape=[-1, 1], dtype='int64', value=1)
cond = layers.less_than(x=counter, y=array_len)
while_op = layers.While(cond=cond)
with while_op.block():
pre_ids = layers.array_read(array=ids_array, i=counter)
pre_state = layers.array_read(array=state_array, i=counter)
pre_score = layers.array_read(array=scores_array, i=counter)
pre_ids_emb = layers.embedding(
input=pre_ids,
size=[char_num, word_vector_dim],
dtype='float32')
context = self.simple_attention(encoded_vector, encoded_proj,
pre_state, decoder_size)
# expand the recursive_sequence_lengths of pre_state
# to be the same with pre_score
pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
context_expanded = layers.sequence_expand(context, pre_score)
fc_1 = layers.fc(input=context_expanded,
size=decoder_size * 3,
bias_attr=False,
name="rnn_fc1")
fc_2 = layers.fc(input=pre_ids_emb,
size=decoder_size * 3,
bias_attr=False,
name="rnn_fc2")
decoder_inputs = fc_1 + fc_2
current_state, _, _ = layers.gru_unit(
input=decoder_inputs,
hidden=pre_state_expanded,
size=decoder_size * 3)
current_state_with_lod = layers.lod_reset(
x=current_state, y=pre_score)
# use score to do beam search
current_score = layers.fc(input=current_state_with_lod,
size=char_num,
bias_attr=True,
act='softmax',
name="rnn_out_fc")
topk_scores, topk_indices = layers.topk(current_score, k=beam_size)
new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
fluid.layers.assign(new_ids, full_ids)
layers.increment(x=counter, value=1, in_place=True)
# update the memories
layers.array_write(current_state, array=state_array, i=counter)
layers.array_write(topk_indices, array=ids_array, i=counter)
layers.array_write(topk_scores, array=scores_array, i=counter)
# update the break condition:
# up to the max length or all candidates of
# source sentences have ended.
length_cond = layers.less_than(x=counter, y=array_len)
finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
layers.logical_and(x=length_cond, y=finish_cond, out=cond)
return full_ids
def __call__(self, inputs, labels=None, mode=None):
encoder_features = self.encoder(inputs)
char_num = self.char_num
word_vector_dim = self.word_vector_dim
decoder_size = self.decoder_size
if self.encoder_type == "reshape":
encoder_input = encoder_features
encoded_vector = encoder_features
else:
encoder_input = encoder_features[1]
encoded_vector = layers.concat(encoder_features, axis=1)
encoded_proj = layers.fc(input=encoded_vector,
size=decoder_size,
bias_attr=False,
name="encoded_proj_fc")
backward_first = layers.sequence_pool(
input=encoder_input, pool_type='first')
decoder_boot = layers.fc(input=backward_first,
size=decoder_size,
bias_attr=False,
act="relu",
name='decoder_boot')
if mode == "train":
label_in = labels['label_in']
label_out = labels['label_out']
label_in = layers.cast(x=label_in, dtype='int64')
trg_embedding = layers.embedding(
input=label_in,
size=[char_num, word_vector_dim],
dtype='float32')
predict = self.gru_decoder_with_attention(
trg_embedding, encoded_vector, encoded_proj, decoder_boot,
decoder_size, char_num)
_, decoded_out = layers.topk(input=predict, k=1)
decoded_out = layers.lod_reset(decoded_out, y=label_out)
predicts = {'predict': predict, 'decoded_out': decoded_out}
else:
ids = self.gru_attention_infer(
decoder_boot, self.max_length, char_num, word_vector_dim,
encoded_vector, encoded_proj, decoder_size)
predicts = {'decoded_out': ids}
return predicts
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from .rec_seq_encoder import SequenceEncoder
from ..common_functions import get_para_bias_attr
import numpy as np
class CTCPredict(object):
def __init__(self, params):
super(CTCPredict, self).__init__()
self.char_num = params['char_num']
self.encoder = SequenceEncoder(params)
self.encoder_type = params['encoder_type']
def __call__(self, inputs, labels=None, mode=None):
encoder_features = self.encoder(inputs)
if self.encoder_type != "reshape":
encoder_features = fluid.layers.concat(encoder_features, axis=1)
name = "ctc_fc"
para_attr, bias_attr = get_para_bias_attr(
l2_decay=0.0004, k=encoder_features.shape[1], name=name)
predict = fluid.layers.fc(input=encoder_features,
size=self.char_num + 1,
param_attr=para_attr,
bias_attr=bias_attr,
name=name)
decoded_out = fluid.layers.ctc_greedy_decoder(
input=predict, blank=self.char_num)
predicts = {'predict': predict, 'decoded_out': decoded_out}
return predicts
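if __name__ == '__main__':
    # Usage sketch, not part of the original file (run as a module because of
    # the relative imports above); the hyper-parameters are illustrative
    # assumptions. The fc layer predicts char_num + 1 classes because index
    # char_num is reserved for the CTC blank label.
    params = {'char_num': 36, 'encoder_type': 'rnn',
              'SeqRNN': {'hidden_size': 48}}
    ctc_head = CTCPredict(params)
    # stand-in for backbone output: height already reduced to 1, width is the
    # sequence axis
    feats = fluid.layers.data(name='feats', shape=[96, 1, 80], dtype='float32')
    predicts = ctc_head(feats, mode='train')
    print(predicts['predict'].shape)  # last dim is char_num + 1 = 37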
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers
class EncoderWithReshape(object):
def __init__(self, params):
super(EncoderWithReshape, self).__init__()
def __call__(self, inputs):
sliced_feature = layers.im2sequence(
input=inputs,
stride=[1, 1],
filter_size=[inputs.shape[2], 1],
name="sliced_feature")
return sliced_feature
class EncoderWithRNN(object):
def __init__(self, params):
super(EncoderWithRNN, self).__init__()
self.rnn_hidden_size = params['SeqRNN']['hidden_size']
def __call__(self, inputs):
lstm_list = []
name_prefix = "lstm"
rnn_hidden_size = self.rnn_hidden_size
for no in range(1, 3):
if no == 1:
is_reverse = False
else:
is_reverse = True
name = "%s_st1_fc%d" % (name_prefix, no)
fc = layers.fc(input=inputs,
size=rnn_hidden_size * 4,
param_attr=fluid.ParamAttr(name=name + "_w"),
bias_attr=fluid.ParamAttr(name=name + "_b"),
name=name)
name = "%s_st1_out%d" % (name_prefix, no)
lstm, _ = layers.dynamic_lstm(
input=fc,
size=rnn_hidden_size * 4,
is_reverse=is_reverse,
param_attr=fluid.ParamAttr(name=name + "_w"),
bias_attr=fluid.ParamAttr(name=name + "_b"),
use_peepholes=False)
name = "%s_st2_fc%d" % (name_prefix, no)
fc = layers.fc(input=lstm,
size=rnn_hidden_size * 4,
param_attr=fluid.ParamAttr(name=name + "_w"),
bias_attr=fluid.ParamAttr(name=name + "_b"),
name=name)
name = "%s_st2_out%d" % (name_prefix, no)
lstm, _ = layers.dynamic_lstm(
input=fc,
size=rnn_hidden_size * 4,
is_reverse=is_reverse,
param_attr=fluid.ParamAttr(name=name + "_w"),
bias_attr=fluid.ParamAttr(name=name + "_b"),
use_peepholes=False)
lstm_list.append(lstm)
return lstm_list
class SequenceEncoder(object):
def __init__(self, params):
super(SequenceEncoder, self).__init__()
self.encoder_type = params['encoder_type']
self.encoder_reshape = EncoderWithReshape(params)
if self.encoder_type == "rnn":
self.encoder_rnn = EncoderWithRNN(params)
def __call__(self, inputs):
if self.encoder_type == "reshape":
encoder_features = self.encoder_reshape(inputs)
elif self.encoder_type == "rnn":
inputs = self.encoder_reshape(inputs)
encoder_features = self.encoder_rnn(inputs)
else:
assert False, "Unsupport encoder_type:%s"\
% self.encoder_type
return encoder_features
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
def BalanceLoss(pred,
gt,
mask,
balance_loss=True,
main_loss_type="DiceLoss",
negative_ratio=3,
return_origin=False,
eps=1e-6):
"""
The BalanceLoss for Differentiable Binarization text detection
args:
pred (variable): predicted feature maps.
gt (variable): ground truth feature maps.
        mask (variable): mask of the valid regions.
        balance_loss (bool): whether to balance the loss, default is True
        main_loss_type (str): can only be one of ['CrossEntropy', 'DiceLoss',
            'Euclidean', 'BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
        negative_ratio (int|float): maximum ratio of negative to positive
            samples, default is 3.
        return_origin (bool): whether to also return the unbalanced loss,
            default is False.
        eps (float): epsilon to avoid division by zero, default is 1e-6.
return: (variable) balanced loss
"""
positive = gt * mask
negative = (1 - gt) * mask
positive_count = fluid.layers.reduce_sum(positive)
positive_count_int = fluid.layers.cast(positive_count, dtype=np.int32)
    # cap the number of negatives at negative_ratio * positives (OHEM); the
    # builtin min() does not work on fluid Variables, so use the elementwise op
    negative_count = fluid.layers.elementwise_min(
        fluid.layers.reduce_sum(negative), positive_count * negative_ratio)
negative_count_int = fluid.layers.cast(negative_count, dtype=np.int32)
if main_loss_type == "CrossEntropy":
loss = fluid.layers.cross_entropy(input=pred, label=gt, soft_label=True)
loss = fluid.layers.reduce_mean(loss)
elif main_loss_type == "Euclidean":
loss = fluid.layers.square(pred - gt)
loss = fluid.layers.reduce_mean(loss)
elif main_loss_type == "DiceLoss":
loss = DiceLoss(pred, gt, mask)
elif main_loss_type == "BCELoss":
loss = fluid.layers.sigmoid_cross_entropy_with_logits(pred, label=gt)
elif main_loss_type == "MaskL1Loss":
loss = MaskL1Loss(pred, gt, mask)
else:
loss_type = [
'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
]
raise Exception("main_loss_type in BalanceLoss() can only be one of {}".
format(loss_type))
if not balance_loss:
return loss
positive_loss = positive * loss
negative_loss = negative * loss
negative_loss = fluid.layers.reshape(negative_loss, shape=[-1])
negative_loss, _ = fluid.layers.topk(negative_loss, k=negative_count_int)
balance_loss = (fluid.layers.reduce_sum(positive_loss) +
fluid.layers.reduce_sum(negative_loss)) / (
positive_count + negative_count + eps)
if return_origin:
return balance_loss, loss
return balance_loss
def DiceLoss(pred, gt, mask, weights=None, eps=1e-6):
"""
DiceLoss function.
"""
assert pred.shape == gt.shape
assert pred.shape == mask.shape
if weights is not None:
assert weights.shape == mask.shape
mask = weights * mask
intersection = fluid.layers.reduce_sum(pred * gt * mask)
union = fluid.layers.reduce_sum(pred * mask) + fluid.layers.reduce_sum(
gt * mask) + eps
loss = 1 - 2.0 * intersection / union
assert loss <= 1
return loss
def MaskL1Loss(pred, gt, mask, eps=1e-6):
"""
Mask L1 Loss
"""
loss = fluid.layers.reduce_sum((fluid.layers.abs(pred - gt) * mask)) / (
fluid.layers.reduce_sum(mask) + eps)
loss = fluid.layers.reduce_mean(loss)
return loss
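if __name__ == '__main__':
    # Numeric sanity check, not part of the original file: on binary maps the
    # Dice loss above reduces to 1 - 2 * intersection / (|pred| + |gt|), so a
    # perfect prediction scores ~0. Plain numpy mirror of DiceLoss for clarity.
    pred = np.array([[1.0, 0.0], [0.0, 1.0]])
    gt = np.array([[1.0, 0.0], [0.0, 1.0]])
    mask = np.ones_like(gt)
    intersection = (pred * gt * mask).sum()
    union = (pred * mask).sum() + (gt * mask).sum() + 1e-6
    print(1 - 2.0 * intersection / union)  # ~0.0 for a perfect match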
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss
class DBLoss(object):
"""
Differentiable Binarization (DB) Loss Function
args:
        params (dict): hyper-parameters for the DB loss
"""
def __init__(self, params):
super(DBLoss, self).__init__()
self.balance_loss = params['balance_loss']
self.main_loss_type = params['main_loss_type']
self.alpha = params['alpha']
self.beta = params['beta']
self.ohem_ratio = params['ohem_ratio']
def __call__(self, predicts, labels):
label_shrink_map = labels['shrink_map']
label_shrink_mask = labels['shrink_mask']
label_threshold_map = labels['threshold_map']
label_threshold_mask = labels['threshold_mask']
pred = predicts['maps']
shrink_maps = pred[:, 0, :, :]
threshold_maps = pred[:, 1, :, :]
binary_maps = pred[:, 2, :, :]
loss_shrink_maps = BalanceLoss(
shrink_maps,
label_shrink_map,
label_shrink_mask,
balance_loss=self.balance_loss,
main_loss_type=self.main_loss_type,
negative_ratio=self.ohem_ratio)
loss_threshold_maps = MaskL1Loss(threshold_maps, label_threshold_map,
label_threshold_mask)
loss_binary_maps = DiceLoss(binary_maps, label_shrink_map,
label_shrink_mask)
loss_shrink_maps = self.alpha * loss_shrink_maps
loss_threshold_maps = self.beta * loss_threshold_maps
loss_all = loss_shrink_maps + loss_threshold_maps\
+ loss_binary_maps
losses = {'total_loss':loss_all,\
"loss_shrink_maps":loss_shrink_maps,\
"loss_threshold_maps":loss_threshold_maps,\
"loss_binary_maps":loss_binary_maps}
return losses
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
class EASTLoss(object):
"""
EAST Loss function
"""
def __init__(self, params=None):
super(EASTLoss, self).__init__()
def __call__(self, predicts, labels):
f_score = predicts['f_score']
f_geo = predicts['f_geo']
l_score = labels['score']
l_geo = labels['geo']
l_mask = labels['mask']
##dice_loss
intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
union = fluid.layers.reduce_sum(f_score * l_mask)\
+ fluid.layers.reduce_sum(l_score * l_mask)
dice_loss = 1 - 2 * intersection / (union + 1e-5)
        #smooth_l1_loss
channels = 8
l_geo_split = fluid.layers.split(
l_geo, num_or_sections=channels + 1, dim=1)
f_geo_split = fluid.layers.split(f_geo, num_or_sections=channels, dim=1)
smooth_l1 = 0
for i in range(0, channels):
geo_diff = l_geo_split[i] - f_geo_split[i]
abs_geo_diff = fluid.layers.abs(geo_diff)
smooth_l1_sign = fluid.layers.less_than(abs_geo_diff, l_score)
smooth_l1_sign = fluid.layers.cast(smooth_l1_sign, dtype='float32')
in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
(abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
out_loss = l_geo_split[-1] / channels * in_loss * l_score
smooth_l1 += out_loss
smooth_l1_loss = fluid.layers.reduce_mean(smooth_l1 * l_score)
dice_loss = dice_loss * 0.01
total_loss = dice_loss + smooth_l1_loss
losses = {'total_loss':total_loss, "dice_loss":dice_loss,\
"smooth_l1_loss":smooth_l1_loss}
return losses
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import numpy as np
class AttentionLoss(object):
def __init__(self, params):
super(AttentionLoss, self).__init__()
self.char_num = params['char_num']
def __call__(self, predicts, labels):
predict = predicts['predict']
label_out = labels['label_out']
label_out = fluid.layers.cast(x=label_out, dtype='int64')
cost = fluid.layers.cross_entropy(input=predict, label=label_out)
sum_cost = fluid.layers.reduce_sum(cost)
return sum_cost
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
class CTCLoss(object):
def __init__(self, params):
super(CTCLoss, self).__init__()
self.char_num = params['char_num']
def __call__(self, predicts, labels):
predict = predicts['predict']
label = labels['label']
cost = fluid.layers.warpctc(
input=predict, label=label, blank=self.char_num, norm_by_times=True)
sum_cost = fluid.layers.reduce_sum(cost)
return sum_cost
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.param_attr import ParamAttr
import numpy as np
class LocalizationNetwork(object):
def __init__(self, params):
super(LocalizationNetwork, self).__init__()
self.F = params['num_fiducial']
self.loc_lr = params['loc_lr']
self.model_name = params['model_name']
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
conv = layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
bn_name = "bn_" + name
return layers.batch_norm(
input=conv,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def get_initial_fiducials(self):
""" see RARE paper Fig. 6 (a) """
F = self.F
ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
return initial_bias
def __call__(self, image):
F = self.F
loc_lr = self.loc_lr
if self.model_name == "large":
num_filters_list = [64, 128, 256, 512]
fc_dim = 256
else:
num_filters_list = [16, 32, 64, 128]
fc_dim = 64
for fno in range(len(num_filters_list)):
num_filters = num_filters_list[fno]
name = "loc_conv%d" % fno
if fno == 0:
conv = self.conv_bn_layer(
image, num_filters, 3, act='relu', name=name)
else:
conv = self.conv_bn_layer(
pool, num_filters, 3, act='relu', name=name)
if fno == len(num_filters_list) - 1:
pool = layers.adaptive_pool2d(
input=conv, pool_size=[1, 1], pool_type='avg')
else:
pool = layers.pool2d(
input=conv,
pool_size=2,
pool_stride=2,
pool_padding=0,
pool_type='max')
name = "loc_fc1"
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
fc1 = layers.fc(input=pool,
size=fc_dim,
param_attr=fluid.param_attr.ParamAttr(
learning_rate=loc_lr,
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + "_w"),
act='relu',
name=name)
initial_bias = self.get_initial_fiducials()
initial_bias = initial_bias.reshape(-1)
name = "loc_fc2"
param_attr = fluid.param_attr.ParamAttr(
learning_rate=loc_lr,
initializer=fluid.initializer.NumpyArrayInitializer(
np.zeros([fc_dim, F * 2])),
name=name + "_w")
bias_attr = fluid.param_attr.ParamAttr(
learning_rate=loc_lr,
initializer=fluid.initializer.NumpyArrayInitializer(initial_bias),
name=name + "_b")
fc2 = layers.fc(input=fc1,
size=F * 2,
param_attr=param_attr,
bias_attr=bias_attr,
name=name)
batch_C_prime = layers.reshape(x=fc2, shape=[-1, F, 2], inplace=False)
return batch_C_prime
class GridGenerator(object):
def __init__(self, params):
super(GridGenerator, self).__init__()
self.eps = 1e-6
self.F = params['num_fiducial']
def build_C(self):
""" Return coordinates of fiducial points in I_r; C """
F = self.F
ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
ctrl_pts_y_top = -1 * np.ones(int(F / 2))
ctrl_pts_y_bottom = np.ones(int(F / 2))
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
return C # F x 2
def build_P(self, I_r_size):
I_r_width, I_r_height = I_r_size
I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0)\
/ I_r_width # self.I_r_width
I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0)\
/ I_r_height # self.I_r_height
# P: self.I_r_width x self.I_r_height x 2
P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
# n (= self.I_r_width x self.I_r_height) x 2
return P.reshape([-1, 2])
def build_inv_delta_C(self, C):
""" Return inv_delta_C which is needed to calculate T """
F = self.F
hat_C = np.zeros((F, F), dtype=float) # F x F
for i in range(0, F):
for j in range(i, F):
r = np.linalg.norm(C[i] - C[j])
hat_C[i, j] = r
hat_C[j, i] = r
np.fill_diagonal(hat_C, 1)
hat_C = (hat_C**2) * np.log(hat_C)
# print(C.shape, hat_C.shape)
delta_C = np.concatenate( # F+3 x F+3
[
np.concatenate(
[np.ones((F, 1)), C, hat_C], axis=1), # F x F+3
np.concatenate(
[np.zeros((2, 3)), np.transpose(C)], axis=1), # 2 x F+3
np.concatenate(
[np.zeros((1, 3)), np.ones((1, F))], axis=1) # 1 x F+3
],
axis=0)
inv_delta_C = np.linalg.inv(delta_C)
return inv_delta_C # F+3 x F+3
def build_P_hat(self, C, P):
F = self.F
eps = self.eps
n = P.shape[0] # n (= self.I_r_width x self.I_r_height)
#P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
C_tile = np.expand_dims(C, axis=0) # 1 x F x 2
P_diff = P_tile - C_tile # n x F x 2
#rbf_norm: n x F
rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
#rbf: n x F
rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
return P_hat # n x F+3
def get_expand_tensor(self, batch_C_prime):
name = "ex_fc"
initializer = fluid.initializer.ConstantInitializer(value=0.0)
param_attr = fluid.param_attr.ParamAttr(
learning_rate=0.0, initializer=initializer, name=name + "_w")
bias_attr = fluid.param_attr.ParamAttr(
learning_rate=0.0, initializer=initializer, name=name + "_b")
batch_C_ex_part_tensor = fluid.layers.fc(input=batch_C_prime,
size=6,
param_attr=param_attr,
bias_attr=bias_attr,
name=name)
batch_C_ex_part_tensor = fluid.layers.reshape(
x=batch_C_ex_part_tensor, shape=[-1, 3, 2])
return batch_C_ex_part_tensor
def __call__(self, batch_C_prime, I_r_size):
C = self.build_C()
P = self.build_P(I_r_size)
inv_delta_C = self.build_inv_delta_C(C).astype('float32')
P_hat = self.build_P_hat(C, P).astype('float32')
inv_delta_C_tensor = layers.create_tensor(dtype='float32')
layers.assign(inv_delta_C, inv_delta_C_tensor)
inv_delta_C_tensor.stop_gradient = True
P_hat_tensor = layers.create_tensor(dtype='float32')
layers.assign(P_hat, P_hat_tensor)
P_hat_tensor.stop_gradient = True
batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
# batch_C_ex_part_tensor = create_tmp_var(
# fluid.default_main_program(),
# name='batch_C_ex_part_tensor',
# dtype='float32', shape=[-1, 3, 2])
# layers.py_func(func=get_batch_C_expand,
# x=[batch_C_prime], out=[batch_C_ex_part_tensor])
batch_C_ex_part_tensor.stop_gradient = True
batch_C_prime_with_zeros = layers.concat(
[batch_C_prime, batch_C_ex_part_tensor], axis=1)
batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
return batch_P_prime
class TPS(object):
def __init__(self, params):
super(TPS, self).__init__()
self.loc_net = LocalizationNetwork(params)
self.grid_generator = GridGenerator(params)
def __call__(self, image):
batch_C_prime = self.loc_net(image)
I_r_size = [image.shape[3], image.shape[2]]
batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
batch_P_prime = layers.reshape(
x=batch_P_prime, shape=[-1, image.shape[2], image.shape[3], 2])
batch_I_r = layers.grid_sampler(x=image, grid=batch_P_prime)
image.stop_gradient = False
return batch_I_r
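if __name__ == '__main__':
    # Inspection sketch, not part of the original file: the TPS grid math in
    # GridGenerator is plain numpy, so it can be checked without building a
    # fluid graph. num_fiducial=20 is an assumed value.
    gg = GridGenerator({'num_fiducial': 20})
    C = gg.build_C()                       # (20, 2) fiducial points on I_r
    P = gg.build_P((100, 32))              # (3200, 2) grid for w=100, h=32
    inv_delta_C = gg.build_inv_delta_C(C)  # (23, 23), i.e. (F+3, F+3)
    P_hat = gg.build_P_hat(C, P)           # (3200, 23)
    print(C.shape, P.shape, inv_delta_C.shape, P_hat.shape)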
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
def AdamDecay(params, parameter_list=None):
"""
    define the optimizer
    args:
        params(dict): hyper-parameters for the optimizer
        parameter_list (list): list of Variables to update to minimize loss
    return:
        optimizer (fluid.optimizer.Adam)
"""
base_lr = params['base_lr']
beta1 = params['beta1']
beta2 = params['beta2']
optimizer = fluid.optimizer.Adam(
learning_rate=base_lr,
beta1=beta1,
beta2=beta2,
parameter_list=parameter_list)
return optimizer
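if __name__ == '__main__':
    # Usage sketch, not part of the original file; the values mirror a typical
    # 'Optimizer' config section and are assumptions, not shipped defaults.
    optimizer = AdamDecay({'base_lr': 0.001, 'beta1': 0.9, 'beta2': 0.999})
    print(type(optimizer))  # an Adam optimizer instance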
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import numpy as np
import string
import cv2
from shapely.geometry import Polygon
import pyclipper
class DBPostProcess(object):
"""
The post process for Differentiable Binarization (DB).
"""
def __init__(self, params):
self.thresh = params['thresh']
self.box_thresh = params['box_thresh']
self.max_candidates = params['max_candidates']
self.min_size = 3
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
'''
_bitmap: single map with shape (1, H, W),
whose values are binarized as {0, 1}
'''
bitmap = _bitmap
height, width = bitmap.shape
        # note: OpenCV 3.x returns (img, contours, hierarchy) instead:
        # img, contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
num_contours = min(len(contours), self.max_candidates)
boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
scores = np.zeros((num_contours, ), dtype=np.float32)
for index in range(num_contours):
contour = contours[index]
points, sside = self.get_mini_boxes(contour)
if sside < self.min_size:
continue
points = np.array(points)
score = self.box_score_fast(pred, points.reshape(-1, 2))
if self.box_thresh > score:
continue
box = self.unclip(points).reshape(-1, 1, 2)
box, sside = self.get_mini_boxes(box)
if sside < self.min_size + 2:
continue
box = np.array(box)
if not isinstance(dest_width, int):
dest_width = dest_width.item()
dest_height = dest_height.item()
box[:, 0] = np.clip(
np.round(box[:, 0] / width * dest_width), 0, dest_width)
box[:, 1] = np.clip(
np.round(box[:, 1] / height * dest_height), 0, dest_height)
boxes[index, :, :] = box.astype(np.int16)
scores[index] = score
return boxes, scores
def unclip(self, box, unclip_ratio=1.5):
poly = Polygon(box)
distance = poly.area * unclip_ratio / poly.length
offset = pyclipper.PyclipperOffset()
offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
expanded = np.array(offset.Execute(distance))
return expanded
def get_mini_boxes(self, contour):
bounding_box = cv2.minAreaRect(contour)
points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])
index_1, index_2, index_3, index_4 = 0, 1, 2, 3
if points[1][1] > points[0][1]:
index_1 = 0
index_4 = 1
else:
index_1 = 1
index_4 = 0
if points[3][1] > points[2][1]:
index_2 = 2
index_3 = 3
else:
index_2 = 3
index_3 = 2
box = [
points[index_1], points[index_2], points[index_3], points[index_4]
]
return box, min(bounding_box[1])
def box_score_fast(self, bitmap, _box):
h, w = bitmap.shape[:2]
box = _box.copy()
xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int), 0, w - 1)
xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int), 0, w - 1)
ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int), 0, h - 1)
ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int), 0, h - 1)
mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
box[:, 0] = box[:, 0] - xmin
box[:, 1] = box[:, 1] - ymin
cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
def __call__(self, outs_dict, ratio_list):
pred = outs_dict['maps']
pred = pred[:, 0, :, :]
segmentation = pred > self.thresh
boxes_batch = []
for batch_index in range(pred.shape[0]):
height, width = pred.shape[-2:]
tmp_boxes, tmp_scores = self.boxes_from_bitmap(
pred[batch_index], segmentation[batch_index], width, height)
boxes = []
for k in range(len(tmp_boxes)):
if tmp_scores[k] > self.box_thresh:
boxes.append(tmp_boxes[k])
if len(boxes) > 0:
boxes = np.array(boxes)
ratio_h, ratio_w = ratio_list[batch_index]
boxes[:, :, 0] = boxes[:, :, 0] / ratio_w
boxes[:, :, 1] = boxes[:, :, 1] / ratio_h
boxes_batch.append(boxes)
return boxes_batch
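if __name__ == '__main__':
    # Usage sketch, not part of the original file: run the post process on a
    # synthetic probability map. The thresholds are illustrative assumptions;
    # one bright rectangle should come back as a single box.
    post = DBPostProcess({'thresh': 0.3, 'box_thresh': 0.5,
                          'max_candidates': 1000})
    prob = np.zeros((1, 1, 64, 64), dtype=np.float32)
    prob[0, 0, 20:40, 10:50] = 0.9
    boxes_batch = post({'maps': prob}, ratio_list=[(1.0, 1.0)])
    print(len(boxes_batch[0]))  # expected: 1 box for the single bright region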
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from .locality_aware_nms import nms_locality
import cv2
class EASTPostPocess(object):
"""
The post process for EAST.
"""
def __init__(self, params):
self.score_thresh = params['score_thresh']
self.cover_thresh = params['cover_thresh']
self.nms_thresh = params['nms_thresh']
def restore_rectangle_quad(self, origin, geometry):
"""
        Restore quadrangle proposals from per-pixel geometry offsets.
"""
# quad
origin_concat = np.concatenate(
(origin, origin, origin, origin), axis=1) # (n, 8)
pred_quads = origin_concat - geometry
pred_quads = pred_quads.reshape((-1, 4, 2)) # (n, 4, 2)
return pred_quads
def detect(self,
score_map,
geo_map,
score_thresh=0.8,
cover_thresh=0.1,
nms_thresh=0.2):
"""
restore text boxes from score map and geo map
"""
score_map = score_map[0]
geo_map = np.swapaxes(geo_map, 1, 0)
geo_map = np.swapaxes(geo_map, 1, 2)
# filter the score map
xy_text = np.argwhere(score_map > score_thresh)
if len(xy_text) == 0:
return []
# sort the text boxes via the y axis
xy_text = xy_text[np.argsort(xy_text[:, 0])]
#restore quad proposals
text_box_restored = self.restore_rectangle_quad(
xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
boxes[:, :8] = text_box_restored.reshape((-1, 8))
boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
if boxes.shape[0] == 0:
return []
        # Here we filter out some low-score boxes using the average score map;
        # this is different from the original paper.
for i, box in enumerate(boxes):
mask = np.zeros_like(score_map, dtype=np.uint8)
cv2.fillPoly(mask, box[:8].reshape(
(-1, 4, 2)).astype(np.int32) // 4, 1)
boxes[i, 8] = cv2.mean(score_map, mask)[0]
boxes = boxes[boxes[:, 8] > cover_thresh]
return boxes
def sort_poly(self, p):
"""
Sort polygons.
"""
min_axis = np.argmin(np.sum(p, axis=1))
p = p[[min_axis, (min_axis + 1) % 4,\
(min_axis + 2) % 4, (min_axis + 3) % 4]]
if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
return p
else:
return p[[0, 3, 2, 1]]
def __call__(self, outs_dict, ratio_list):
score_list = outs_dict['f_score']
geo_list = outs_dict['f_geo']
img_num = len(ratio_list)
dt_boxes_list = []
for ino in range(img_num):
score = score_list[ino]
geo = geo_list[ino]
boxes = self.detect(
score_map=score,
geo_map=geo,
score_thresh=self.score_thresh,
cover_thresh=self.cover_thresh,
nms_thresh=self.nms_thresh)
boxes_norm = []
if len(boxes) > 0:
ratio_h, ratio_w = ratio_list[ino]
boxes = boxes[:, :8].reshape((-1, 4, 2))
boxes[:, :, 0] /= ratio_w
boxes[:, :, 1] /= ratio_h
for i_box, box in enumerate(boxes):
box = self.sort_poly(box.astype(np.int32))
if np.linalg.norm(box[0] - box[1]) < 5 \
or np.linalg.norm(box[3] - box[0]) < 5:
continue
boxes_norm.append(box)
dt_boxes_list.append(np.array(boxes_norm))
return dt_boxes_list
"""
Locality aware nms.
"""
import numpy as np
from shapely.geometry import Polygon
def intersection(g, p):
"""
    Intersection over union (IoU) of two quadrangles.
"""
g = Polygon(g[:8].reshape((4, 2)))
p = Polygon(p[:8].reshape((4, 2)))
g = g.buffer(0)
p = p.buffer(0)
if not g.is_valid or not p.is_valid:
return 0
inter = Polygon(g).intersection(Polygon(p)).area
union = g.area + p.area - inter
if union == 0:
return 0
else:
return inter / union
def intersection_iog(g, p):
"""
    Intersection over the area of p (IoG).
"""
g = Polygon(g[:8].reshape((4, 2)))
p = Polygon(p[:8].reshape((4, 2)))
if not g.is_valid or not p.is_valid:
return 0
inter = Polygon(g).intersection(Polygon(p)).area
#union = g.area + p.area - inter
union = p.area
if union == 0:
print("p_area is very small")
return 0
else:
return inter / union
def weighted_merge(g, p):
"""
Weighted merge.
"""
g[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8])
g[8] = (g[8] + p[8])
return g
def standard_nms(S, thres):
"""
Standard nms.
"""
order = np.argsort(S[:, 8])[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
inds = np.where(ovr <= thres)[0]
order = order[inds + 1]
return S[keep]
def standard_nms_inds(S, thres):
"""
    Standard nms, return the kept indices.
"""
order = np.argsort(S[:, 8])[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
inds = np.where(ovr <= thres)[0]
order = order[inds + 1]
return keep
def nms(S, thres):
"""
nms.
"""
order = np.argsort(S[:, 8])[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
inds = np.where(ovr <= thres)[0]
order = order[inds + 1]
return keep
def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2):
"""
soft_nms
    :param boxes_in: N x 9 array (8 coords + score)
    :param Nt_thres: iou threshold used for suppression
    :param threshold: boxes whose score falls below this value are discarded
    :param sigma: gaussian weighting parameter
    :param method: 1 for linear, 2 for gaussian, otherwise hard suppression
"""
boxes = boxes_in.copy()
N = boxes.shape[0]
if N is None or N < 1:
return np.array([])
pos, maxpos = 0, 0
weight = 0.0
inds = np.arange(N)
tbox, sbox = boxes[0].copy(), boxes[0].copy()
for i in range(N):
maxscore = boxes[i, 8]
maxpos = i
tbox = boxes[i].copy()
ti = inds[i]
pos = i + 1
#get max box
while pos < N:
if maxscore < boxes[pos, 8]:
maxscore = boxes[pos, 8]
maxpos = pos
pos = pos + 1
#add max box as a detection
boxes[i, :] = boxes[maxpos, :]
inds[i] = inds[maxpos]
#swap
boxes[maxpos, :] = tbox
inds[maxpos] = ti
tbox = boxes[i].copy()
pos = i + 1
#NMS iteration
while pos < N:
sbox = boxes[pos].copy()
ts_iou_val = intersection(tbox, sbox)
if ts_iou_val > 0:
if method == 1:
if ts_iou_val > Nt_thres:
weight = 1 - ts_iou_val
else:
weight = 1
elif method == 2:
weight = np.exp(-1.0 * ts_iou_val**2 / sigma)
else:
if ts_iou_val > Nt_thres:
weight = 0
else:
weight = 1
boxes[pos, 8] = weight * boxes[pos, 8]
            #if the box score falls below the threshold, discard the box by
            #swapping in the last box and shrinking N
if boxes[pos, 8] < threshold:
boxes[pos, :] = boxes[N - 1, :]
inds[pos] = inds[N - 1]
N = N - 1
pos = pos - 1
pos = pos + 1
return boxes[:N]
def nms_locality(polys, thres=0.3):
"""
locality aware nms of EAST
:param polys: a N*9 numpy array. first 8 coordinates, then prob
:return: boxes after nms
"""
S = []
p = None
for g in polys:
if p is not None and intersection(g, p) > thres:
p = weighted_merge(g, p)
else:
if p is not None:
S.append(p)
p = g
if p is not None:
S.append(p)
if len(S) == 0:
return np.array([])
return standard_nms(np.array(S), thres)
if __name__ == '__main__':
# 343,350,448,135,474,143,369,359
print(
Polygon(np.array([[343, 350], [448, 135], [474, 143], [369, 359]]))
.area)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import string
import re
from .check import check_config_params
import sys
class CharacterOps(object):
""" Convert between text-label and text-index """
def __init__(self, config):
self.character_type = config['character_type']
self.loss_type = config['loss_type']
if self.character_type == "en":
self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
dict_character = list(self.character_str)
elif self.character_type == "ch":
character_dict_path = config['character_dict_path']
self.character_str = ""
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
line = line.decode('utf-8').strip("\n")
self.character_str += line
dict_character = list(self.character_str)
elif self.character_type == "en_sensitive":
            # same as the ASTER setting (94 printable characters).
self.character_str = string.printable[:-6]
dict_character = list(self.character_str)
else:
self.character_str = None
        assert self.character_str is not None, \
            "Unsupported character type: {}".format(self.character_type)
self.beg_str = "sos"
self.end_str = "eos"
if self.loss_type == "attention":
dict_character = [self.beg_str, self.end_str] + dict_character
self.dict = {}
for i, char in enumerate(dict_character):
self.dict[char] = i
self.character = dict_character
def encode(self, text):
"""convert text-label into text-index.
input:
text: text labels of each image. [batch_size]
output:
text: concatenated text index for CTCLoss.
[sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)]
length: length of each text. [batch_size]
"""
if self.character_type == "en":
text = text.lower()
text_list = []
for char in text:
if char not in self.dict:
continue
text_list.append(self.dict[char])
text = np.array(text_list)
return text
def decode(self, text_index, is_remove_duplicate=False):
""" convert text-index into text-label. """
char_list = []
char_num = self.get_char_num()
if self.loss_type == "attention":
beg_idx = self.get_beg_end_flag_idx("beg")
end_idx = self.get_beg_end_flag_idx("end")
ignored_tokens = [beg_idx, end_idx]
else:
ignored_tokens = [char_num]
for idx in range(len(text_index)):
if text_index[idx] in ignored_tokens:
continue
if is_remove_duplicate:
if idx > 0 and text_index[idx - 1] == text_index[idx]:
continue
char_list.append(self.character[text_index[idx]])
text = ''.join(char_list)
return text
def get_char_num(self):
return len(self.character)
def get_beg_end_flag_idx(self, beg_or_end):
if self.loss_type == "attention":
if beg_or_end == "beg":
idx = np.array(self.dict[self.beg_str])
elif beg_or_end == "end":
idx = np.array(self.dict[self.end_str])
else:
assert False, "Unsupport type %s in get_beg_end_flag_idx"\
% beg_or_end
return idx
else:
err = "error in get_beg_end_flag_idx when using the loss %s"\
% (self.loss_type)
assert False, err
def cal_predicts_accuracy(char_ops,
preds,
preds_lod,
labels,
labels_lod,
is_remove_duplicate=False):
acc_num = 0
img_num = 0
for ino in range(len(labels_lod) - 1):
beg_no = preds_lod[ino]
end_no = preds_lod[ino + 1]
preds_text = preds[beg_no:end_no].reshape(-1)
preds_text = char_ops.decode(preds_text, is_remove_duplicate)
beg_no = labels_lod[ino]
end_no = labels_lod[ino + 1]
labels_text = labels[beg_no:end_no].reshape(-1)
labels_text = char_ops.decode(labels_text, is_remove_duplicate)
img_num += 1
if preds_text == labels_text:
acc_num += 1
acc = acc_num * 1.0 / img_num
return acc, acc_num, img_num
def convert_rec_attention_infer_res(preds):
img_num = preds.shape[0]
target_lod = [0]
convert_ids = []
for ino in range(img_num):
end_pos = np.where(preds[ino, :] == 1)[0]
if len(end_pos) <= 1:
text_list = preds[ino, 1:]
else:
text_list = preds[ino, 1:end_pos[1]]
target_lod.append(target_lod[ino] + len(text_list))
convert_ids = convert_ids + list(text_list)
convert_ids = np.array(convert_ids)
convert_ids = convert_ids.reshape((-1, 1))
return convert_ids, target_lod
def convert_rec_label_to_lod(ori_labels):
img_num = len(ori_labels)
target_lod = [0]
convert_ids = []
for ino in range(img_num):
target_lod.append(target_lod[ino] + len(ori_labels[ino]))
convert_ids = convert_ids + list(ori_labels[ino])
convert_ids = np.array(convert_ids)
convert_ids = convert_ids.reshape((-1, 1))
return convert_ids, target_lod
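if __name__ == '__main__':
    # Usage sketch, not part of the original file (run as a module because of
    # the relative import above): round-trip a label through the built-in
    # English alphabet. The config keys are the ones read in __init__ above.
    ops = CharacterOps({'character_type': 'en', 'loss_type': 'ctc'})
    ids = ops.encode("Hello")  # lower-cased, then mapped to indices
    print(ids)                 # [17 14 21 21 24]
    print(ops.decode(ids))     # hello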
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import sys
import paddle.fluid as fluid
import logging
logger = logging.getLogger(__name__)
def check_config_params(config, config_name, params):
for param in params:
if param not in config:
err = "param %s didn't find in %s!" % (param, config_name)
assert False, err
return
'
贿
2
0
8
-
7
>
:
]
,
蹿
1
3
!
诿
线
尿
|
;
H
.
/
*
忿
齿
西
5
4
亿
(
访
6
)
稿
s
u
[
9
岿
广
S
Y
F
D
A
P
T
湿
窿
@
丿
沿
使
绿
%
"
婿
r
=
饿
ˇ
q
÷
椿
寿
?
便
殿
J
l
&
驿
x
耀
仿
鸿
廿
z
±
e
t
§
姿
b
<
退
L
鹿
w
i
h
+
I
B
N
^
_
M
鱿
怀
}
~
Z
槿
C
o
E
f
\
屿
U
a
p
y
n
g
竿
Q
羿
O
宿
k
$
c
v
W
穿
×
轿
R
G
ˉ
d
°
K
X
m
涿
`
V
#
簿
{
j
·
Ë
¥
π
é
Λ
Ο
α
 
鴿
è
Ü
И
»
ä
ɔ
´
í
É
ʌ
Я
Й
粿
®
З
β
á
Ó
ò
貿
𣇉
г
楿
滿
Φ
ε
ü
調
ˋ
ā
ú
ó
ē
Ω
П
ǐ
ō
ǒ
μ
à
ɡ
ī
²
駿
θ
ū
ì
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import errno
import os
import shutil
import tempfile
import paddle
import paddle.fluid as fluid
from .utility import initial_logger
import re
logger = initial_logger()
def _mkdir_if_not_exist(path):
"""
    mkdir if the path does not exist; ignore the exception raised when
    multiple processes mkdir concurrently
"""
if not os.path.exists(path):
try:
os.makedirs(path)
except OSError as e:
if e.errno == errno.EEXIST and os.path.isdir(path):
                logger.warning(
                    '{} has already been created by another process'.format(
                        path))
else:
raise OSError('Failed to mkdir {}'.format(path))
def _load_state(path):
if os.path.exists(path + '.pdopt'):
# XXX another hack to ignore the optimizer state
tmp = tempfile.mkdtemp()
dst = os.path.join(tmp, os.path.basename(os.path.normpath(path)))
shutil.copy(path + '.pdparams', dst + '.pdparams')
state = fluid.io.load_program_state(dst)
shutil.rmtree(tmp)
else:
state = fluid.io.load_program_state(path)
return state
def load_params(exe, prog, path, ignore_params=[]):
"""
Load model from the given path.
Args:
exe (fluid.Executor): The fluid.Executor object.
        prog (fluid.Program): the Program object to load the weights into.
        path (string): URL string or local model path.
ignore_params (list): ignore variable to load when finetuning.
It can be specified by finetune_exclude_pretrained_params
and the usage can refer to docs/advanced_tutorials/TRANSFER_LEARNING.md
"""
if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
raise ValueError("Model pretrain path {} does not "
"exists.".format(path))
logger.info('Loading parameters from {}...'.format(path))
ignore_set = set()
state = _load_state(path)
# ignore the parameter which mismatch the shape
# between the model and pretrain weight.
all_var_shape = {}
for block in prog.blocks:
for param in block.all_parameters():
all_var_shape[param.name] = param.shape
ignore_set.update([
name for name, shape in all_var_shape.items()
if name in state and shape != state[name].shape
])
if ignore_params:
all_var_names = [var.name for var in prog.list_vars()]
ignore_list = filter(
lambda var: any([re.match(name, var) for name in ignore_params]),
all_var_names)
ignore_set.update(list(ignore_list))
if len(ignore_set) > 0:
for k in ignore_set:
if k in state:
logger.warning('variable {} not used'.format(k))
del state[k]
fluid.io.set_program_state(prog, state)
def init_model(config, program, exe):
"""
load model from checkpoint or pretrained_model
"""
checkpoints = config['Global'].get('checkpoints')
if checkpoints:
path = checkpoints
fluid.load(program, path, exe)
logger.info("Finish initing model from {}".format(path))
return
pretrain_weights = config['Global'].get('pretrain_weights')
if pretrain_weights:
path = pretrain_weights
load_params(exe, program, path)
logger.info("Finish initing model from {}".format(path))
return
def save_model(program, model_path):
"""
save model to the target path
"""
fluid.save(program, model_path)
logger.info("Already save model in {}".format(model_path))