Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
ac98415b
Commit
ac98415b
authored
Sep 14, 2021
by
WenmuZhou
Browse files
merge dygraph
parents
af34d785
29929ac6
Changes
69
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1437 additions
and
97 deletions
+1437
-97
ppocr/losses/combined_loss.py
ppocr/losses/combined_loss.py
+9
-5
ppocr/losses/distillation_loss.py
ppocr/losses/distillation_loss.py
+8
-6
ppocr/losses/rec_sar_loss.py
ppocr/losses/rec_sar_loss.py
+25
-0
ppocr/modeling/backbones/__init__.py
ppocr/modeling/backbones/__init__.py
+2
-1
ppocr/modeling/backbones/rec_resnet_31.py
ppocr/modeling/backbones/rec_resnet_31.py
+176
-0
ppocr/modeling/heads/__init__.py
ppocr/modeling/heads/__init__.py
+2
-1
ppocr/modeling/heads/rec_sar_head.py
ppocr/modeling/heads/rec_sar_head.py
+383
-0
ppocr/postprocess/__init__.py
ppocr/postprocess/__init__.py
+3
-3
ppocr/postprocess/rec_postprocess.py
ppocr/postprocess/rec_postprocess.py
+108
-22
ppocr/utils/save_load.py
ppocr/utils/save_load.py
+6
-4
tests/configs/det_mv3_db.yml
tests/configs/det_mv3_db.yml
+126
-0
tests/configs/det_r50_vd_db.yml
tests/configs/det_r50_vd_db.yml
+124
-0
tests/configs/rec_icdar15_r34_train.yml
tests/configs/rec_icdar15_r34_train.yml
+99
-0
tests/ocr_det_params.txt
tests/ocr_det_params.txt
+26
-11
tests/ocr_det_server_params.txt
tests/ocr_det_server_params.txt
+3
-3
tests/ocr_ppocr_mobile_params.txt
tests/ocr_ppocr_mobile_params.txt
+67
-0
tests/ocr_ppocr_server_params.txt
tests/ocr_ppocr_server_params.txt
+66
-0
tests/ocr_rec_params.txt
tests/ocr_rec_params.txt
+33
-3
tests/ocr_rec_server_params.txt
tests/ocr_rec_server_params.txt
+81
-0
tests/prepare.sh
tests/prepare.sh
+90
-38
No files found.
ppocr/losses/combined_loss.py
View file @
ac98415b
...
...
@@ -49,11 +49,15 @@ class CombinedLoss(nn.Layer):
loss
=
loss_func
(
input
,
batch
,
**
kargs
)
if
isinstance
(
loss
,
paddle
.
Tensor
):
loss
=
{
"loss_{}_{}"
.
format
(
str
(
loss
),
idx
):
loss
}
weight
=
self
.
loss_weight
[
idx
]
for
key
in
loss
.
keys
():
if
key
==
"loss"
:
loss_all
+=
loss
[
key
]
*
weight
else
:
loss_dict
[
"{}_{}"
.
format
(
key
,
idx
)]
=
loss
[
key
]
loss
=
{
key
:
loss
[
key
]
*
weight
for
key
in
loss
}
if
"loss"
in
loss
:
loss_all
+=
loss
[
"loss"
]
else
:
loss_all
+=
paddle
.
add_n
(
list
(
loss
.
values
()))
loss_dict
.
update
(
loss
)
loss_dict
[
"loss"
]
=
loss_all
return
loss_dict
ppocr/losses/distillation_loss.py
View file @
ac98415b
...
...
@@ -44,20 +44,22 @@ class DistillationDMLLoss(DMLLoss):
def
__init__
(
self
,
model_name_pairs
=
[],
act
=
None
,
use_log
=
False
,
key
=
None
,
maps_name
=
None
,
name
=
"dml"
):
super
().
__init__
(
act
=
act
)
super
().
__init__
(
act
=
act
,
use_log
=
use_log
)
assert
isinstance
(
model_name_pairs
,
list
)
self
.
key
=
key
self
.
model_name_pairs
=
self
.
_check_model_name_pairs
(
model_name_pairs
)
self
.
name
=
name
self
.
maps_name
=
self
.
_check_maps_name
(
maps_name
)
def
_check_model_name_pairs
(
self
,
model_name_pairs
):
if
not
isinstance
(
model_name_pairs
,
list
):
return
[]
elif
isinstance
(
model_name_pairs
[
0
],
list
)
and
isinstance
(
model_name_pairs
[
0
][
0
],
str
):
elif
isinstance
(
model_name_pairs
[
0
],
list
)
and
isinstance
(
model_name_pairs
[
0
][
0
],
str
):
return
model_name_pairs
else
:
return
[
model_name_pairs
]
...
...
@@ -112,9 +114,9 @@ class DistillationDMLLoss(DMLLoss):
loss_dict
[
"{}_{}_{}_{}_{}"
.
format
(
key
,
pair
[
0
],
pair
[
1
],
map_name
,
idx
)]
=
loss
[
key
]
else
:
loss_dict
[
"{}_{}_{}"
.
format
(
self
.
name
,
self
.
maps_name
[
_c
],
idx
)]
=
loss
loss_dict
[
"{}_{}_{}"
.
format
(
self
.
name
,
self
.
maps_name
[
_c
],
idx
)]
=
loss
loss_dict
=
_sum_loss
(
loss_dict
)
return
loss_dict
...
...
ppocr/losses/rec_sar_loss.py
0 → 100644
View file @
ac98415b
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
class SARLoss(nn.Layer):
    """Cross-entropy loss for the SAR recognition head.

    Args:
        **kwargs: Optional settings. ``ignore_index`` (int, default 96) is
            the label value that ``CrossEntropyLoss`` skips. The default is
            the value that used to be hard-coded here; presumably it is the
            padding index of the dictionary in use -- TODO confirm against
            the label encoder. Any other keyword is ignored.
    """

    def __init__(self, **kwargs):
        super(SARLoss, self).__init__()
        # Read the ignored label from the config instead of hard-coding it,
        # so dictionaries of a different size can still be trained.
        ignore_index = kwargs.get('ignore_index', 96)
        self.loss_func = paddle.nn.loss.CrossEntropyLoss(
            reduction="mean", ignore_index=ignore_index)

    def forward(self, predicts, batch):
        """Compute the mean cross-entropy between predictions and labels.

        Args:
            predicts: Head output indexed as ``[:, step, class]``.
            batch: Sequence whose element ``1`` holds the label indices.

        Returns:
            dict: ``{'loss': loss}``.
        """
        # ignore last index of outputs to be in same seq_len with targets
        predict = predicts[:, :-1, :]
        # ignore first index of target in loss calculation
        label = batch[1].astype("int64")[:, 1:]
        num_classes = predict.shape[2]
        assert len(label.shape) == len(list(predict.shape)) - 1, \
            "The target's shape and inputs's shape is [N, d] and [N, num_steps]"

        # Flatten batch and step axes so every step is one classification.
        inputs = paddle.reshape(predict, [-1, num_classes])
        targets = paddle.reshape(label, [-1])
        loss = self.loss_func(inputs, targets)
        return {'loss': loss}
ppocr/modeling/backbones/__init__.py
View file @
ac98415b
...
...
@@ -27,8 +27,9 @@ def build_backbone(config, model_type):
from
.rec_resnet_fpn
import
ResNetFPN
from
.rec_mv1_enhance
import
MobileNetV1Enhance
from
.rec_nrtr_mtb
import
MTB
from
.rec_resnet_31
import
ResNet31
support_dict
=
[
'MobileNetV1Enhance'
,
'MobileNetV3'
,
'ResNet'
,
'ResNetFPN'
,
'MTB'
'MobileNetV1Enhance'
,
'MobileNetV3'
,
'ResNet'
,
'ResNetFPN'
,
'MTB'
,
"ResNet31"
]
elif
model_type
==
"e2e"
:
from
.e2e_resnet_vd_pg
import
ResNet
...
...
ppocr/modeling/backbones/rec_resnet_31.py
0 → 100644
View file @
ac98415b
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
ParamAttr
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
import
numpy
as
np
__all__
=
[
"ResNet31"
]
def conv3x3(in_channel, out_channel, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2D`` with padding 1.

    Args:
        in_channel: Number of input channels.
        out_channel: Number of output channels.
        stride: Convolution stride (default 1).
    """
    conv_options = dict(
        kernel_size=3, stride=stride, padding=1, bias_attr=False)
    return nn.Conv2D(in_channel, out_channel, **conv_options)
class BasicBlock(nn.Layer):
    """Two-convolution residual block (3x3 conv -> BN -> ReLU -> 3x3 conv -> BN).

    Args:
        in_channels: Channels of the block input.
        channels: Channels produced by both convolutions.
        stride: Stride of the first convolution and of the shortcut projection.
        downsample: Only its truthiness is used. When truthy the block builds
            its own 1x1 conv + BatchNorm shortcut; the object passed in is
            not kept.
    """
    # Output channels are ``channels * expansion``.
    expansion = 1

    def __init__(self, in_channels, channels, stride=1, downsample=False):
        super().__init__()
        self.conv1 = conv3x3(in_channels, channels, stride)
        self.bn1 = nn.BatchNorm2D(channels)
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(channels, channels)
        self.bn2 = nn.BatchNorm2D(channels)
        # Temporarily holds the caller's value; it is rebound in both
        # branches below, so the caller's object never stays attached.
        self.downsample = downsample
        if downsample:
            # Projection shortcut so the residual matches the main path.
            self.downsample = nn.Sequential(
                nn.Conv2D(
                    in_channels,
                    channels * self.expansion,
                    1,
                    stride,
                    bias_attr=False),
                nn.BatchNorm2D(channels * self.expansion), )
        else:
            # Empty container stands in for an identity shortcut.
            self.downsample = nn.Sequential()
        self.stride = stride

    def forward(self, x):
        """Apply the block and add the (possibly projected) input."""
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        # NOTE(review): this relies on the truthiness of an nn.Sequential;
        # whether an empty one is falsy depends on the Paddle version --
        # confirm. Either way the empty container should act as identity.
        if self.downsample:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
class ResNet31(nn.Layer):
    '''
    Backbone made of two stem convolutions followed by four stages, each
    stage being (optional max-pool) -> residual blocks -> conv -> BN -> ReLU.

    Args:
        in_channels (int): Number of channels of input image tensor.
        layers (list[int]): List of BasicBlock number for each stage.
        channels (list[int]): List of out_channels of Conv2d layer.
        out_indices (None | Sequence[int]): Indices of output stages.
        last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
    '''

    def __init__(self,
                 in_channels=3,
                 layers=[1, 2, 5, 3],
                 channels=[64, 128, 256, 256, 512, 512, 512],
                 out_indices=None,
                 last_stage_pool=False):
        # The list defaults are only indexed, never mutated, in this class.
        super(ResNet31, self).__init__()
        assert isinstance(in_channels, int)
        assert isinstance(last_stage_pool, bool)

        self.out_indices = out_indices
        self.last_stage_pool = last_stage_pool

        # conv 1 (Conv Conv)
        self.conv1_1 = nn.Conv2D(
            in_channels, channels[0], kernel_size=3, stride=1, padding=1)
        self.bn1_1 = nn.BatchNorm2D(channels[0])
        self.relu1_1 = nn.ReLU()

        self.conv1_2 = nn.Conv2D(
            channels[0], channels[1], kernel_size=3, stride=1, padding=1)
        self.bn1_2 = nn.BatchNorm2D(channels[1])
        self.relu1_2 = nn.ReLU()

        # conv 2 (Max-pooling, Residual block, Conv)
        # 2x2 pooling with stride 2 (sizes rounded up by ceil_mode).
        self.pool2 = nn.MaxPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block2 = self._make_layer(channels[1], channels[2], layers[0])
        self.conv2 = nn.Conv2D(
            channels[2], channels[2], kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2D(channels[2])
        self.relu2 = nn.ReLU()

        # conv 3 (Max-pooling, Residual block, Conv)
        self.pool3 = nn.MaxPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block3 = self._make_layer(channels[2], channels[3], layers[1])
        self.conv3 = nn.Conv2D(
            channels[3], channels[3], kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2D(channels[3])
        self.relu3 = nn.ReLU()

        # conv 4 (Max-pooling, Residual block, Conv)
        # (2, 1) kernel and stride: only the first spatial axis is pooled.
        self.pool4 = nn.MaxPool2D(
            kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True)
        self.block4 = self._make_layer(channels[3], channels[4], layers[2])
        self.conv4 = nn.Conv2D(
            channels[4], channels[4], kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2D(channels[4])
        self.relu4 = nn.ReLU()

        # conv 5 ((Max-pooling), Residual block, Conv)
        # pool5 stays None unless last_stage_pool is set; forward() checks it.
        self.pool5 = None
        if self.last_stage_pool:
            self.pool5 = nn.MaxPool2D(
                kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block5 = self._make_layer(channels[4], channels[5], layers[3])
        self.conv5 = nn.Conv2D(
            channels[5], channels[5], kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2D(channels[5])
        self.relu5 = nn.ReLU()

        # NOTE(review): conv5 emits channels[5] features while this reports
        # channels[-1]; they are equal for the default list -- confirm for
        # custom ``channels`` values.
        self.out_channels = channels[-1]

    def _make_layer(self, input_channels, output_channels, blocks):
        """Stack ``blocks`` BasicBlock layers into one ``nn.Sequential``.

        After the first block ``input_channels`` is set to
        ``output_channels``, so only a block whose input and output widths
        differ gets a ``downsample`` argument.
        """
        layers = []
        for _ in range(blocks):
            downsample = None
            if input_channels != output_channels:
                # NOTE(review): BasicBlock in this file only tests this
                # argument for truthiness and builds its own projection.
                downsample = nn.Sequential(
                    nn.Conv2D(
                        input_channels,
                        output_channels,
                        kernel_size=1,
                        stride=1,
                        bias_attr=False),
                    nn.BatchNorm2D(output_channels), )

            layers.append(
                BasicBlock(
                    input_channels, output_channels, downsample=downsample))
            input_channels = output_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the stem and the four stages.

        Returns:
            The last stage output, or a tuple of the stage outputs selected
            by ``out_indices`` when that is not None.
        """
        x = self.conv1_1(x)
        x = self.bn1_1(x)
        x = self.relu1_1(x)

        x = self.conv1_2(x)
        x = self.bn1_2(x)
        x = self.relu1_2(x)

        outs = []
        for i in range(4):
            # Stage attributes are numbered 2..5 (pool2/block2/... pool5/...).
            layer_index = i + 2
            pool_layer = getattr(self, f'pool{layer_index}')
            block_layer = getattr(self, f'block{layer_index}')
            conv_layer = getattr(self, f'conv{layer_index}')
            bn_layer = getattr(self, f'bn{layer_index}')
            relu_layer = getattr(self, f'relu{layer_index}')

            # pool5 may be None when last_stage_pool is False.
            if pool_layer is not None:
                x = pool_layer(x)
            x = block_layer(x)
            x = conv_layer(x)
            x = bn_layer(x)
            x = relu_layer(x)

            outs.append(x)

        if self.out_indices is not None:
            return tuple([outs[i] for i in self.out_indices])

        return x
ppocr/modeling/heads/__init__.py
View file @
ac98415b
...
...
@@ -28,13 +28,14 @@ def build_head(config):
from
.rec_att_head
import
AttentionHead
from
.rec_srn_head
import
SRNHead
from
.rec_nrtr_head
import
Transformer
from
.rec_sar_head
import
SARHead
# cls head
from
.cls_head
import
ClsHead
support_dict
=
[
'DBHead'
,
'PSEHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
,
'AttentionHead'
,
'SRNHead'
,
'PGHead'
,
'Transformer'
,
'TableAttentionHead'
'TableAttentionHead'
,
'SARHead'
]
#table head
...
...
ppocr/modeling/heads/rec_sar_head.py
0 → 100644
View file @
ac98415b
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
math
import
paddle
from
paddle
import
ParamAttr
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
class SAREncoder(nn.Layer):
    """Encode a backbone feature map into one holistic vector per sample.

    The map is max-pooled over its height, run through a two-layer RNN, and
    the output at the last (valid) step goes through a linear layer.

    Args:
        enc_bi_rnn (bool): If True, use bidirectional RNN in encoder.
        enc_drop_rnn (float): Dropout probability of RNN layer in encoder.
        enc_gru (bool): If True, use GRU, else LSTM in encoder.
        d_model (int): Dim of channels from backbone.
        d_enc (int): Dim of encoder RNN layer.
        mask (bool): If True, mask padding in RNN sequence.
    """

    def __init__(self,
                 enc_bi_rnn=False,
                 enc_drop_rnn=0.1,
                 enc_gru=False,
                 d_model=512,
                 d_enc=512,
                 mask=True,
                 **kwargs):
        super().__init__()
        assert isinstance(enc_bi_rnn, bool)
        assert isinstance(enc_drop_rnn, (int, float))
        assert 0 <= enc_drop_rnn < 1.0
        assert isinstance(enc_gru, bool)
        assert isinstance(d_model, int)
        assert isinstance(d_enc, int)
        assert isinstance(mask, bool)

        self.enc_bi_rnn = enc_bi_rnn
        self.enc_drop_rnn = enc_drop_rnn
        self.mask = mask

        # Recurrent encoder: GRU or LSTM, optionally bidirectional.
        rnn_cls = nn.GRU if enc_gru else nn.LSTM
        self.rnn_encoder = rnn_cls(
            input_size=d_model,
            hidden_size=d_enc,
            num_layers=2,
            time_major=False,
            dropout=enc_drop_rnn,
            direction='bidirectional' if enc_bi_rnn else 'forward')

        # global feature transformation
        rnn_out_size = d_enc * (int(enc_bi_rnn) + 1)
        self.linear = nn.Linear(rnn_out_size, rnn_out_size)

    def forward(self, feat, img_metas=None):
        """Return the holistic feature for ``feat``.

        Args:
            feat: Backbone feature map (bsz c h w).
            img_metas: Optional sequence whose first element has one entry
                per sample and whose last element holds the valid ratios.
        """
        if img_metas is not None:
            assert len(img_metas[0]) == feat.shape[0]

        use_ratios = img_metas is not None and self.mask
        valid_ratios = img_metas[-1] if use_ratios else None

        # Collapse the height axis, then move width in front of channels.
        pooled = F.max_pool2d(
            feat, kernel_size=(feat.shape[2], 1), stride=1, padding=0)
        sequence = paddle.transpose(
            pooled.squeeze(2), perm=[0, 2, 1])  # bsz * W * C
        rnn_out = self.rnn_encoder(sequence)[0]  # bsz * T * C

        if valid_ratios is None:
            last_valid = rnn_out[:, -1, :]  # bsz * C
        else:
            # Pick, per sample, the last step that lies inside the valid
            # (non-padded) part of the sequence.
            num_steps = rnn_out.shape[1]
            last_valid = paddle.stack(
                [
                    rnn_out[i, min(num_steps, math.ceil(num_steps * ratio)) -
                            1, :] for i, ratio in enumerate(valid_ratios)
                ],
                axis=0)

        return self.linear(last_valid)  # bsz * C
class BaseDecoder(nn.Layer):
    """Decoder base class that routes ``forward`` to a train or test path."""

    def __init__(self, **kwargs):
        super().__init__()

    def forward_train(self, feat, out_enc, targets, img_metas):
        """Training-time decoding; must be provided by subclasses."""
        raise NotImplementedError()

    def forward_test(self, feat, out_enc, img_metas):
        """Inference-time decoding; must be provided by subclasses."""
        raise NotImplementedError()

    def forward(self,
                feat,
                out_enc,
                label=None,
                img_metas=None,
                train_mode=True):
        """Remember ``train_mode`` on the instance and dispatch on it."""
        self.train_mode = train_mode

        if not train_mode:
            return self.forward_test(feat, out_enc, img_metas)
        return self.forward_train(feat, out_enc, label, img_metas)
class ParallelSARDecoder(BaseDecoder):
    """RNN decoder with 2D attention over the backbone feature map.

    Args:
        out_channels (int): Output class number.
        enc_bi_rnn (bool): If True, use bidirectional RNN in encoder.
        dec_bi_rnn (bool): If True, use bidirectional RNN in decoder.
        dec_drop_rnn (float): Dropout of RNN layer in decoder.
        dec_gru (bool): If True, use GRU, else LSTM in decoder.
        d_model (int): Dim of channels from backbone.
        d_enc (int): Dim of encoder RNN layer.
        d_k (int): Dim of channels of attention module.
        pred_dropout (float): Dropout probability of prediction layer.
        max_text_length (int): Maximum sequence length for decoding.
        mask (bool): If True, mask padding in feature map.
        pred_concat (bool): If True, concat glimpse feature from
            attention with holistic feature and hidden state.

    The start token index is ``out_channels - 2`` and the padding token
    index is ``out_channels - 1``; the prediction layer emits
    ``out_channels - 1`` classes.
    """

    def __init__(
            self,
            out_channels,  # 90 + unknown + start + padding
            enc_bi_rnn=False,
            dec_bi_rnn=False,
            dec_drop_rnn=0.0,
            dec_gru=False,
            d_model=512,
            d_enc=512,
            d_k=64,
            pred_dropout=0.1,
            max_text_length=30,
            mask=True,
            pred_concat=True,
            **kwargs):
        super().__init__()

        self.num_classes = out_channels
        self.enc_bi_rnn = enc_bi_rnn
        self.d_k = d_k
        self.start_idx = out_channels - 2
        self.padding_idx = out_channels - 1
        self.max_seq_len = max_text_length
        self.mask = mask
        self.pred_concat = pred_concat

        encoder_rnn_out_size = d_enc * (int(enc_bi_rnn) + 1)
        decoder_rnn_out_size = encoder_rnn_out_size * (int(dec_bi_rnn) + 1)

        # 2D attention layer
        self.conv1x1_1 = nn.Linear(decoder_rnn_out_size, d_k)
        self.conv3x3_1 = nn.Conv2D(
            d_model, d_k, kernel_size=3, stride=1, padding=1)
        self.conv1x1_2 = nn.Linear(d_k, 1)

        # Decoder RNN layer
        if dec_bi_rnn:
            direction = 'bidirectional'
        else:
            direction = 'forward'

        rnn_kwargs = dict(
            input_size=encoder_rnn_out_size,
            hidden_size=encoder_rnn_out_size,
            num_layers=2,
            time_major=False,
            dropout=dec_drop_rnn,
            direction=direction)
        if dec_gru:
            self.rnn_decoder = nn.GRU(**rnn_kwargs)
        else:
            self.rnn_decoder = nn.LSTM(**rnn_kwargs)

        # Decoder input embedding
        self.embedding = nn.Embedding(
            self.num_classes,
            encoder_rnn_out_size,
            padding_idx=self.padding_idx)

        # Prediction layer
        self.pred_dropout = nn.Dropout(pred_dropout)
        pred_num_classes = self.num_classes - 1
        if pred_concat:
            # The holistic feature concatenated in _2d_attention has the
            # encoder's output width (2 * d_enc when the encoder is
            # bidirectional), so use that width rather than d_enc. The two
            # are equal for a unidirectional encoder.
            fc_in_channel = (
                decoder_rnn_out_size + d_model + encoder_rnn_out_size)
        else:
            fc_in_channel = d_model
        self.prediction = nn.Linear(fc_in_channel, pred_num_classes)

    def _2d_attention(self,
                      decoder_input,
                      feat,
                      holistic_feat,
                      valid_ratios=None):
        """Run the decoder RNN and attend over ``feat`` for every step.

        Reads ``self.train_mode``, which ``BaseDecoder.forward`` sets, to
        decide whether the prediction dropout is applied.

        Args:
            decoder_input: Decoder RNN input, bsz * (seq_len + 1) * emb_dim.
            feat: Backbone feature map, bsz * c * h * w.
            holistic_feat: Encoder output expanded over the steps when
                ``pred_concat`` is set.
            valid_ratios: Optional per-sample ratios; columns of the feature
                map beyond ``ceil(w * ratio)`` are masked out.

        Returns:
            Prediction scores, bsz * (seq_len + 1) * num_classes.
        """
        y = self.rnn_decoder(decoder_input)[0]
        # y: bsz * (seq_len + 1) * hidden_size

        attn_query = self.conv1x1_1(y)  # bsz * (seq_len + 1) * attn_size
        bsz, seq_len, attn_size = attn_query.shape
        attn_query = paddle.unsqueeze(attn_query, axis=[3, 4])
        # (bsz, seq_len + 1, attn_size, 1, 1)

        attn_key = self.conv3x3_1(feat)
        # bsz * attn_size * h * w
        attn_key = attn_key.unsqueeze(1)
        # bsz * 1 * attn_size * h * w

        attn_weight = paddle.tanh(paddle.add(attn_key, attn_query))
        # bsz * (seq_len + 1) * attn_size * h * w
        attn_weight = paddle.transpose(attn_weight, perm=[0, 1, 3, 4, 2])
        # bsz * (seq_len + 1) * h * w * attn_size
        attn_weight = self.conv1x1_2(attn_weight)
        # bsz * (seq_len + 1) * h * w * 1
        bsz, T, h, w, c = attn_weight.shape
        assert c == 1

        if valid_ratios is not None:
            # cal mask of attention weight
            for i, valid_ratio in enumerate(valid_ratios):
                valid_width = min(w, math.ceil(w * valid_ratio))
                # -inf becomes zero weight after the softmax below.
                attn_weight[i, :, :, valid_width:, :] = float('-inf')

        # Softmax jointly over all spatial positions of each step.
        attn_weight = paddle.reshape(attn_weight, [bsz, T, -1])
        attn_weight = F.softmax(attn_weight, axis=-1)

        attn_weight = paddle.reshape(attn_weight, [bsz, T, h, w, c])
        attn_weight = paddle.transpose(attn_weight, perm=[0, 1, 4, 2, 3])
        # attn_weight: bsz * T * c * h * w
        # feat: bsz * c * h * w
        attn_feat = paddle.sum(paddle.multiply(feat.unsqueeze(1), attn_weight),
                               (3, 4),
                               keepdim=False)
        # bsz * (seq_len + 1) * C

        # Linear transformation
        if self.pred_concat:
            hf_c = holistic_feat.shape[-1]
            holistic_feat = paddle.expand(
                holistic_feat, shape=[bsz, seq_len, hf_c])
            y = self.prediction(
                paddle.concat((y, attn_feat, holistic_feat), 2))
        else:
            y = self.prediction(attn_feat)
        # bsz * (seq_len + 1) * num_classes
        if self.train_mode:
            y = self.pred_dropout(y)

        return y

    def forward_train(self, feat, out_enc, label, img_metas):
        '''
        Teacher-forced decoding of the whole label sequence in one pass.

        img_metas: [label, valid_ratio]
        '''
        if img_metas is not None:
            assert len(img_metas[0]) == feat.shape[0]

        valid_ratios = None
        if img_metas is not None and self.mask:
            valid_ratios = img_metas[-1]

        # No explicit ``label.cuda()`` here: the label is used on whatever
        # device it already lives on, so training also works on CPU-only
        # installations instead of failing on the forced GPU copy.
        lab_embedding = self.embedding(label)
        # bsz * seq_len * emb_dim
        out_enc = out_enc.unsqueeze(1)
        # bsz * 1 * emb_dim
        in_dec = paddle.concat((out_enc, lab_embedding), axis=1)
        # bsz * (seq_len + 1) * C
        out_dec = self._2d_attention(
            in_dec, feat, out_enc, valid_ratios=valid_ratios)
        # bsz * (seq_len + 1) * num_classes

        # Step 0 corresponds to the holistic feature, not a character.
        return out_dec[:, 1:, :]  # bsz * seq_len * num_classes

    def forward_test(self, feat, out_enc, img_metas):
        """Greedy decoding for ``max_seq_len`` steps.

        Returns:
            Softmax probabilities, bsz * seq_len * num_classes.
        """
        if img_metas is not None:
            assert len(img_metas[0]) == feat.shape[0]

        valid_ratios = None
        if img_metas is not None and self.mask:
            valid_ratios = img_metas[-1]

        seq_len = self.max_seq_len
        bsz = feat.shape[0]
        start_token = paddle.full(
            (bsz, ), fill_value=self.start_idx, dtype='int64')
        # bsz
        start_token = self.embedding(start_token)
        # bsz * emb_dim
        emb_dim = start_token.shape[1]
        start_token = start_token.unsqueeze(1)
        start_token = paddle.expand(start_token, shape=[bsz, seq_len, emb_dim])
        # bsz * seq_len * emb_dim
        out_enc = out_enc.unsqueeze(1)
        # bsz * 1 * emb_dim
        decoder_input = paddle.concat((out_enc, start_token), axis=1)
        # bsz * (seq_len + 1) * emb_dim

        outputs = []
        for i in range(1, seq_len + 1):
            decoder_output = self._2d_attention(
                decoder_input, feat, out_enc, valid_ratios=valid_ratios)
            char_output = decoder_output[:, i, :]  # bsz * num_classes
            char_output = F.softmax(char_output, -1)
            outputs.append(char_output)
            max_idx = paddle.argmax(char_output, axis=1, keepdim=False)
            char_embedding = self.embedding(max_idx)  # bsz * emb_dim
            if i < seq_len:
                # Feed the predicted character into the next step's input.
                decoder_input[:, i + 1, :] = char_embedding

        outputs = paddle.stack(outputs, 1)  # bsz * seq_len * num_classes

        return outputs
class SARHead(nn.Layer):
    """Recognition head pairing a ``SAREncoder`` with a ``ParallelSARDecoder``.

    The constructor forwards the encoder options (``enc_bi_rnn``,
    ``enc_drop_rnn``, ``enc_gru``) to the encoder and the remaining named
    options to the decoder; extra keyword arguments are accepted and unused.
    """

    def __init__(self,
                 out_channels,
                 enc_bi_rnn=False,
                 enc_drop_rnn=0.1,
                 enc_gru=False,
                 dec_bi_rnn=False,
                 dec_drop_rnn=0.0,
                 dec_gru=False,
                 d_k=512,
                 pred_dropout=0.1,
                 max_text_length=30,
                 pred_concat=True,
                 **kwargs):
        super(SARHead, self).__init__()

        # encoder module
        encoder_options = dict(
            enc_bi_rnn=enc_bi_rnn, enc_drop_rnn=enc_drop_rnn, enc_gru=enc_gru)
        self.encoder = SAREncoder(**encoder_options)

        # decoder module
        decoder_options = dict(
            out_channels=out_channels,
            enc_bi_rnn=enc_bi_rnn,
            dec_bi_rnn=dec_bi_rnn,
            dec_drop_rnn=dec_drop_rnn,
            dec_gru=dec_gru,
            d_k=d_k,
            pred_dropout=pred_dropout,
            max_text_length=max_text_length,
            pred_concat=pred_concat)
        self.decoder = ParallelSARDecoder(**decoder_options)

    def forward(self, feat, targets=None):
        '''
        Encode ``feat`` and decode it, using the labels while training.

        targets is passed on as img_metas: [label, valid_ratio]
        '''
        holistic_feat = self.encoder(feat, targets)  # bsz c

        if self.training:
            # targets[0] holds the label indices used for teacher forcing.
            label = paddle.to_tensor(targets[0], dtype='int64')
            return self.decoder(
                feat, holistic_feat, label, img_metas=targets)

        # (bsz, seq_len, num_classes)
        return self.decoder(
            feat,
            holistic_feat,
            label=None,
            img_metas=targets,
            train_mode=False)
ppocr/postprocess/__init__.py
View file @
ac98415b
...
...
@@ -25,7 +25,7 @@ from .db_postprocess import DBPostProcess, DistillationDBPostProcess
from
.east_postprocess
import
EASTPostProcess
from
.sast_postprocess
import
SASTPostProcess
from
.rec_postprocess
import
CTCLabelDecode
,
AttnLabelDecode
,
SRNLabelDecode
,
DistillationCTCLabelDecode
,
NRTRLabelDecode
,
\
TableLabelDecode
TableLabelDecode
,
SARLabelDecode
from
.cls_postprocess
import
ClsPostProcess
from
.pg_postprocess
import
PGPostProcess
from
.pse_postprocess
import
PSEPostProcess
...
...
@@ -35,8 +35,8 @@ def build_post_process(config, global_config=None):
support_dict
=
[
'DBPostProcess'
,
'PSEPostProcess'
,
'EASTPostProcess'
,
'SASTPostProcess'
,
'CTCLabelDecode'
,
'AttnLabelDecode'
,
'ClsPostProcess'
,
'SRNLabelDecode'
,
'PGPostProcess'
,
'DistillationCTCLabelDecode'
,
'
NRTR
LabelDecode'
,
'TableLabelDecode'
,
'DistillationDBPostProcess'
'PGPostProcess'
,
'DistillationCTCLabelDecode'
,
'
Table
LabelDecode'
,
'DistillationDBPostProcess'
,
'NRTRLabelDecode'
,
'SARLabelDecode'
]
config
=
copy
.
deepcopy
(
config
)
...
...
ppocr/postprocess/rec_postprocess.py
View file @
ac98415b
...
...
@@ -15,6 +15,7 @@ import numpy as np
import
string
import
paddle
from
paddle.nn
import
functional
as
F
import
re
class
BaseRecLabelDecode
(
object
):
...
...
@@ -165,21 +166,21 @@ class NRTRLabelDecode(BaseRecLabelDecode):
use_space_char
=
True
,
**
kwargs
):
super
(
NRTRLabelDecode
,
self
).
__init__
(
character_dict_path
,
character_type
,
use_space_char
)
character_type
,
use_space_char
)
def
__call__
(
self
,
preds
,
label
=
None
,
*
args
,
**
kwargs
):
if
preds
.
dtype
==
paddle
.
int64
:
if
isinstance
(
preds
,
paddle
.
Tensor
):
preds
=
preds
.
numpy
()
if
preds
[
0
][
0
]
==
2
:
preds_idx
=
preds
[:,
1
:]
if
preds
[
0
][
0
]
==
2
:
preds_idx
=
preds
[:,
1
:]
else
:
preds_idx
=
preds
text
=
self
.
decode
(
preds_idx
)
if
label
is
None
:
return
text
label
=
self
.
decode
(
label
[:,
1
:])
label
=
self
.
decode
(
label
[:,
1
:])
else
:
if
isinstance
(
preds
,
paddle
.
Tensor
):
preds
=
preds
.
numpy
()
...
...
@@ -188,13 +189,13 @@ class NRTRLabelDecode(BaseRecLabelDecode):
text
=
self
.
decode
(
preds_idx
,
preds_prob
,
is_remove_duplicate
=
False
)
if
label
is
None
:
return
text
label
=
self
.
decode
(
label
[:,
1
:])
label
=
self
.
decode
(
label
[:,
1
:])
return
text
,
label
def
add_special_char
(
self
,
dict_character
):
dict_character
=
[
'blank'
,
'<unk>'
,
'<s>'
,
'</s>'
]
+
dict_character
dict_character
=
[
'blank'
,
'<unk>'
,
'<s>'
,
'</s>'
]
+
dict_character
return
dict_character
def
decode
(
self
,
text_index
,
text_prob
=
None
,
is_remove_duplicate
=
False
):
""" convert text-index into text-label. """
result_list
=
[]
...
...
@@ -203,10 +204,11 @@ class NRTRLabelDecode(BaseRecLabelDecode):
char_list
=
[]
conf_list
=
[]
for
idx
in
range
(
len
(
text_index
[
batch_idx
])):
if
text_index
[
batch_idx
][
idx
]
==
3
:
# end
if
text_index
[
batch_idx
][
idx
]
==
3
:
# end
break
try
:
char_list
.
append
(
self
.
character
[
int
(
text_index
[
batch_idx
][
idx
])])
char_list
.
append
(
self
.
character
[
int
(
text_index
[
batch_idx
][
idx
])])
except
:
continue
if
text_prob
is
not
None
:
...
...
@@ -218,7 +220,6 @@ class NRTRLabelDecode(BaseRecLabelDecode):
return
result_list
class
AttnLabelDecode
(
BaseRecLabelDecode
):
""" Convert between text-label and text-index """
...
...
@@ -256,7 +257,8 @@ class AttnLabelDecode(BaseRecLabelDecode):
if
idx
>
0
and
text_index
[
batch_idx
][
idx
-
1
]
==
text_index
[
batch_idx
][
idx
]:
continue
char_list
.
append
(
self
.
character
[
int
(
text_index
[
batch_idx
][
idx
])])
char_list
.
append
(
self
.
character
[
int
(
text_index
[
batch_idx
][
idx
])])
if
text_prob
is
not
None
:
conf_list
.
append
(
text_prob
[
batch_idx
][
idx
])
else
:
...
...
@@ -386,10 +388,9 @@ class SRNLabelDecode(BaseRecLabelDecode):
class
TableLabelDecode
(
object
):
""" """
def
__init__
(
self
,
character_dict_path
,
**
kwargs
):
list_character
,
list_elem
=
self
.
load_char_elem_dict
(
character_dict_path
)
def
__init__
(
self
,
character_dict_path
,
**
kwargs
):
list_character
,
list_elem
=
self
.
load_char_elem_dict
(
character_dict_path
)
list_character
=
self
.
add_special_char
(
list_character
)
list_elem
=
self
.
add_special_char
(
list_elem
)
self
.
dict_character
=
{}
...
...
@@ -408,7 +409,8 @@ class TableLabelDecode(object):
list_elem
=
[]
with
open
(
character_dict_path
,
"rb"
)
as
fin
:
lines
=
fin
.
readlines
()
substr
=
lines
[
0
].
decode
(
'utf-8'
).
strip
(
"
\n
"
).
strip
(
"
\r\n
"
).
split
(
"
\t
"
)
substr
=
lines
[
0
].
decode
(
'utf-8'
).
strip
(
"
\n
"
).
strip
(
"
\r\n
"
).
split
(
"
\t
"
)
character_num
=
int
(
substr
[
0
])
elem_num
=
int
(
substr
[
1
])
for
cno
in
range
(
1
,
1
+
character_num
):
...
...
@@ -428,14 +430,14 @@ class TableLabelDecode(object):
def
__call__
(
self
,
preds
):
structure_probs
=
preds
[
'structure_probs'
]
loc_preds
=
preds
[
'loc_preds'
]
if
isinstance
(
structure_probs
,
paddle
.
Tensor
):
if
isinstance
(
structure_probs
,
paddle
.
Tensor
):
structure_probs
=
structure_probs
.
numpy
()
if
isinstance
(
loc_preds
,
paddle
.
Tensor
):
if
isinstance
(
loc_preds
,
paddle
.
Tensor
):
loc_preds
=
loc_preds
.
numpy
()
structure_idx
=
structure_probs
.
argmax
(
axis
=
2
)
structure_probs
=
structure_probs
.
max
(
axis
=
2
)
structure_str
,
structure_pos
,
result_score_list
,
result_elem_idx_list
=
self
.
decode
(
structure_idx
,
structure_probs
,
'elem'
)
structure_str
,
structure_pos
,
result_score_list
,
result_elem_idx_list
=
self
.
decode
(
structure_idx
,
structure_probs
,
'elem'
)
res_html_code_list
=
[]
res_loc_list
=
[]
batch_num
=
len
(
structure_str
)
...
...
@@ -450,8 +452,13 @@ class TableLabelDecode(object):
res_loc
=
np
.
array
(
res_loc
)
res_html_code_list
.
append
(
res_html_code
)
res_loc_list
.
append
(
res_loc
)
return
{
'res_html_code'
:
res_html_code_list
,
'res_loc'
:
res_loc_list
,
'res_score_list'
:
result_score_list
,
'res_elem_idx_list'
:
result_elem_idx_list
,
'structure_str_list'
:
structure_str
}
return
{
'res_html_code'
:
res_html_code_list
,
'res_loc'
:
res_loc_list
,
'res_score_list'
:
result_score_list
,
'res_elem_idx_list'
:
result_elem_idx_list
,
'structure_str_list'
:
structure_str
}
def
decode
(
self
,
text_index
,
structure_probs
,
char_or_elem
):
"""convert text-label into text-index.
...
...
@@ -516,3 +523,82 @@ class TableLabelDecode(object):
assert
False
,
"Unsupport type %s in char_or_elem"
\
%
char_or_elem
return
idx
class SARLabelDecode(BaseRecLabelDecode):
    """ Convert between text-label and text-index """

    def __init__(self,
                 character_dict_path=None,
                 character_type='ch',
                 use_space_char=False,
                 **kwargs):
        super(SARLabelDecode, self).__init__(character_dict_path,
                                             character_type, use_space_char)

        # Optional post-filter applied to every decoded string.
        self.rm_symbol = kwargs.get('rm_symbol', False)

    def add_special_char(self, dict_character):
        """Append the unknown, begin/end and padding tokens, in that order,
        and record their indices on the instance."""
        extended = dict_character + ["<UKN>", "<BOS/EOS>", "<PAD>"]
        first_special = len(dict_character)
        self.unknown_idx = first_special
        # Start and end share a single token.
        self.start_idx = first_special + 1
        self.end_idx = first_special + 1
        self.padding_idx = first_special + 2
        return extended

    def decode(self, text_index, text_prob=None, is_remove_duplicate=False):
        """ convert text-index into text-label. """
        ignored_tokens = self.get_ignored_tokens()
        decoded = []

        for batch_idx in range(len(text_index)):
            row = text_index[batch_idx]
            chars = []
            confs = []
            for idx in range(len(row)):
                token = row[idx]
                if token in ignored_tokens:
                    continue
                if int(token) == int(self.end_idx):
                    # A leading begin/end token in a label (no probabilities
                    # given) is skipped; anywhere else it ends the text.
                    if text_prob is None and idx == 0:
                        continue
                    break
                # only for predict
                if is_remove_duplicate and idx > 0 and row[idx - 1] == token:
                    continue
                chars.append(self.character[int(token)])
                confs.append(1 if text_prob is None else text_prob[batch_idx][
                    idx])

            text = ''.join(chars)
            if self.rm_symbol:
                text = re.sub('[^A-Z^a-z^0-9^\u4e00-\u9fa5]', '', text.lower())
            decoded.append((text, np.mean(confs)))
        return decoded

    def __call__(self, preds, label=None, *args, **kwargs):
        """Decode predictions (and labels, if given) into (text, score)
        pairs."""
        if isinstance(preds, paddle.Tensor):
            preds = preds.numpy()

        best_idx = preds.argmax(axis=2)
        best_prob = preds.max(axis=2)
        text = self.decode(best_idx, best_prob, is_remove_duplicate=False)

        if label is not None:
            return text, self.decode(label, is_remove_duplicate=False)
        return text

    def get_ignored_tokens(self):
        """Token indices that ``decode`` skips: only the padding index."""
        return [self.padding_idx]
ppocr/utils/save_load.py
View file @
ac98415b
...
...
@@ -108,14 +108,15 @@ def load_dygraph_params(config, model, logger, optimizer):
for
k1
,
k2
in
zip
(
state_dict
.
keys
(),
params
.
keys
()):
if
list
(
state_dict
[
k1
].
shape
)
==
list
(
params
[
k2
].
shape
):
new_state_dict
[
k1
]
=
params
[
k2
]
else
:
logger
.
info
(
f
"The shape of model params
{
k1
}
{
state_dict
[
k1
].
shape
}
not matched with loaded params
{
k2
}
{
params
[
k2
].
shape
}
!"
)
else
:
logger
.
info
(
f
"The shape of model params
{
k1
}
{
state_dict
[
k1
].
shape
}
not matched with loaded params
{
k2
}
{
params
[
k2
].
shape
}
!"
)
model
.
set_state_dict
(
new_state_dict
)
logger
.
info
(
f
"loaded pretrained_model successful from
{
pm
}
"
)
return
{}
def
load_pretrained_params
(
model
,
path
):
if
path
is
None
:
return
False
...
...
@@ -138,6 +139,7 @@ def load_pretrained_params(model, path):
print
(
f
"load pretrain successful from
{
path
}
"
)
return
model
def
save_model
(
model
,
optimizer
,
model_path
,
...
...
tests/configs/det_mv3_db.yml
0 → 100644
View file @
ac98415b
Global
:
use_gpu
:
false
epoch_num
:
5
log_smooth_window
:
20
print_batch_step
:
1
save_model_dir
:
./output/db_mv3/
save_epoch_step
:
1200
# evaluation is run every 400 iterations (see eval_batch_step)
eval_batch_step
:
[
0
,
400
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_db/predicts_db.txt
Architecture
:
model_type
:
det
algorithm
:
DB
Transform
:
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
disable_se
:
True
Neck
:
name
:
DBFPN
out_channels
:
96
Head
:
name
:
DBHead
k
:
50
Loss
:
name
:
DBLoss
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
beta
:
10
ohem_ratio
:
3
Optimizer
:
name
:
Adam
#Momentum
#momentum: 0.9
beta1
:
0.9
beta2
:
0.999
lr
:
learning_rate
:
0.001
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
DBPostProcess
thresh
:
0.3
box_thresh
:
0.6
max_candidates
:
1000
unclip_ratio
:
1.5
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
Resize
:
# size: [640, 640]
-
MakeBorderMap
:
shrink_ratio
:
0.4
thresh_min
:
0.3
thresh_max
:
0.7
-
MakeShrinkMap
:
shrink_ratio
:
0.4
min_text_size
:
8
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
threshold_map'
,
'
threshold_mask'
,
'
shrink_map'
,
'
shrink_mask'
]
# the order of the dataloader list
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
num_workers
:
0
use_shared_memory
:
False
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
image_shape
:
[
736
,
1280
]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
0
use_shared_memory
:
False
tests/configs/det_r50_vd_db.yml
0 → 100644
View file @
ac98415b
Global
:
use_gpu
:
false
epoch_num
:
5
log_smooth_window
:
20
print_batch_step
:
1
save_model_dir
:
./output/db_mv3/
save_epoch_step
:
1200
# evaluation is run every 400 iterations (see eval_batch_step)
eval_batch_step
:
[
0
,
400
]
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/det_db/predicts_db.txt
Architecture
:
model_type
:
det
algorithm
:
DB
Transform
:
Backbone
:
name
:
ResNet
#MobileNetV3
layers
:
50
Neck
:
name
:
DBFPN
out_channels
:
256
Head
:
name
:
DBHead
k
:
50
Loss
:
name
:
DBLoss
balance_loss
:
true
main_loss_type
:
DiceLoss
alpha
:
5
#5
beta
:
10
#10
ohem_ratio
:
3
Optimizer
:
name
:
Adam
#Momentum
#momentum: 0.9
beta1
:
0.9
beta2
:
0.999
lr
:
learning_rate
:
0.001
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
DBPostProcess
thresh
:
0.3
box_thresh
:
0.6
max_candidates
:
1000
unclip_ratio
:
1.5
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
Resize
:
# size: [640, 640]
-
MakeBorderMap
:
shrink_ratio
:
0.4
thresh_min
:
0.3
thresh_max
:
0.7
-
MakeShrinkMap
:
shrink_ratio
:
0.4
min_text_size
:
8
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
threshold_map'
,
'
threshold_mask'
,
'
shrink_map'
,
'
shrink_mask'
]
# the order of the dataloader list
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
num_workers
:
0
use_shared_memory
:
False
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
image_shape
:
[
736
,
1280
]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
0
use_shared_memory
:
False
tests/configs/rec_icdar15_r34_train.yml
0 → 100644
View file @
ac98415b
Global
:
use_gpu
:
true
epoch_num
:
72
log_smooth_window
:
20
print_batch_step
:
10
save_model_dir
:
./output/rec/ic15/
save_epoch_step
:
3
# evaluation is run every 2000 iterations
eval_batch_step
:
[
0
,
2000
]
cal_metric_during_train
:
True
pretrained_model
:
checkpoints
:
save_inference_dir
:
./
use_visualdl
:
False
infer_img
:
doc/imgs_words_en/word_10.png
# for data or label process
character_dict_path
:
ppocr/utils/en_dict.txt
character_type
:
EN
max_text_length
:
25
infer_mode
:
False
use_space_char
:
False
save_res_path
:
./output/rec/predicts_ic15.txt
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
learning_rate
:
0.0005
regularizer
:
name
:
'
L2'
factor
:
0
Architecture
:
model_type
:
rec
algorithm
:
CRNN
Transform
:
Backbone
:
name
:
ResNet
layers
:
34
Neck
:
name
:
SequenceEncoder
encoder_type
:
rnn
hidden_size
:
256
Head
:
name
:
CTCHead
fc_decay
:
0
Loss
:
name
:
CTCLoss
PostProcess
:
name
:
CTCLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/ic15_data/
label_file_list
:
[
"
./train_data/ic15_data/rec_gt_train.txt"
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
CTCLabelEncode
:
# Class handling label
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
100
]
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
label'
,
'
length'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
batch_size_per_card
:
256
drop_last
:
True
num_workers
:
8
use_shared_memory
:
False
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/ic15_data
label_file_list
:
[
"
./train_data/ic15_data/rec_gt_test.txt"
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
CTCLabelEncode
:
# Class handling label
-
RecResizeImg
:
image_shape
:
[
3
,
32
,
100
]
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
label'
,
'
length'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
256
num_workers
:
4
use_shared_memory
:
False
tests/ocr_det_params.txt
View file @
ac98415b
...
...
@@ -13,34 +13,34 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train|pact_train
norm_train:tools/train.py -c configs/det
/det
_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
pact_train:deploy/slim/quantization/quant.py -c configs/det
/det
_mv3_db.yml -o
fpgm_train:deploy/slim/prune/sensitivity_anal.py -c configs/det
/det
_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
norm_train:tools/train.py -c
tests/
configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
pact_train:deploy/slim/quantization/quant.py -c
tests/
configs/det_mv3_db.yml -o
fpgm_train:deploy/slim/prune/sensitivity_anal.py -c
tests/
configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c configs/det
/det
_mv3_db.yml -o
eval:tools/eval.py -c
tests/
configs/det_mv3_db.yml -o
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:tools/export_model.py -c configs/det
/det
_mv3_db.yml -o
quant_export:deploy/slim/quantization/export_model.py -c configs/det
/det
_mv3_db.yml -o
fpgm_export:deploy/slim/prune/export_prune_model.py -c configs/det
/det
_mv3_db.yml -o
norm_export:tools/export_model.py -c
tests/
configs/det_mv3_db.yml -o
quant_export:deploy/slim/quantization/export_model.py -c
tests/
configs/det_mv3_db.yml -o
fpgm_export:deploy/slim/prune/export_prune_model.py -c
tests/
configs/det_mv3_db.yml -o
distill_export:null
export1:null
export2:null
##
infer
_model:./inference/ch_ppocr_mobile_v2.0_det_
infer/
infer_export:
null
train
_model:./inference/ch_ppocr_mobile_v2.0_det_
train/best_accuracy
infer_export:
tools/export_model.py -c configs/det/det_mv3_db.yml -o
infer_quant:False
inference:tools/infer/predict_det.py
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:
1|
6
--cpu_threads:6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16|int8
...
...
@@ -62,6 +62,21 @@ inference:./deploy/cpp_infer/build/ppocr det
--precision:fp32|fp16
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--save_log_path
:null
null
:null
--benchmark:True
===========================serving_params===========================
trans_model:-m paddle_serving_client.convert
--dirname:./inference/ch_ppocr_mobile_v2.0_det_infer/
--model_filename:inference.pdmodel
--params_filename:inference.pdiparams
--serving_server:./deploy/pdserving/ppocr_det_mobile_2.0_serving/
--serving_client:./deploy/pdserving/ppocr_det_mobile_2.0_client/
serving_dir:./deploy/pdserving
web_service:web_service_det.py --config=config.yml --opt op.det.concurrency=1
op.det.local_service_conf.devices:null|0
op.det.local_service_conf.use_mkldnn:True|False
op.det.local_service_conf.thread_num:1|6
op.det.local_service_conf.use_trt:False|True
op.det.local_service_conf.precision:fp32|fp16|int8
pipline:pipeline_http_client.py --image_dir=../../doc/imgs
tests/ocr_det_server_params.txt
View file @
ac98415b
...
...
@@ -13,7 +13,7 @@ train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train|pact_train
norm_train:tools/train.py -c configs/det
/det
_r50_vd_db.yml -o Global.pretrained_model=""
norm_train:tools/train.py -c
tests/
configs/det_r50_vd_db.yml -o Global.pretrained_model=""
pact_train:null
fpgm_train:null
distill_train:null
...
...
@@ -21,13 +21,13 @@ null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c configs/det
/det_mv3
_db.yml -o
eval:tools/eval.py -c
tests/
configs/det
_r50_vd
_db.yml -o
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:tools/export_model.py -c configs/det
/det
_r50_vd_db.yml -o
norm_export:tools/export_model.py -c
tests/
configs/det_r50_vd_db.yml -o
quant_export:null
fpgm_export:null
distill_export:null
...
...
tests/ocr_ppocr_mobile_params.txt
0 → 100644
View file @
ac98415b
===========================train_params===========================
model_name:ocr_system_mobile
python:python3.7
gpu_list:null
Global.use_gpu:null
Global.auto_cast:null
Global.epoch_num:null
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:null
Global.pretrained_model:null
train_model_name:null
train_infer_img_dir:null
null:null
##
trainer:
norm_train:null
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:null
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:null
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
##
infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/
infer_export:null
infer_quant:False
inference:tools/infer/predict_system.py
--use_gpu:True
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16|int8
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--save_log_path:null
--benchmark:True
--rec_model_dir:./inference/ch_ppocr_mobile_v2.0_rec_infer/
===========================cpp_infer_params===========================
use_opencv:True
infer_model:./inference/ch_ppocr_mobile_v2.0_det_infer/
infer_quant:False
inference:./deploy/cpp_infer/build/ppocr system
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--rec_model_dir:./inference/ch_ppocr_mobile_v2.0_rec_infer/
--benchmark:True
tests/ocr_ppocr_server_params.txt
0 → 100644
View file @
ac98415b
===========================train_params===========================
model_name:ocr_system_server
python:python3.7
gpu_list:null
Global.use_gpu:null
Global.auto_cast:null
Global.epoch_num:null
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:null
Global.pretrained_model:null
train_model_name:null
train_infer_img_dir:null
null:null
##
trainer:
norm_train:null
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:null
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:null
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
##
infer_model:./inference/ch_ppocr_server_v2.0_det_infer/
infer_export:null
infer_quant:False
inference:tools/infer/predict_system.py
--use_gpu:True
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16|int8
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--save_log_path:null
--benchmark:True
--rec_model_dir:./inference/ch_ppocr_server_v2.0_rec_infer/
===========================cpp_infer_params===========================
use_opencv:True
infer_model:./inference/ch_ppocr_server_v2.0_det_infer/
infer_quant:False
inference:./deploy/cpp_infer/build/ppocr system
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--rec_model_dir:./inference/ch_ppocr_server_v2.0_rec_infer/
--benchmark:True
\ No newline at end of file
tests/ocr_rec_params.txt
View file @
ac98415b
===========================train_params===========================
model_name:ocr_rec
python:python3.7
gpu_list:0|
2,3
gpu_list:0|
0,1
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_infer=2|whole_train_infer=300
...
...
@@ -9,7 +9,7 @@ Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_infer=128|whole_train_infer=128
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./
train_data/ic15_data/train
train_infer_img_dir:./
inference/rec_inference
null:null
##
trainer:norm_train|pact_train
...
...
@@ -41,7 +41,7 @@ inference:tools/infer/predict_rec.py
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--rec_batch_num:1
|6
--use_tensorrt:True|False
--precision:fp32|fp16|int8
--rec_model_dir:
...
...
@@ -49,3 +49,33 @@ inference:tools/infer/predict_rec.py
--save_log_path:./test/output/
--benchmark:True
null:null
===========================cpp_infer_params===========================
use_opencv:True
infer_model:./inference/ch_ppocr_mobile_v2.0_rec_infer/
infer_quant:False
inference:./deploy/cpp_infer/build/ppocr rec
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16
--rec_model_dir:
--image_dir:./inference/rec_inference/
null:null
--benchmark:True
===========================serving_params===========================
trans_model:-m paddle_serving_client.convert
--dirname:./inference/ch_ppocr_mobile_v2.0_rec_infer/
--model_filename:inference.pdmodel
--params_filename:inference.pdiparams
--serving_server:./deploy/pdserving/ppocr_rec_mobile_2.0_serving/
--serving_client:./deploy/pdserving/ppocr_rec_mobile_2.0_client/
serving_dir:./deploy/pdserving
web_service:web_service_rec.py --config=config.yml --opt op.rec.concurrency=1
op.rec.local_service_conf.devices:null|0
op.rec.local_service_conf.use_mkldnn:True|False
op.rec.local_service_conf.thread_num:1|6
op.rec.local_service_conf.use_trt:False|True
op.rec.local_service_conf.precision:fp32|fp16|int8
pipline:pipeline_http_client.py --image_dir=../../doc/imgs_words_en
\ No newline at end of file
tests/ocr_rec_server_params.txt
0 → 100644
View file @
ac98415b
===========================train_params===========================
model_name:ocr_server_rec
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_infer=2|whole_train_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_infer=128|whole_train_infer=128
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./inference/rec_inference
null:null
##
trainer:norm_train|pact_train
norm_train:tools/train.py -c tests/configs/rec_icdar15_r34_train.yml -o
pact_train:deploy/slim/quantization/quant.py -c tests/configs/rec_icdar15_r34_train.yml -o
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c tests/configs/rec_icdar15_r34_train.yml -o
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:tools/export_model.py -c tests/configs/rec_icdar15_r34_train.yml -o
quant_export:deploy/slim/quantization/export_model.py -c tests/configs/rec_icdar15_r34_train.yml -o
fpgm_export:null
distill_export:null
export1:null
export2:null
##
infer_model:./inference/ch_ppocr_server_v2.0_rec_infer/
infer_export:null
infer_quant:False
inference:tools/infer/predict_rec.py
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1|6
--use_tensorrt:True|False
--precision:fp32|fp16|int8
--rec_model_dir:
--image_dir:./inference/rec_inference
--save_log_path:./test/output/
--benchmark:True
null:null
===========================cpp_infer_params===========================
use_opencv:True
infer_model:./inference/ch_ppocr_server_v2.0_rec_infer/
infer_quant:False
inference:./deploy/cpp_infer/build/ppocr rec
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16
--rec_model_dir:
--image_dir:./inference/rec_inference/
null:null
--benchmark:True
===========================serving_params===========================
trans_model:-m paddle_serving_client.convert
--dirname:./inference/ch_ppocr_server_v2.0_rec_infer/
--model_filename:inference.pdmodel
--params_filename:inference.pdiparams
--serving_server:./deploy/pdserving/ppocr_rec_server_2.0_serving/
--serving_client:./deploy/pdserving/ppocr_rec_server_2.0_client/
serving_dir:./deploy/pdserving
web_service:web_service_rec.py --config=config.yml --opt op.rec.concurrency=1
op.rec.local_service_conf.devices:null|0
op.rec.local_service_conf.use_mkldnn:True|False
op.rec.local_service_conf.thread_num:1|6
op.rec.local_service_conf.use_trt:False|True
op.rec.local_service_conf.precision:fp32|fp16|int8
pipline:pipeline_http_client.py --image_dir=../../doc/imgs_words_en
\ No newline at end of file
tests/prepare.sh
View file @
ac98415b
#!/bin/bash
FILENAME
=
$1
# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'cpp_infer']
# MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer', 'infer', 'cpp_infer', 'serving_infer']
MODE
=
$2
dataline
=
$(
cat
${
FILENAME
}
)
...
...
@@ -40,11 +41,13 @@ if [ ${MODE} = "lite_train_infer" ];then
rm
-rf
./train_data/ic15_data
wget
-nc
-P
./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar
wget
-nc
-P
./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar
# todo change to bcebos
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar
wget
-nc
-P
./deploy/slim/prune https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/sen.pickle
cd
./train_data/
&&
tar
xf icdar2015_lite.tar
&&
tar
xf ic15_data.tar
ln
-s
./icdar2015_lite ./icdar2015
cd
../
cd
./inference
&&
tar
xf rec_inference.tar
&&
cd
../
elif
[
${
MODE
}
=
"whole_train_infer"
]
;
then
wget
-nc
-P
./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
rm
-rf
./train_data/icdar2015
...
...
@@ -61,64 +64,113 @@ elif [ ${MODE} = "whole_infer" ];then
cd
./train_data/
&&
tar
xf icdar2015_infer.tar
&&
tar
xf ic15_data.tar
ln
-s
./icdar2015_infer ./icdar2015
cd
../
elif
[
${
MODE
}
=
"infer"
]
||
[
${
MODE
}
=
"cpp_infer"
]
;
then
elif
[
${
MODE
}
=
"infer"
]
;
then
if
[
${
model_name
}
=
"ocr_det"
]
;
then
eval_model_name
=
"ch_ppocr_mobile_v2.0_det_
infer
"
eval_model_name
=
"ch_ppocr_mobile_v2.0_det_
train
"
rm
-rf
./train_data/icdar2015
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_
infer
.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_
train
.tar
cd
./inference
&&
tar
xf
${
eval_model_name
}
.tar
&&
tar
xf ch_det_data_50.tar
&&
cd
../
elif
[
${
model_name
}
=
"ocr_server_det"
]
;
then
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
cd
./inference
&&
tar
xf ch_ppocr_server_v2.0_det_infer.tar
&&
tar
xf ch_det_data_50.tar
&&
cd
../
else
elif
[
${
model_name
}
=
"ocr_system_mobile"
]
;
then
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
cd
./inference
&&
tar
xf ch_ppocr_mobile_v2.0_det_infer.tar
&&
tar
xf ch_ppocr_mobile_v2.0_rec_infer.tar
&&
tar
xf ch_det_data_50.tar
&&
cd
../
elif
[
${
model_name
}
=
"ocr_system_server"
]
;
then
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
cd
./inference
&&
tar
xf ch_ppocr_server_v2.0_det_infer.tar
&&
tar
xf ch_ppocr_server_v2.0_rec_infer.tar
&&
tar
xf ch_det_data_50.tar
&&
cd
../
elif
[
${
model_name
}
=
"ocr_rec"
]
;
then
rm
-rf
./train_data/ic15_data
eval_model_name
=
"ch_ppocr_mobile_v2.0_rec_infer"
wget
-nc
-P
./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar
wget
-nc
-P
./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
cd
./inference
&&
tar
xf
${
eval_model_name
}
.tar
&&
tar
xf rec_inference.tar
&&
cd
../
elif
[
${
model_name
}
=
"ocr_server_rec"
]
;
then
rm
-rf
./train_data/ic15_data
eval_model_name
=
"ch_ppocr_server_v2.0_rec_infer"
wget
-nc
-P
./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
cd
./inference
&&
tar
xf
${
eval_model_name
}
.tar
&&
tar
xf rec_inference.tar
&&
cd
../
fi
elif
[
${
MODE
}
=
"cpp_infer"
]
;
then
if
[
${
model_name
}
=
"ocr_det"
]
;
then
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
cd
./inference
&&
tar
xf ch_ppocr_mobile_v2.0_det_infer.tar
&&
tar
xf ch_det_data_50.tar
&&
cd
../
elif
[
${
model_name
}
=
"ocr_rec"
]
;
then
wget
-nc
-P
./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
cd
./inference
&&
tar
xf
${
eval_model_name
}
.tar
&&
tar
xf ic15_data.tar
&&
cd
../
cd
./inference
&&
tar
xf ch_ppocr_mobile_v2.0_rec_infer.tar
&&
tar
xf rec_inference.tar
&&
cd
../
elif
[
${
model_name
}
=
"ocr_system"
]
;
then
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
wget
-nc
-P
./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
cd
./inference
&&
tar
xf ch_ppocr_mobile_v2.0_det_infer.tar
&&
tar
xf ch_ppocr_mobile_v2.0_rec_infer.tar
&&
tar
xf ch_det_data_50.tar
&&
cd
../
fi
fi
if [ "${MODE}" = "serving_infer" ];then
    # Prepare the Paddle Serving environment: install the serving packages
    # with the interpreter named in the params file, then fetch and unpack
    # the mobile and server det/rec inference models.
    python_name=$(func_parser_value "${lines[2]}")
    # -nc keeps a re-run from downloading a second "<name>.whl.1" copy,
    # matching every other download in this script.
    wget -nc https://paddle-serving.bj.bcebos.com/chain/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
    # Install only the wheel; the former "pip install install <wheel>" also
    # asked pip for a package literally named "install".
    ${python_name} -m pip install paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
    ${python_name} -m pip install paddle_serving_client==0.6.1
    ${python_name} -m pip install paddle-serving-app==0.6.3
    wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
    wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
    wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar
    wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar
    # "cd ../" is chained so it only runs if "cd ./inference" succeeded;
    # otherwise the script would end up one level above the repo root.
    cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_rec_infer.tar && tar xf ch_ppocr_server_v2.0_det_infer.tar && cd ../
fi
if
[
${
MODE
}
=
"cpp_infer"
]
;
then
cd
deploy/cpp_infer
use_opencv
=
$(
func_parser_value
"
${
lines
[52]
}
"
)
if
[
${
use_opencv
}
=
"True"
]
;
then
echo
"################### build opencv ###################"
rm
-rf
3.4.7.tar.gz opencv-3.4.7/
wget https://github.com/opencv/opencv/archive/3.4.7.tar.gz
tar
-xf
3.4.7.tar.gz
if
[
-d
"opencv-3.4.7/opencv3/"
]
&&
[
$(
md5sum
opencv-3.4.7.tar.gz |
awk
-F
' '
'{print $1}'
)
=
"faa2b5950f8bee3f03118e600c74746a"
]
;
then
echo
"################### build opencv skipped ###################"
else
echo
"################### build opencv ###################"
rm
-rf
opencv-3.4.7.tar.gz opencv-3.4.7/
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/opencv-3.4.7.tar.gz
tar
-xf
opencv-3.4.7.tar.gz
cd
opencv-3.4.7/
install_path
=
$(
pwd
)
/
opencv-3.4.7/
opencv3
cd
opencv-3.4.7/
install_path
=
$(
pwd
)
/opencv3
rm
-rf
build
mkdir
build
cd
build
rm
-rf
build
mkdir
build
cd
build
cmake ..
\
-DCMAKE_INSTALL_PREFIX
=
${
install_path
}
\
-DCMAKE_BUILD_TYPE
=
Release
\
-DBUILD_SHARED_LIBS
=
OFF
\
-DWITH_IPP
=
OFF
\
-DBUILD_IPP_IW
=
OFF
\
-DWITH_LAPACK
=
OFF
\
-DWITH_EIGEN
=
OFF
\
-DCMAKE_INSTALL_LIBDIR
=
lib64
\
-DWITH_ZLIB
=
ON
\
-DBUILD_ZLIB
=
ON
\
-DWITH_JPEG
=
ON
\
-DBUILD_JPEG
=
ON
\
-DWITH_PNG
=
ON
\
-DBUILD_PNG
=
ON
\
-DWITH_TIFF
=
ON
\
-DBUILD_TIFF
=
ON
cmake ..
\
-DCMAKE_INSTALL_PREFIX
=
${
install_path
}
\
-DCMAKE_BUILD_TYPE
=
Release
\
-DBUILD_SHARED_LIBS
=
OFF
\
-DWITH_IPP
=
OFF
\
-DBUILD_IPP_IW
=
OFF
\
-DWITH_LAPACK
=
OFF
\
-DWITH_EIGEN
=
OFF
\
-DCMAKE_INSTALL_LIBDIR
=
lib64
\
-DWITH_ZLIB
=
ON
\
-DBUILD_ZLIB
=
ON
\
-DWITH_JPEG
=
ON
\
-DBUILD_JPEG
=
ON
\
-DWITH_PNG
=
ON
\
-DBUILD_PNG
=
ON
\
-DWITH_TIFF
=
ON
\
-DBUILD_TIFF
=
ON
make
-j
make
install
cd
../
echo
"################### build opencv finished ###################"
make
-j
make
install
cd
../
echo
"################### build opencv finished ###################"
fi
fi
...
...
@@ -149,4 +201,4 @@ if [ ${MODE} = "cpp_infer" ];then
make
-j
echo
"################### build PaddleOCR demo finished ###################"
fi
\ No newline at end of file
fi
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment