Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
9f62b610
Commit
9f62b610
authored
Jan 27, 2022
by
zhiminzhang0830
Browse files
add fcenet
parent
5876f3f4
Changes
17
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
2630 additions
and
38 deletions
+2630
-38
configs/det/det_r50_fce_ctw.yml
configs/det/det_r50_fce_ctw.yml
+141
-0
ppocr/data/imaug/__init__.py
ppocr/data/imaug/__init__.py
+3
-0
ppocr/data/imaug/fce_aug.py
ppocr/data/imaug/fce_aug.py
+633
-0
ppocr/data/imaug/fce_targets.py
ppocr/data/imaug/fce_targets.py
+670
-0
ppocr/data/imaug/operators.py
ppocr/data/imaug/operators.py
+11
-2
ppocr/losses/__init__.py
ppocr/losses/__init__.py
+4
-3
ppocr/losses/det_fce_loss.py
ppocr/losses/det_fce_loss.py
+212
-0
ppocr/metrics/__init__.py
ppocr/metrics/__init__.py
+2
-2
ppocr/metrics/det_metric.py
ppocr/metrics/det_metric.py
+83
-2
ppocr/modeling/backbones/det_resnet_vd.py
ppocr/modeling/backbones/det_resnet_vd.py
+127
-22
ppocr/modeling/heads/__init__.py
ppocr/modeling/heads/__init__.py
+3
-2
ppocr/modeling/heads/det_fce_head.py
ppocr/modeling/heads/det_fce_head.py
+100
-0
ppocr/modeling/necks/__init__.py
ppocr/modeling/necks/__init__.py
+5
-1
ppocr/modeling/necks/fce_fpn.py
ppocr/modeling/necks/fce_fpn.py
+262
-0
ppocr/postprocess/__init__.py
ppocr/postprocess/__init__.py
+4
-3
ppocr/postprocess/fce_postprocess.py
ppocr/postprocess/fce_postprocess.py
+368
-0
train.sh
train.sh
+2
-1
No files found.
configs/det/det_r50_fce_ctw.yml
0 → 100755
View file @
9f62b610
Global
:
use_gpu
:
true
epoch_num
:
1500
log_smooth_window
:
20
print_batch_step
:
20
save_model_dir
:
./output/fce_r50_ctw/
save_epoch_step
:
100
# evaluation is run every 835 iterations
eval_batch_step
:
[
0
,
835
]
cal_metric_during_train
:
False
pretrained_model
:
../pretrain_models/ResNet50_vd_ssld_pretrained
checkpoints
:
#output/fce_r50_ctw/latest
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_en/img_10.jpg
save_res_path
:
./output/fce_r50_ctw/predicts_ctw.txt
Architecture
:
model_type
:
det
algorithm
:
FCE
Transform
:
Backbone
:
name
:
ResNet
layers
:
50
dcn_stage
:
[
False
,
True
,
True
,
True
]
out_indices
:
[
1
,
2
,
3
]
Neck
:
name
:
FCEFPN
in_channels
:
[
512
,
1024
,
2048
]
out_channels
:
256
has_extra_convs
:
False
extra_stage
:
0
Head
:
name
:
FCEHead
in_channels
:
256
scales
:
[
8
,
16
,
32
]
fourier_degree
:
5
Loss
:
name
:
FCELoss
fourier_degree
:
5
num_sample
:
50
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
learning_rate
:
0.0001
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
FCEPostProcess
scales
:
[
8
,
16
,
32
]
alpha
:
1.0
beta
:
1.0
fourier_degree
:
5
Metric
:
name
:
DetFCEMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
/data/Dataset/OCR_det/ctw1500/imgs/
label_file_list
:
-
/data/Dataset/OCR_det/ctw1500/imgs/training.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
ignore_orientation
:
True
-
DetLabelEncode
:
# Class handling label
-
ColorJitter
:
brightness
:
0.142
saturation
:
0.5
contrast
:
0.5
-
RandomScaling
:
-
RandomCropFlip
:
crop_ratio
:
0.5
-
RandomCropPolyInstances
:
crop_ratio
:
0.8
min_side_ratio
:
0.3
-
RandomRotatePolyInstances
:
rotate_ratio
:
0.5
max_angle
:
30
pad_with_fixed_color
:
False
-
SquareResizePad
:
target_size
:
800
pad_ratio
:
0.6
-
IaaAugment
:
augmenter_args
:
-
{
'
type'
:
Fliplr
,
'
args'
:
{
'
p'
:
0.5
}
}
-
FCENetTargets
:
fourier_degree
:
5
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
p3_maps'
,
'
p4_maps'
,
'
p5_maps'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
6
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
/data/Dataset/OCR_det/ctw1500/imgs/
label_file_list
:
-
/data/Dataset/OCR_det/ctw1500/imgs/test.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
ignore_orientation
:
True
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
# resize_long: 1280
rescale_img
:
[
1080
,
736
]
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
Pad
:
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
\ No newline at end of file
ppocr/data/imaug/__init__.py
View file @
9f62b610
...
@@ -36,6 +36,9 @@ from .gen_table_mask import *
...
@@ -36,6 +36,9 @@ from .gen_table_mask import *
from
.vqa
import
*
from
.vqa
import
*
from
.fce_aug
import
*
from
.fce_targets
import
FCENetTargets
def
transform
(
data
,
ops
=
None
):
def
transform
(
data
,
ops
=
None
):
""" transform """
""" transform """
...
...
ppocr/data/imaug/fce_aug.py
0 → 100644
View file @
9f62b610
This diff is collapsed.
Click to expand it.
ppocr/data/imaug/fce_targets.py
0 → 100644
View file @
9f62b610
This diff is collapsed.
Click to expand it.
ppocr/data/imaug/operators.py
View file @
9f62b610
...
@@ -60,9 +60,14 @@ class DecodeImage(object):
...
@@ -60,9 +60,14 @@ class DecodeImage(object):
class
NRTRDecodeImage
(
object
):
class
NRTRDecodeImage
(
object
):
""" decode image """
""" decode image """
def
__init__
(
self
,
img_mode
=
'RGB'
,
channel_first
=
False
,
**
kwargs
):
def
__init__
(
self
,
img_mode
=
'RGB'
,
channel_first
=
False
,
ignore_orientation
=
False
,
**
kwargs
):
self
.
img_mode
=
img_mode
self
.
img_mode
=
img_mode
self
.
channel_first
=
channel_first
self
.
channel_first
=
channel_first
self
.
ignore_orientation
=
ignore_orientation
def
__call__
(
self
,
data
):
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
img
=
data
[
'image'
]
...
@@ -74,6 +79,10 @@ class NRTRDecodeImage(object):
...
@@ -74,6 +79,10 @@ class NRTRDecodeImage(object):
img
)
>
0
,
"invalid input 'img' in DecodeImage"
img
)
>
0
,
"invalid input 'img' in DecodeImage"
img
=
np
.
frombuffer
(
img
,
dtype
=
'uint8'
)
img
=
np
.
frombuffer
(
img
,
dtype
=
'uint8'
)
if
self
.
ignore_orientation
:
img
=
cv2
.
imdecode
(
img
,
cv2
.
IMREAD_IGNORE_ORIENTATION
|
cv2
.
IMREAD_COLOR
)
else
:
img
=
cv2
.
imdecode
(
img
,
1
)
img
=
cv2
.
imdecode
(
img
,
1
)
if
img
is
None
:
if
img
is
None
:
...
...
ppocr/losses/__init__.py
View file @
9f62b610
...
@@ -24,6 +24,7 @@ from .det_db_loss import DBLoss
...
@@ -24,6 +24,7 @@ from .det_db_loss import DBLoss
from
.det_east_loss
import
EASTLoss
from
.det_east_loss
import
EASTLoss
from
.det_sast_loss
import
SASTLoss
from
.det_sast_loss
import
SASTLoss
from
.det_pse_loss
import
PSELoss
from
.det_pse_loss
import
PSELoss
from
.det_fce_loss
import
FCELoss
# rec loss
# rec loss
from
.rec_ctc_loss
import
CTCLoss
from
.rec_ctc_loss
import
CTCLoss
...
@@ -55,9 +56,9 @@ from .vqa_token_layoutlm_loss import VQASerTokenLayoutLMLoss
...
@@ -55,9 +56,9 @@ from .vqa_token_layoutlm_loss import VQASerTokenLayoutLMLoss
def
build_loss
(
config
):
def
build_loss
(
config
):
support_dict
=
[
support_dict
=
[
'DBLoss'
,
'PSELoss'
,
'EASTLoss'
,
'SASTLoss'
,
'
CTC
Loss'
,
'C
ls
Loss'
,
'DBLoss'
,
'PSELoss'
,
'EASTLoss'
,
'SASTLoss'
,
'
FCE
Loss'
,
'C
TC
Loss'
,
'AttentionLoss'
,
'SRNLoss'
,
'PGLoss'
,
'CombinedLoss'
,
'NRTRLoss'
,
'ClsLoss'
,
'AttentionLoss'
,
'SRNLoss'
,
'PGLoss'
,
'CombinedLoss'
,
'TableAttentionLoss'
,
'SARLoss'
,
'AsterLoss'
,
'SDMGRLoss'
,
'NRTRLoss'
,
'TableAttentionLoss'
,
'SARLoss'
,
'AsterLoss'
,
'SDMGRLoss'
,
'VQASerTokenLayoutLMLoss'
,
'LossFromOutput'
'VQASerTokenLayoutLMLoss'
,
'LossFromOutput'
]
]
config
=
copy
.
deepcopy
(
config
)
config
=
copy
.
deepcopy
(
config
)
...
...
ppocr/losses/det_fce_loss.py
0 → 100644
View file @
9f62b610
import
numpy
as
np
from
paddle
import
nn
import
paddle
import
paddle.nn.functional
as
F
from
functools
import
partial
def multi_apply(func, *args, **kwargs):
    """Run ``func`` over parallel argument sequences and regroup its outputs.

    ``func`` is called once per position with the items found at that
    position in every sequence of ``args``; any ``kwargs`` are forwarded
    unchanged to every call.

    Args:
        func (callable): Function returning a tuple of results.
        *args: Parallel iterables supplying the positional arguments.
        **kwargs: Keyword arguments shared by all calls.

    Returns:
        tuple(list): One list per output slot of ``func``; the i-th list
        gathers the i-th output of every call, in call order.
    """
    if kwargs:
        bound = partial(func, **kwargs)
    else:
        bound = func
    # Materialise every call result first, then transpose
    # "one tuple per call" into "one list per output slot".
    per_call = list(map(bound, *args))
    return tuple(list(column) for column in zip(*per_call))
class FCELoss(nn.Layer):
    """The class for implementing FCENet loss.

    FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped
    Text Detection
    [https://arxiv.org/abs/2104.10442]

    The loss is the sum, over every feature level, of four terms: a text
    region (tr) classification loss with OHEM, a text center line (tcl)
    classification loss, and two regression losses computed on contour
    points reconstructed from Fourier coefficients.

    Args:
        fourier_degree (int) : The maximum Fourier transform degree k.
        num_sample (int) : The sampling points number of regression
            loss. If it is too small, fcenet tends to be overfitting.
        ohem_ratio (float): the negative/positive ratio in OHEM.
    """

    def __init__(self, fourier_degree, num_sample, ohem_ratio=3.):
        super().__init__()
        self.fourier_degree = fourier_degree
        self.num_sample = num_sample
        self.ohem_ratio = ohem_ratio

    def forward(self, preds, labels):
        """Compute the FCENet loss summed over all feature levels.

        Args:
            preds (dict): Must contain the key ``'levels'``; each item of
                that value is indexed as ``[cls_pred, reg_pred]`` by
                ``forward_single``.
            labels (list): ``labels[1:]`` must unpack into exactly three
                per-level target collections (named p3/p4/p5 here); each
                one is stacked with ``np.stack`` into a single tensor.

        Returns:
            dict: ``loss`` (sum of everything) plus the per-term sums
            ``loss_text``, ``loss_center``, ``loss_reg_x``, ``loss_reg_y``.
        """
        assert isinstance(preds, dict)
        preds = preds['levels']

        p3_maps, p4_maps, p5_maps = labels[1:]
        # Target channel count is 3 masks + 2 * (2 * degree + 1) coefficients.
        assert p3_maps[0].shape[0] == 4 * self.fourier_degree + 5, \
            'fourier degree not equal in FCEhead and FCEtarget'

        # to tensor
        gts = [p3_maps, p4_maps, p5_maps]
        for idx, maps in enumerate(gts):
            gts[idx] = paddle.to_tensor(np.stack(maps))

        # multi_apply regroups the 4-tuples returned per level into four
        # lists: (tr losses, tcl losses, reg_x losses, reg_y losses).
        losses = multi_apply(self.forward_single, preds, gts)

        loss_tr = paddle.to_tensor(0.).astype('float32')
        loss_tcl = paddle.to_tensor(0.).astype('float32')
        loss_reg_x = paddle.to_tensor(0.).astype('float32')
        loss_reg_y = paddle.to_tensor(0.).astype('float32')
        loss_all = paddle.to_tensor(0.).astype('float32')

        # idx selects the loss term; sum(loss) adds that term across levels.
        for idx, loss in enumerate(losses):
            loss_all += sum(loss)
            if idx == 0:
                loss_tr += sum(loss)
            elif idx == 1:
                loss_tcl += sum(loss)
            elif idx == 2:
                loss_reg_x += sum(loss)
            else:
                loss_reg_y += sum(loss)

        results = dict(
            loss=loss_all,
            loss_text=loss_tr,
            loss_center=loss_tcl,
            loss_reg_x=loss_reg_x,
            loss_reg_y=loss_reg_y, )
        return results

    def forward_single(self, pred, gt):
        """Compute the four loss terms for one feature level.

        Args:
            pred: Indexable pair; ``pred[0]`` is the classification map and
                ``pred[1]`` the regression map (4-D, axis 1 is moved last).
            gt: 4-D target tensor whose axis 1 holds, in order: tr mask,
                tcl mask, train mask, then ``2 * degree + 1`` x coefficients
                and the remaining channels as y coefficients.

        Returns:
            tuple: ``(loss_tr, loss_tcl, loss_reg_x, loss_reg_y)``.
        """
        # Move axis 1 to the end so each spatial location becomes one row
        # after the reshapes below.
        cls_pred = paddle.transpose(pred[0], (0, 2, 3, 1))
        reg_pred = paddle.transpose(pred[1], (0, 2, 3, 1))
        gt = paddle.transpose(gt, (0, 2, 3, 1))

        # Number of Fourier coefficients per coordinate.
        k = 2 * self.fourier_degree + 1
        tr_pred = paddle.reshape(cls_pred[:, :, :, :2], (-1, 2))
        tcl_pred = paddle.reshape(cls_pred[:, :, :, 2:], (-1, 2))
        x_pred = paddle.reshape(reg_pred[:, :, :, 0:k], (-1, k))
        y_pred = paddle.reshape(reg_pred[:, :, :, k:2 * k], (-1, k))

        tr_mask = gt[:, :, :, :1].reshape([-1])
        tcl_mask = gt[:, :, :, 1:2].reshape([-1])
        train_mask = gt[:, :, :, 2:3].reshape([-1])
        x_map = paddle.reshape(gt[:, :, :, 3:3 + k], (-1, k))
        y_map = paddle.reshape(gt[:, :, :, 3 + k:], (-1, k))

        # Locations that are both trainable and inside a text region.
        tr_train_mask = (train_mask * tr_mask).astype('bool')
        # Same mask duplicated into two columns so it can select whole rows
        # of the (-1, 2) logit tensors through masked_select.
        tr_train_mask2 = paddle.concat(
            [tr_train_mask.unsqueeze(1), tr_train_mask.unsqueeze(1)], axis=1)
        # tr loss
        loss_tr = self.ohem(tr_pred, tr_mask, train_mask)

        # tcl loss
        loss_tcl = paddle.to_tensor(0.).astype('float32')
        tr_neg_mask = tr_train_mask.logical_not()
        tr_neg_mask2 = paddle.concat(
            [tr_neg_mask.unsqueeze(1), tr_neg_mask.unsqueeze(1)], axis=1)
        if tr_train_mask.sum().item() > 0:
            loss_tcl_pos = F.cross_entropy(
                tcl_pred.masked_select(tr_train_mask2).reshape([-1, 2]),
                tcl_mask.masked_select(tr_train_mask).astype('int64'))
            loss_tcl_neg = F.cross_entropy(
                tcl_pred.masked_select(tr_neg_mask2).reshape([-1, 2]),
                tcl_mask.masked_select(tr_neg_mask).astype('int64'))
            # Locations outside the selected text region count half as much.
            loss_tcl = loss_tcl_pos + 0.5 * loss_tcl_neg

        # regression loss
        loss_reg_x = paddle.to_tensor(0.).astype('float32')
        loss_reg_y = paddle.to_tensor(0.).astype('float32')
        if tr_train_mask.sum().item() > 0:
            # Per-location weight: mean of the tr and tcl mask values.
            weight = (tr_mask.masked_select(tr_train_mask.astype('bool'))
                      .astype('float32') + tcl_mask.masked_select(
                          tr_train_mask.astype('bool')).astype('float32')) / 2
            weight = weight.reshape([-1, 1])

            # Compare contours in point space rather than coefficient space.
            ft_x, ft_y = self.fourier2poly(x_map, y_map)
            ft_x_pre, ft_y_pre = self.fourier2poly(x_pred, y_pred)

            dim = ft_x.shape[1]

            # Row mask widened to one column per sampled contour point.
            tr_train_mask3 = paddle.concat(
                [tr_train_mask.unsqueeze(1) for i in range(dim)], axis=1)

            loss_reg_x = paddle.mean(weight * F.smooth_l1_loss(
                ft_x_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
                ft_x.masked_select(tr_train_mask3).reshape([-1, dim]),
                reduction='none'))
            loss_reg_y = paddle.mean(weight * F.smooth_l1_loss(
                ft_y_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
                ft_y.masked_select(tr_train_mask3).reshape([-1, dim]),
                reduction='none'))

        return loss_tr, loss_tcl, loss_reg_x, loss_reg_y

    def ohem(self, predict, target, train_mask):
        """Cross-entropy with online hard example mining on the negatives.

        Args:
            predict: Logits of shape (-1, 2).
            target: Flat 0/1 target values, one per row of ``predict``.
            train_mask: Flat mask; only locations where it is non-zero are
                used as positives or negatives.

        Returns:
            Summed positive loss plus the summed loss of the kept negatives,
            divided by ``n_pos + n_neg``.
        """
        pos = (target * train_mask).astype('bool')
        neg = ((1 - target) * train_mask).astype('bool')

        # Two-column copies of the masks, to select full logit rows.
        pos2 = paddle.concat([pos.unsqueeze(1), pos.unsqueeze(1)], axis=1)
        neg2 = paddle.concat([neg.unsqueeze(1), neg.unsqueeze(1)], axis=1)

        n_pos = pos.astype('float32').sum()

        if n_pos.item() > 0:
            loss_pos = F.cross_entropy(
                predict.masked_select(pos2).reshape([-1, 2]),
                target.masked_select(pos).astype('int64'),
                reduction='sum')
            loss_neg = F.cross_entropy(
                predict.masked_select(neg2).reshape([-1, 2]),
                target.masked_select(neg).astype('int64'),
                reduction='none')
            # Keep at most ohem_ratio negatives per positive.
            n_neg = min(
                int(neg.astype('float32').sum().item()),
                int(self.ohem_ratio * n_pos.astype('float32')))
        else:
            loss_pos = paddle.to_tensor(0.)
            loss_neg = F.cross_entropy(
                predict.masked_select(neg2).reshape([-1, 2]),
                target.masked_select(neg).astype('int64'),
                reduction='none')
            # No positives: fall back to a fixed budget of negatives.
            n_neg = 100
        if len(loss_neg) > n_neg:
            # Keep only the n_neg largest (hardest) negative losses.
            loss_neg, _ = paddle.topk(loss_neg, n_neg)

        return (loss_pos + loss_neg.sum()) / (n_pos + n_neg).astype('float32')

    def fourier2poly(self, real_maps, imag_maps):
        """Transform Fourier coefficient maps to polygon maps.

        Args:
            real_maps (tensor): A map composed of the real parts of the
                Fourier coefficients, whose shape is (-1, 2k+1)
            imag_maps (tensor):A map composed of the imag parts of the
                Fourier coefficients, whose shape is (-1, 2k+1)

        Returns
            x_maps (tensor): A map composed of the x value of the polygon
                represented by n sample points (xn, yn), whose shape is (-1, n)
            y_maps (tensor): A map composed of the y value of the polygon
                represented by n sample points (xn, yn), whose shape is (-1, n)
        """

        # Column of degrees -k..k and row of sample indices 0..n-1.
        k_vect = paddle.arange(
            -self.fourier_degree, self.fourier_degree + 1,
            dtype='float32').reshape([-1, 1])
        i_vect = paddle.arange(
            0, self.num_sample, dtype='float32').reshape([1, -1])

        # Angle 2*pi*k*i/n for every (degree, sample) pair.
        transform_matrix = 2 * np.pi / self.num_sample * paddle.matmul(k_vect,
                                                                       i_vect)

        x1 = paddle.einsum('ak, kn-> an', real_maps,
                           paddle.cos(transform_matrix))
        x2 = paddle.einsum('ak, kn-> an', imag_maps,
                           paddle.sin(transform_matrix))
        y1 = paddle.einsum('ak, kn-> an', real_maps,
                           paddle.sin(transform_matrix))
        y2 = paddle.einsum('ak, kn-> an', imag_maps,
                           paddle.cos(transform_matrix))

        # Real and imaginary parts of sum_k (re + i*im) * exp(i*angle).
        x_maps = x1 - x2
        y_maps = y1 + y2

        return x_maps, y_maps
ppocr/metrics/__init__.py
View file @
9f62b610
...
@@ -21,7 +21,7 @@ import copy
...
@@ -21,7 +21,7 @@ import copy
__all__
=
[
"build_metric"
]
__all__
=
[
"build_metric"
]
from
.det_metric
import
DetMetric
from
.det_metric
import
DetMetric
,
DetFCEMetric
from
.rec_metric
import
RecMetric
from
.rec_metric
import
RecMetric
from
.cls_metric
import
ClsMetric
from
.cls_metric
import
ClsMetric
from
.e2e_metric
import
E2EMetric
from
.e2e_metric
import
E2EMetric
...
@@ -34,7 +34,7 @@ from .vqa_token_re_metric import VQAReTokenMetric
...
@@ -34,7 +34,7 @@ from .vqa_token_re_metric import VQAReTokenMetric
def
build_metric
(
config
):
def
build_metric
(
config
):
support_dict
=
[
support_dict
=
[
"DetMetric"
,
"RecMetric"
,
"ClsMetric"
,
"E2EMetric"
,
"DetMetric"
,
"DetFCEMetric"
,
"RecMetric"
,
"ClsMetric"
,
"E2EMetric"
,
"DistillationMetric"
,
"TableMetric"
,
'KIEMetric'
,
'VQASerTokenMetric'
,
"DistillationMetric"
,
"TableMetric"
,
'KIEMetric'
,
'VQASerTokenMetric'
,
'VQAReTokenMetric'
'VQAReTokenMetric'
]
]
...
...
ppocr/metrics/det_metric.py
View file @
9f62b610
...
@@ -16,7 +16,7 @@ from __future__ import absolute_import
...
@@ -16,7 +16,7 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
__all__
=
[
'DetMetric'
]
__all__
=
[
'DetMetric'
,
'DetFCEMetric'
]
from
.eval_det_iou
import
DetectionIoUEvaluator
from
.eval_det_iou
import
DetectionIoUEvaluator
...
@@ -55,7 +55,6 @@ class DetMetric(object):
...
@@ -55,7 +55,6 @@ class DetMetric(object):
result
=
self
.
evaluator
.
evaluate_image
(
gt_info_list
,
det_info_list
)
result
=
self
.
evaluator
.
evaluate_image
(
gt_info_list
,
det_info_list
)
self
.
results
.
append
(
result
)
self
.
results
.
append
(
result
)
def
get_metric
(
self
):
def
get_metric
(
self
):
"""
"""
return metrics {
return metrics {
...
@@ -71,3 +70,85 @@ class DetMetric(object):
...
@@ -71,3 +70,85 @@ class DetMetric(object):
def
reset
(
self
):
def
reset
(
self
):
self
.
results
=
[]
# clear results
self
.
results
=
[]
# clear results
class DetFCEMetric(object):
    """Detection metric evaluated at several score thresholds.

    Every image is scored once per threshold (0.3 to 0.9); the reported
    ``hmean`` is the largest hmean obtained across those thresholds.
    """

    def __init__(self, main_indicator='hmean', **kwargs):
        self.evaluator = DetectionIoUEvaluator()
        self.main_indicator = main_indicator
        self.reset()

    def __call__(self, preds, batch, **kwargs):
        '''
        batch: a list produced by dataloaders; ``batch[2]`` holds the
            ground-truth polygons and ``batch[3]`` the ignore tags, one
            entry per image.
        preds: a list of dict produced by post process, one per image,
            each with the keys ``points`` and ``scores``.
        '''
        polygons_per_image = batch[2]
        tags_per_image = batch[3]

        for pred, polygons, tags in zip(preds, polygons_per_image,
                                        tags_per_image):
            # Ground-truth records for this image.
            gt_records = []
            for polygon, tag in zip(polygons, tags):
                gt_records.append({
                    'points': polygon,
                    'text': '',
                    'ignore': tag
                })

            # Detection records for this image.
            det_records = []
            for polygon, score in zip(pred['points'], pred['scores']):
                det_records.append({
                    'points': polygon,
                    'text': '',
                    'score': score
                })

            # Evaluate once per threshold, keeping detections at or above it.
            for threshold, bucket in self.results.items():
                kept = [
                    record for record in det_records
                    if record['score'] >= threshold
                ]
                bucket.append(
                    self.evaluator.evaluate_image(gt_records, kept))

    def get_metric(self):
        """
        return metrics {'hmean': best hmean over all thresholds,
            plus one formatted 'precision/recall/hmean' string per
            score threshold from 0.3 to 0.9
        }

        The accumulated results are cleared before returning.
        """
        summary = {}
        best_hmean = 0
        for threshold, bucket in self.results.items():
            combined = self.evaluator.combine_results(bucket)
            line = 'precision:{:.5f} recall:{:.5f} hmean:{:.5f}'.format(
                combined['precision'], combined['recall'], combined['hmean'])
            summary['\nthr {}'.format(threshold)] = line
            best_hmean = max(best_hmean, combined['hmean'])
        summary['hmean'] = best_hmean

        self.reset()
        return summary

    def reset(self):
        # One empty result list per score threshold.
        thresholds = (0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
        self.results = {threshold: [] for threshold in thresholds}
ppocr/modeling/backbones/det_resnet_vd.py
View file @
9f62b610
...
@@ -21,9 +21,82 @@ from paddle import ParamAttr
...
@@ -21,9 +21,82 @@ from paddle import ParamAttr
import
paddle.nn
as
nn
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
import
paddle.nn.functional
as
F
from
paddle.vision.ops
import
DeformConv2D
from
paddle.regularizer
import
L2Decay
from
paddle.nn.initializer
import
Normal
,
Constant
,
XavierUniform
__all__
=
[
"ResNet"
]
__all__
=
[
"ResNet"
]
class DeformableConvV2(nn.Layer):
    """Deformable convolution whose offsets and mask are predicted from the input.

    A plain ``nn.Conv2D`` (``conv_offset``) predicts, for every output
    location, the sampling offsets and a modulation mask; both are then fed
    to ``DeformConv2D`` (``conv_dcn``) together with the input.

    Args:
        in_channels: Input channel count of both convolutions.
        out_channels: Output channel count of the deformable convolution.
        kernel_size: Kernel size; used as a single int (it is squared below).
        stride: Stride applied to both convolutions.
        padding: Accepted but never read in this class; the paddings are
            derived from ``kernel_size`` (and ``dilation`` for ``conv_dcn``).
        dilation: Dilation of the deformable convolution only.
        groups: Passed to ``DeformConv2D`` as ``deformable_groups`` and used
            to size the offset/mask channels.
        weight_attr: Weight attribute of the deformable convolution.
        bias_attr: If truthy, the deformable convolution gets a zero
            initialised bias; otherwise it has no bias.
        lr_scale: Learning-rate scale of the offset convolution bias.
        regularizer: Regularizer of the offset convolution bias.
        skip_quant: If true, sets ``skip_quant = True`` on ``conv_offset``.
        dcn_bias_regularizer: Regularizer of the deformable convolution bias.
            NOTE(review): the default is built once at definition time and
            shared by every instance.
        dcn_bias_lr_scale: Learning-rate scale of the deformable conv bias.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 weight_attr=None,
                 bias_attr=None,
                 lr_scale=1,
                 regularizer=None,
                 skip_quant=False,
                 dcn_bias_regularizer=L2Decay(0.),
                 dcn_bias_lr_scale=2.):
        super(DeformableConvV2, self).__init__()
        # Two offset values and one mask value per kernel tap and group.
        self.offset_channel = 2 * kernel_size**2 * groups
        self.mask_channel = kernel_size**2 * groups

        if bias_attr:
            # in FCOS-DCN head, specifically need learning_rate and regularizer
            dcn_bias_attr = ParamAttr(
                initializer=Constant(value=0),
                regularizer=dcn_bias_regularizer,
                learning_rate=dcn_bias_lr_scale)
        else:
            # in ResNet backbone, do not need bias
            dcn_bias_attr = False
        self.conv_dcn = DeformConv2D(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2 * dilation,
            dilation=dilation,
            deformable_groups=groups,
            weight_attr=weight_attr,
            bias_attr=dcn_bias_attr)

        if lr_scale == 1 and regularizer is None:
            offset_bias_attr = ParamAttr(initializer=Constant(0.))
        else:
            offset_bias_attr = ParamAttr(
                initializer=Constant(0.),
                learning_rate=lr_scale,
                regularizer=regularizer)
        # Output channels equal offset_channel + mask_channel. Weights and
        # bias both start at zero, so the initial offsets are all zero.
        # NOTE(review): this conv is not given `dilation`, unlike conv_dcn.
        self.conv_offset = nn.Conv2D(
            in_channels,
            groups * 3 * kernel_size**2,
            kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            weight_attr=ParamAttr(initializer=Constant(0.0)),
            bias_attr=offset_bias_attr)
        if skip_quant:
            self.conv_offset.skip_quant = True

    def forward(self, x):
        """Predict offsets and mask from ``x`` and apply the deformable conv."""
        offset_mask = self.conv_offset(x)
        # Split along axis 1 into the offset part and the mask part.
        offset, mask = paddle.split(
            offset_mask,
            num_or_sections=[self.offset_channel, self.mask_channel],
            axis=1)
        # Squash the mask into (0, 1) before it is used for modulation.
        mask = F.sigmoid(mask)
        y = self.conv_dcn(x, offset, mask=mask)
        return y
class
ConvBNLayer
(
nn
.
Layer
):
class
ConvBNLayer
(
nn
.
Layer
):
def
__init__
(
self
,
def
__init__
(
self
,
in_channels
,
in_channels
,
...
@@ -32,12 +105,14 @@ class ConvBNLayer(nn.Layer):
...
@@ -32,12 +105,14 @@ class ConvBNLayer(nn.Layer):
stride
=
1
,
stride
=
1
,
groups
=
1
,
groups
=
1
,
is_vd_mode
=
False
,
is_vd_mode
=
False
,
act
=
None
):
act
=
None
,
is_dcn
=
False
):
super
(
ConvBNLayer
,
self
).
__init__
()
super
(
ConvBNLayer
,
self
).
__init__
()
self
.
is_vd_mode
=
is_vd_mode
self
.
is_vd_mode
=
is_vd_mode
self
.
_pool2d_avg
=
nn
.
AvgPool2D
(
self
.
_pool2d_avg
=
nn
.
AvgPool2D
(
kernel_size
=
2
,
stride
=
2
,
padding
=
0
,
ceil_mode
=
True
)
kernel_size
=
2
,
stride
=
2
,
padding
=
0
,
ceil_mode
=
True
)
if
not
is_dcn
:
self
.
_conv
=
nn
.
Conv2D
(
self
.
_conv
=
nn
.
Conv2D
(
in_channels
=
in_channels
,
in_channels
=
in_channels
,
out_channels
=
out_channels
,
out_channels
=
out_channels
,
...
@@ -46,6 +121,15 @@ class ConvBNLayer(nn.Layer):
...
@@ -46,6 +121,15 @@ class ConvBNLayer(nn.Layer):
padding
=
(
kernel_size
-
1
)
//
2
,
padding
=
(
kernel_size
-
1
)
//
2
,
groups
=
groups
,
groups
=
groups
,
bias_attr
=
False
)
bias_attr
=
False
)
else
:
self
.
_conv
=
DeformableConvV2
(
in_channels
=
in_channels
,
out_channels
=
out_channels
,
kernel_size
=
kernel_size
,
stride
=
stride
,
padding
=
(
kernel_size
-
1
)
//
2
,
groups
=
2
,
#groups,
bias_attr
=
False
)
self
.
_batch_norm
=
nn
.
BatchNorm
(
out_channels
,
act
=
act
)
self
.
_batch_norm
=
nn
.
BatchNorm
(
out_channels
,
act
=
act
)
def
forward
(
self
,
inputs
):
def
forward
(
self
,
inputs
):
...
@@ -57,12 +141,14 @@ class ConvBNLayer(nn.Layer):
...
@@ -57,12 +141,14 @@ class ConvBNLayer(nn.Layer):
class
BottleneckBlock
(
nn
.
Layer
):
class
BottleneckBlock
(
nn
.
Layer
):
def
__init__
(
self
,
def
__init__
(
self
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
stride
,
stride
,
shortcut
=
True
,
shortcut
=
True
,
if_first
=
False
):
if_first
=
False
,
is_dcn
=
False
,
):
super
(
BottleneckBlock
,
self
).
__init__
()
super
(
BottleneckBlock
,
self
).
__init__
()
self
.
conv0
=
ConvBNLayer
(
self
.
conv0
=
ConvBNLayer
(
...
@@ -75,7 +161,8 @@ class BottleneckBlock(nn.Layer):
...
@@ -75,7 +161,8 @@ class BottleneckBlock(nn.Layer):
out_channels
=
out_channels
,
out_channels
=
out_channels
,
kernel_size
=
3
,
kernel_size
=
3
,
stride
=
stride
,
stride
=
stride
,
act
=
'relu'
)
act
=
'relu'
,
is_dcn
=
is_dcn
)
self
.
conv2
=
ConvBNLayer
(
self
.
conv2
=
ConvBNLayer
(
in_channels
=
out_channels
,
in_channels
=
out_channels
,
out_channels
=
out_channels
*
4
,
out_channels
=
out_channels
*
4
,
...
@@ -152,7 +239,12 @@ class BasicBlock(nn.Layer):
...
@@ -152,7 +239,12 @@ class BasicBlock(nn.Layer):
class
ResNet
(
nn
.
Layer
):
class
ResNet
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
=
3
,
layers
=
50
,
**
kwargs
):
def
__init__
(
self
,
in_channels
=
3
,
layers
=
50
,
dcn_stage
=
None
,
out_indices
=
None
,
**
kwargs
):
super
(
ResNet
,
self
).
__init__
()
super
(
ResNet
,
self
).
__init__
()
self
.
layers
=
layers
self
.
layers
=
layers
...
@@ -175,6 +267,13 @@ class ResNet(nn.Layer):
...
@@ -175,6 +267,13 @@ class ResNet(nn.Layer):
1024
]
if
layers
>=
50
else
[
64
,
64
,
128
,
256
]
1024
]
if
layers
>=
50
else
[
64
,
64
,
128
,
256
]
num_filters
=
[
64
,
128
,
256
,
512
]
num_filters
=
[
64
,
128
,
256
,
512
]
self
.
dcn_stage
=
dcn_stage
if
dcn_stage
is
not
None
else
[
False
,
False
,
False
,
False
]
self
.
out_indices
=
out_indices
if
out_indices
is
not
None
else
[
0
,
1
,
2
,
3
]
self
.
conv1_1
=
ConvBNLayer
(
self
.
conv1_1
=
ConvBNLayer
(
in_channels
=
in_channels
,
in_channels
=
in_channels
,
out_channels
=
32
,
out_channels
=
32
,
...
@@ -201,6 +300,7 @@ class ResNet(nn.Layer):
...
@@ -201,6 +300,7 @@ class ResNet(nn.Layer):
for
block
in
range
(
len
(
depth
)):
for
block
in
range
(
len
(
depth
)):
block_list
=
[]
block_list
=
[]
shortcut
=
False
shortcut
=
False
is_dcn
=
self
.
dcn_stage
[
block
]
for
i
in
range
(
depth
[
block
]):
for
i
in
range
(
depth
[
block
]):
bottleneck_block
=
self
.
add_sublayer
(
bottleneck_block
=
self
.
add_sublayer
(
'bb_%d_%d'
%
(
block
,
i
),
'bb_%d_%d'
%
(
block
,
i
),
...
@@ -210,15 +310,18 @@ class ResNet(nn.Layer):
...
@@ -210,15 +310,18 @@ class ResNet(nn.Layer):
out_channels
=
num_filters
[
block
],
out_channels
=
num_filters
[
block
],
stride
=
2
if
i
==
0
and
block
!=
0
else
1
,
stride
=
2
if
i
==
0
and
block
!=
0
else
1
,
shortcut
=
shortcut
,
shortcut
=
shortcut
,
if_first
=
block
==
i
==
0
))
if_first
=
block
==
i
==
0
,
is_dcn
=
is_dcn
))
shortcut
=
True
shortcut
=
True
block_list
.
append
(
bottleneck_block
)
block_list
.
append
(
bottleneck_block
)
if
block
in
self
.
out_indices
:
self
.
out_channels
.
append
(
num_filters
[
block
]
*
4
)
self
.
out_channels
.
append
(
num_filters
[
block
]
*
4
)
self
.
stages
.
append
(
nn
.
Sequential
(
*
block_list
))
self
.
stages
.
append
(
nn
.
Sequential
(
*
block_list
))
else
:
else
:
for
block
in
range
(
len
(
depth
)):
for
block
in
range
(
len
(
depth
)):
block_list
=
[]
block_list
=
[]
shortcut
=
False
shortcut
=
False
# is_dcn = self.dcn_stage[block]
for
i
in
range
(
depth
[
block
]):
for
i
in
range
(
depth
[
block
]):
basic_block
=
self
.
add_sublayer
(
basic_block
=
self
.
add_sublayer
(
'bb_%d_%d'
%
(
block
,
i
),
'bb_%d_%d'
%
(
block
,
i
),
...
@@ -231,6 +334,7 @@ class ResNet(nn.Layer):
...
@@ -231,6 +334,7 @@ class ResNet(nn.Layer):
if_first
=
block
==
i
==
0
))
if_first
=
block
==
i
==
0
))
shortcut
=
True
shortcut
=
True
block_list
.
append
(
basic_block
)
block_list
.
append
(
basic_block
)
if
block
in
self
.
out_indices
:
self
.
out_channels
.
append
(
num_filters
[
block
])
self
.
out_channels
.
append
(
num_filters
[
block
])
self
.
stages
.
append
(
nn
.
Sequential
(
*
block_list
))
self
.
stages
.
append
(
nn
.
Sequential
(
*
block_list
))
...
@@ -240,7 +344,8 @@ class ResNet(nn.Layer):
...
@@ -240,7 +344,8 @@ class ResNet(nn.Layer):
y
=
self
.
conv1_3
(
y
)
y
=
self
.
conv1_3
(
y
)
y
=
self
.
pool2d_max
(
y
)
y
=
self
.
pool2d_max
(
y
)
out
=
[]
out
=
[]
for
block
in
self
.
stages
:
for
i
,
block
in
enumerate
(
self
.
stages
)
:
y
=
block
(
y
)
y
=
block
(
y
)
if
i
in
self
.
out_indices
:
out
.
append
(
y
)
out
.
append
(
y
)
return
out
return
out
ppocr/modeling/heads/__init__.py
View file @
9f62b610
...
@@ -21,6 +21,7 @@ def build_head(config):
...
@@ -21,6 +21,7 @@ def build_head(config):
from
.det_east_head
import
EASTHead
from
.det_east_head
import
EASTHead
from
.det_sast_head
import
SASTHead
from
.det_sast_head
import
SASTHead
from
.det_pse_head
import
PSEHead
from
.det_pse_head
import
PSEHead
from
.det_fce_head
import
FCEHead
from
.e2e_pg_head
import
PGHead
from
.e2e_pg_head
import
PGHead
# rec head
# rec head
...
@@ -40,8 +41,8 @@ def build_head(config):
...
@@ -40,8 +41,8 @@ def build_head(config):
from
.table_att_head
import
TableAttentionHead
from
.table_att_head
import
TableAttentionHead
support_dict
=
[
support_dict
=
[
'DBHead'
,
'PSEHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
,
'DBHead'
,
'PSEHead'
,
'FCEHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'AttentionHead'
,
'SRNHead'
,
'PGHead'
,
'Transformer'
,
'ClsHead'
,
'AttentionHead'
,
'SRNHead'
,
'PGHead'
,
'Transformer'
,
'TableAttentionHead'
,
'SARHead'
,
'AsterHead'
,
'SDMGRHead'
'TableAttentionHead'
,
'SARHead'
,
'AsterHead'
,
'SDMGRHead'
]
]
...
...
ppocr/modeling/heads/det_fce_head.py
0 → 100644
View file @
9f62b610
from
paddle
import
nn
from
paddle
import
ParamAttr
import
paddle.nn.functional
as
F
from
paddle.nn.initializer
import
Normal
import
paddle
from
functools
import
partial
def multi_apply(func, *args, **kwargs):
    """Map ``func`` over several inputs and group the outputs by kind.

    Note:
        ``func`` is invoked once per position of the sequences in ``args``
        and is expected to return a tuple. The result holds one list per
        element of that tuple, so each list contains the same kind of
        output for all the inputs.

    Args:
        func (Function): The function applied to every set of arguments.
        *args: Parallel iterables providing the positional arguments.
        **kwargs: Keyword arguments passed unchanged to every call.

    Returns:
        tuple(list): A tuple of lists; the i-th list collects the i-th
        value returned by each call.
    """
    call = func
    if kwargs:
        call = partial(func, **kwargs)
    outputs = list(map(call, *args))
    # Transpose: one tuple per call -> one list per output kind.
    return tuple([list(group) for group in zip(*outputs)])
class FCEHead(nn.Layer):
    """Prediction head of FCENet.

    FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped Text
    Detection.
    [https://arxiv.org/abs/2104.10442]

    Two 3x3 convolutions, shared by every feature level, produce a
    4-channel classification map and a ``2 * (2 * fourier_degree + 1)``
    channel regression map.

    Args:
        in_channels (int): The number of input channels.
        scales (list[int]) : The scale of each layer.
        fourier_degree (int) : The maximum Fourier transform degree k.
    """

    def __init__(self, in_channels, scales, fourier_degree=5):
        super().__init__()
        assert isinstance(in_channels, int)

        self.downsample_ratio = 1.0
        self.in_channels = in_channels
        self.scales = scales
        self.fourier_degree = fourier_degree
        self.out_channels_cls = 4
        self.out_channels_reg = (2 * self.fourier_degree + 1) * 2

        # Classification conv is created first, regression conv second.
        self.out_conv_cls = self._make_out_conv(self.out_channels_cls,
                                                'cls_weights')
        self.out_conv_reg = self._make_out_conv(self.out_channels_reg,
                                                'reg_weights')

    def _make_out_conv(self, out_channels, weight_name):
        """Build one 3x3 output convolution with normally initialised weights."""
        weight_init = Normal(
            mean=paddle.to_tensor(0.), std=paddle.to_tensor(0.01))
        return nn.Conv2D(
            in_channels=self.in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            groups=1,
            weight_attr=ParamAttr(
                name=weight_name, initializer=weight_init),
            bias_attr=True)

    def forward(self, feats, targets=None):
        """Run the head on every feature level.

        In training mode the result is ``{'levels': [[cls, reg], ...]}``
        with raw outputs. Otherwise it maps ``'level_<i>'`` to the channel
        concatenation of the softmaxed first two classification channels,
        the softmaxed remaining classification channels and the raw
        regression map. ``targets`` is not used.
        """
        cls_res, reg_res = multi_apply(self.forward_single, feats)

        outs = {}
        if self.training:
            outs['levels'] = [[cls_map, reg_map]
                              for cls_map, reg_map in zip(cls_res, reg_res)]
            return outs

        for level, (cls_map, reg_map) in enumerate(zip(cls_res, reg_res)):
            tr_pred = F.softmax(cls_map[:, 0:2, :, :], axis=1)
            tcl_pred = F.softmax(cls_map[:, 2:, :, :], axis=1)
            merged = paddle.concat([tr_pred, tcl_pred, reg_map], axis=1)
            outs['level_{}'.format(level)] = merged
        return outs

    def forward_single(self, x):
        """Apply both output convolutions to a single feature map."""
        return self.out_conv_cls(x), self.out_conv_reg(x)
ppocr/modeling/necks/__init__.py
View file @
9f62b610
...
@@ -23,7 +23,11 @@ def build_neck(config):
...
@@ -23,7 +23,11 @@ def build_neck(config):
from
.pg_fpn
import
PGFPN
from
.pg_fpn
import
PGFPN
from
.table_fpn
import
TableFPN
from
.table_fpn
import
TableFPN
from
.fpn
import
FPN
from
.fpn
import
FPN
support_dict
=
[
'FPN'
,
'DBFPN'
,
'EASTFPN'
,
'SASTFPN'
,
'SequenceEncoder'
,
'PGFPN'
,
'TableFPN'
]
from
.fce_fpn
import
FCEFPN
support_dict
=
[
'FPN'
,
'FCEFPN'
,
'DBFPN'
,
'EASTFPN'
,
'SASTFPN'
,
'SequenceEncoder'
,
'PGFPN'
,
'TableFPN'
]
module_name
=
config
.
pop
(
'name'
)
module_name
=
config
.
pop
(
'name'
)
assert
module_name
in
support_dict
,
Exception
(
'neck only support {}'
.
format
(
assert
module_name
in
support_dict
,
Exception
(
'neck only support {}'
.
format
(
...
...
ppocr/modeling/necks/fce_fpn.py
0 → 100644
View file @
9f62b610
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddle
import
ParamAttr
from
paddle.nn.initializer
import
XavierUniform
from
paddle.nn.initializer
import
Normal
from
paddle.regularizer
import
L2Decay
__all__
=
[
'FCEFPN'
]
class ConvNormLayer(nn.Layer):
    """A bias-free 2-D convolution followed by a normalization layer.

    Args:
        ch_in (int): Input channels of the convolution.
        ch_out (int): Output channels of the convolution and the norm.
        filter_size (int): Kernel size; padding is ``(filter_size - 1) // 2``.
        stride (int): Convolution stride.
        groups (int): Convolution groups.
        norm_type (str): One of ``'bn'``, ``'sync_bn'`` or ``'gn'``.
        norm_decay (float|None): L2 decay for the norm scale and bias;
            ``None`` disables the regularizer.
        norm_groups (int): Number of groups, used only for ``'gn'``.
        lr_scale (float): Accepted but not read by this implementation.
        freeze_norm (bool): When true the norm parameters get learning
            rate 0.
        initializer: Weight initializer for the convolution.
    """

    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride,
                 groups=1,
                 norm_type='bn',
                 norm_decay=0.,
                 norm_groups=32,
                 lr_scale=1.,
                 freeze_norm=False,
                 initializer=Normal(
                     mean=0., std=0.01)):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        conv_weight_attr = ParamAttr(initializer=initializer, learning_rate=1.)
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            weight_attr=conv_weight_attr,
            bias_attr=False)

        norm_lr = 0. if freeze_norm else 1.

        def make_norm_attr():
            # Scale and bias each get their own attribute object.
            regularizer = None
            if norm_decay is not None:
                regularizer = L2Decay(norm_decay)
            return ParamAttr(learning_rate=norm_lr, regularizer=regularizer)

        param_attr = make_norm_attr()
        bias_attr = make_norm_attr()

        if norm_type in ('bn', 'sync_bn'):
            norm_cls = nn.BatchNorm2D if norm_type == 'bn' else nn.SyncBatchNorm
            self.norm = norm_cls(
                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=param_attr,
                bias_attr=bias_attr)

    def forward(self, inputs):
        """Return ``norm(conv(inputs))``."""
        return self.norm(self.conv(inputs))
class FCEFPN(nn.Layer):
    """
    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144

    Args:
        in_channels (list[int]): input channels of each level, as produced
            by ``from_config`` from the backbone output shape
        out_channels (int): output channel count used for every level
        spatial_scales (list[float]): scale of each input feature map
            relative to the input image; one halved entry is appended per
            extra stage
        has_extra_convs (bool): whether extra levels come from stride-2
            convolutions instead of a max pool. default False
        extra_stage (int): number of extra levels added after the last one.
            default 1
        use_c5 (bool): feed the first extra conv from the last backbone
            feature rather than the last FPN output. default True
        norm_type (string|None): None means plain convolutions; otherwise
            'bn', 'sync_bn' or 'gn' selects a ConvNormLayer. default None
        norm_decay (float): weight decay for normalization layer weights.
            default 0.
        freeze_norm (bool): whether to freeze normalization layers.
            default False
        relu_before_extra_convs (bool): apply relu before the second and
            later extra convs. default True
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
                 has_extra_convs=False,
                 extra_stage=1,
                 use_c5=True,
                 norm_type=None,
                 norm_decay=0.,
                 freeze_norm=False,
                 relu_before_extra_convs=True):
        super(FCEFPN, self).__init__()
        self.out_channels = out_channels
        # Rebinding (never mutating) leaves the shared default list intact.
        for _ in range(extra_stage):
            spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
        self.spatial_scales = spatial_scales
        self.has_extra_convs = has_extra_convs
        self.extra_stage = extra_stage
        self.use_c5 = use_c5
        self.relu_before_extra_convs = relu_before_extra_convs
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm

        self.lateral_convs = []
        self.fpn_convs = []
        fan = out_channels * 3 * 3

        # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
        # 0 <= st_stage < ed_stage <= 3
        num_inputs = len(in_channels)
        st_stage = 4 - num_inputs
        ed_stage = st_stage + num_inputs - 1
        stages = range(st_stage, ed_stage + 1)

        # 1x1 lateral convolutions, one per backbone level.
        for stage in stages:
            if stage == 3:
                lateral_name = 'fpn_inner_res5_sum'
            else:
                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(stage + 2)
            in_c = in_channels[stage - st_stage]
            self.lateral_convs.append(
                self._add_conv(lateral_name, in_c, 1, 1, in_c))

        # 3x3 output convolutions, one per backbone level.
        for stage in stages:
            fpn_name = 'fpn_res{}_sum'.format(stage + 2)
            self.fpn_convs.append(
                self._add_conv(fpn_name, out_channels, 3, 1, fan))

        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
        if self.has_extra_convs:
            for i in range(self.extra_stage):
                lvl = ed_stage + 1 + i
                first_from_c5 = i == 0 and self.use_c5
                in_c = in_channels[-1] if first_from_c5 else out_channels
                extra_fpn_name = 'fpn_{}'.format(lvl + 2)
                self.fpn_convs.append(
                    self._add_conv(extra_fpn_name, in_c, 3, 2, fan))

    def _add_conv(self, name, ch_in, kernel_size, stride, fan_out):
        """Register and return one conv (with norm if norm_type is set)."""
        initializer = XavierUniform(fan_out=fan_out)
        if self.norm_type is not None:
            layer = ConvNormLayer(
                ch_in=ch_in,
                ch_out=self.out_channels,
                filter_size=kernel_size,
                stride=stride,
                norm_type=self.norm_type,
                norm_decay=self.norm_decay,
                freeze_norm=self.freeze_norm,
                initializer=initializer)
        else:
            layer = nn.Conv2D(
                in_channels=ch_in,
                out_channels=self.out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=(kernel_size - 1) // 2,
                weight_attr=ParamAttr(initializer=initializer))
        return self.add_sublayer(name, layer)

    @classmethod
    def from_config(cls, cfg, input_shape):
        """Derive constructor kwargs from the ``channels`` and ``stride``
        attributes of each entry in ``input_shape``."""
        channels = [i.channels for i in input_shape]
        scales = [1.0 / i.stride for i in input_shape]
        return {
            'in_channels': channels,
            'spatial_scales': scales,
        }

    def forward(self, body_feats):
        """Build the pyramid outputs from the backbone features.

        Returns:
            list: One output per input level, followed by the extra levels.
        """
        num_levels = len(body_feats)
        laterals = [
            self.lateral_convs[i](feat) for i, feat in enumerate(body_feats)
        ]

        # Top-down pathway: add the upsampled coarser level to the finer one.
        for lvl in range(num_levels - 1, 0, -1):
            upsample = F.interpolate(
                laterals[lvl], scale_factor=2., mode='nearest')
            laterals[lvl - 1] += upsample

        fpn_output = [
            self.fpn_convs[i](lateral) for i, lateral in enumerate(laterals)
        ]

        if self.extra_stage <= 0:
            return fpn_output

        # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
        if not self.has_extra_convs:
            assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs'
            fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
            return fpn_output

        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
        extra_source = body_feats[-1] if self.use_c5 else fpn_output[-1]
        fpn_output.append(self.fpn_convs[num_levels](extra_source))
        for i in range(1, self.extra_stage):
            previous = fpn_output[-1]
            if self.relu_before_extra_convs:
                previous = F.relu(previous)
            fpn_output.append(self.fpn_convs[num_levels + i](previous))
        return fpn_output
ppocr/postprocess/__init__.py
View file @
9f62b610
...
@@ -24,6 +24,7 @@ __all__ = ['build_post_process']
...
@@ -24,6 +24,7 @@ __all__ = ['build_post_process']
from
.db_postprocess
import
DBPostProcess
,
DistillationDBPostProcess
from
.db_postprocess
import
DBPostProcess
,
DistillationDBPostProcess
from
.east_postprocess
import
EASTPostProcess
from
.east_postprocess
import
EASTPostProcess
from
.sast_postprocess
import
SASTPostProcess
from
.sast_postprocess
import
SASTPostProcess
from
.fce_postprocess
import
FCEPostProcess
from
.rec_postprocess
import
CTCLabelDecode
,
AttnLabelDecode
,
SRNLabelDecode
,
DistillationCTCLabelDecode
,
\
from
.rec_postprocess
import
CTCLabelDecode
,
AttnLabelDecode
,
SRNLabelDecode
,
DistillationCTCLabelDecode
,
\
TableLabelDecode
,
NRTRLabelDecode
,
SARLabelDecode
,
SEEDLabelDecode
TableLabelDecode
,
NRTRLabelDecode
,
SARLabelDecode
,
SEEDLabelDecode
from
.cls_postprocess
import
ClsPostProcess
from
.cls_postprocess
import
ClsPostProcess
...
@@ -34,9 +35,9 @@ from .vqa_token_re_layoutlm_postprocess import VQAReTokenLayoutLMPostProcess
...
@@ -34,9 +35,9 @@ from .vqa_token_re_layoutlm_postprocess import VQAReTokenLayoutLMPostProcess
def
build_post_process
(
config
,
global_config
=
None
):
def
build_post_process
(
config
,
global_config
=
None
):
support_dict
=
[
support_dict
=
[
'DBPostProcess'
,
'EASTPostProcess'
,
'SASTPostProcess'
,
'
CTCLabelDecode
'
,
'DBPostProcess'
,
'EASTPostProcess'
,
'SASTPostProcess'
,
'
FCEPostProcess
'
,
'AttnLabelDecode'
,
'ClsPostProcess'
,
'SRNLabelDecode'
,
'PGPostProcess'
,
'CTCLabelDecode'
,
'AttnLabelDecode'
,
'ClsPostProcess'
,
'SRNLabelDecode'
,
'DistillationCTCLabelDecode'
,
'TableLabelDecode'
,
'PGPostProcess'
,
'DistillationCTCLabelDecode'
,
'TableLabelDecode'
,
'DistillationDBPostProcess'
,
'NRTRLabelDecode'
,
'SARLabelDecode'
,
'DistillationDBPostProcess'
,
'NRTRLabelDecode'
,
'SARLabelDecode'
,
'SEEDLabelDecode'
,
'VQASerTokenLayoutLMPostProcess'
,
'SEEDLabelDecode'
,
'VQASerTokenLayoutLMPostProcess'
,
'VQAReTokenLayoutLMPostProcess'
'VQAReTokenLayoutLMPostProcess'
...
...
ppocr/postprocess/fce_postprocess.py
0 → 100755
View file @
9f62b610
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
cv2
import
paddle
from
numpy.fft
import
ifft
import
Polygon
as
plg
def points2polygon(points):
    """Build a polygon object from a flat sequence of coordinates.

    Args:
        points (ndarray or list): 2k values describing k points; at least
            8 values are required.

    Returns:
        polygon (Polygon): A polygon built from the (k, 2) point matrix.
    """
    coords = np.array(points) if isinstance(points, list) else points
    assert isinstance(coords, np.ndarray)
    assert (coords.size % 2 == 0) and (coords.size >= 8)
    return plg.Polygon(coords.reshape([-1, 2]))
def poly_intersection(poly_det, poly_gt):
    """Intersect two polygons.

    Args:
        poly_det (Polygon): A polygon predicted by detector.
        poly_gt (Polygon): A gt polygon.

    Returns:
        tuple: ``(intersection_area, intersection_polygon)``. The area is 0
        when the intersection polygon is empty.
    """
    assert isinstance(poly_det, plg.Polygon)
    assert isinstance(poly_gt, plg.Polygon)

    overlap = poly_det & poly_gt
    area = 0 if len(overlap) == 0 else overlap.area()
    return area, overlap
def poly_union(poly_det, poly_gt):
    """Compute the area of the union of two polygons.

    Args:
        poly_det (Polygon): A polygon predicted by detector.
        poly_gt (Polygon): A gt polygon.

    Returns:
        union_area (float): Sum of both areas minus the intersection area.
    """
    assert isinstance(poly_det, plg.Polygon)
    assert isinstance(poly_gt, plg.Polygon)

    det_area = poly_det.area()
    gt_area = poly_gt.area()
    overlap_area = poly_intersection(poly_det, poly_gt)[0]
    return det_area + gt_area - overlap_area
def valid_boundary(x, with_score=True):
    """Check the length of a flat boundary.

    A boundary needs at least 8 entries. With a trailing score the length
    must be odd; without one it must be even.

    Args:
        x (sequence): The flat boundary.
        with_score (bool): Whether the last entry is a score.

    Returns:
        bool: True when the length matches the expected layout.
    """
    num = len(x)
    has_odd_length = num % 2 == 1
    return num >= 8 and has_odd_length == bool(with_score)
def boundary_iou(src, target):
    """Compute the IOU of two score-free boundaries.

    Args:
        src (list): Source boundary.
        target (list): Target boundary.

    Returns:
        iou (float): The iou between two boundaries.
    """
    assert valid_boundary(src, False)
    assert valid_boundary(target, False)
    return poly_iou(points2polygon(src), points2polygon(target))
def poly_iou(poly_det, poly_gt):
    """Compute the IOU of two polygons.

    Args:
        poly_det (Polygon): A polygon predicted by detector.
        poly_gt (Polygon): A gt polygon.

    Returns:
        iou (float): Intersection area over union area, or 0.0 when the
        union area is zero.
    """
    assert isinstance(poly_det, plg.Polygon)
    assert isinstance(poly_gt, plg.Polygon)

    overlap_area, _ = poly_intersection(poly_det, poly_gt)
    total_area = poly_union(poly_det, poly_gt)
    return overlap_area / total_area if total_area != 0 else 0.0
def poly_nms(polygons, threshold):
    """Greedy non-maximum suppression on scored polygons.

    Args:
        polygons (list): Flat boundaries whose last entry is the score.
        threshold (float): Candidates whose IOU with a kept polygon exceeds
            this value are dropped.

    Returns:
        list: The kept boundaries, highest score first.
    """
    assert isinstance(polygons, list)

    # Ascending by score, so the best remaining candidate is always last.
    ranked = np.array(sorted(polygons, key=lambda item: item[-1]))
    remaining = np.arange(ranked.shape[0])
    kept = []
    while remaining.size > 0:
        best = ranked[remaining[-1]]
        kept.append(best.tolist())
        remaining = remaining[:-1]

        reference = best[:-1]
        overlaps = np.array(
            [boundary_iou(reference, ranked[j][:-1]) for j in remaining],
            dtype=float)
        remaining = remaining[~(overlaps > threshold)]
    return kept
def fill_hole(input_mask):
    """Fill the enclosed holes of a binary mask.

    The mask is copied into a canvas padded by one pixel, and the zero
    region connected to the canvas corner is flood-filled with 1. Whatever
    is still 0 afterwards is a hole, and is OR-ed into the input mask.

    Args:
        input_mask (ndarray): 2-D boolean mask.

    Returns:
        ndarray: Boolean mask of the same shape with holes set to True.
    """
    h, w = input_mask.shape
    canvas = np.zeros((h + 2, w + 2), np.uint8)
    canvas[1:h + 1, 1:w + 1] = input_mask

    # floodFill requires a mask two pixels larger than the image it fills.
    mask = np.zeros((h + 4, w + 4), np.uint8)
    cv2.floodFill(canvas, mask, (0, 0), 1)

    # np.bool was removed from NumPy; np.bool_ is the supported scalar type.
    canvas = canvas[1:h + 1, 1:w + 1].astype(np.bool_)

    return ~canvas | input_mask
def fourier2poly(fourier_coeff, num_reconstr_points=50):
    """ Inverse Fourier transform

    Args:
        fourier_coeff (ndarray): Fourier coefficients shaped (n, 2k+1),
            with n and k being candidates number and Fourier degree
            respectively. Column k holds the degree-0 term. n may be 0.
        num_reconstr_points (int): Number of reconstructed polygon points.

    Returns:
        Polygons (ndarray): The reconstructed polygons shaped
        (n, 2 * num_reconstr_points), int32, as interleaved x, y values.
    """
    fourier_coeff = np.asarray(fourier_coeff)
    num_polys = fourier_coeff.shape[0]
    flat_len = 2 * num_reconstr_points

    # No candidates: return an empty result instead of indexing row 0.
    if num_polys == 0:
        return np.zeros((0, flat_len), dtype='int32')

    k = (fourier_coeff.shape[1] - 1) // 2

    # Lay the coefficients out in FFT order: non-negative degrees first,
    # negative degrees at the end of the spectrum.
    a = np.zeros((num_polys, num_reconstr_points), dtype='complex')
    a[:, 0:k + 1] = fourier_coeff[:, k:]
    if k > 0:
        # With k == 0 the slice a[:, -0:] would span the whole row.
        a[:, -k:] = fourier_coeff[:, :k]

    # ifft divides by the number of points; undo that scaling.
    poly_complex = ifft(a) * num_reconstr_points
    polygon = np.stack((poly_complex.real, poly_complex.imag), axis=-1)
    return polygon.astype('int32').reshape((num_polys, flat_len))
def fcenet_decode(preds,
                  fourier_degree,
                  num_reconstr_points,
                  scale,
                  alpha=1.0,
                  beta=2.0,
                  text_repr_type='poly',
                  score_thr=0.3,
                  nms_thr=0.1):
    """Decoding predictions of FCENet to instances.

    Only the first sample of each prediction array is decoded.

    Args:
        preds (list): ``[cls, reg]`` arrays of one level. ``cls[0]`` is
            split into channels 0:2 and 2:; ``reg[0]`` holds the x
            coefficients in its first 2k+1 channels and the y coefficients
            in the rest.
        fourier_degree (int): The maximum Fourier transform degree k.
        num_reconstr_points (int): The points number of the polygon
            reconstructed from predicted Fourier coefficients.
        scale (int): The down-sample scale of the prediction.
        alpha (float) : The parameter to calculate final scores. Score_{final}
                = (Score_{text region} ^ alpha)
                * (Score_{text center region}^ beta)
        beta (float) : The parameter to calculate final score.
        text_repr_type (str):  Boundary encoding type 'poly' or 'quad'.
        score_thr (float) : The threshold used to filter out the final
            candidates.
        nms_thr (float) :  The threshold of nms.

    Returns:
        boundaries (list[list[float]]): The instance boundary and confidence
            list. For 'quad' each boundary is the minimum-area rectangle of
            the polygon (8 coordinates) followed by the score.
    """
    assert isinstance(preds, list)
    assert len(preds) == 2
    assert text_repr_type in ['poly', 'quad']

    cls_pred = preds[0][0]
    tr_pred = cls_pred[0:2]
    tcl_pred = cls_pred[2:]

    # Channels-last layout so a 2-D boolean mask selects coefficient rows.
    reg_pred = preds[1][0].transpose([1, 2, 0])
    x_pred = reg_pred[:, :, :2 * fourier_degree + 1]
    y_pred = reg_pred[:, :, 2 * fourier_degree + 1:]

    score_pred = (tr_pred[1]**alpha) * (tcl_pred[1]**beta)
    tr_pred_mask = (score_pred) > score_thr
    tr_mask = fill_hole(tr_pred_mask)

    # findContours returns (contours, hierarchy) in OpenCV 4 and
    # (image, contours, hierarchy) in OpenCV 3; [-2] is the contours in both.
    tr_contours = cv2.findContours(
        tr_mask.astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    mask = np.zeros_like(tr_mask)
    boundaries = []
    for cont in tr_contours:
        # Rasterize the filled contour to select this region's pixels.
        deal_map = mask.copy().astype(np.int8)
        cv2.drawContours(deal_map, [cont], -1, 1, -1)

        score_map = score_pred * deal_map
        score_mask = score_map > 0
        xy_text = np.argwhere(score_mask)
        # argwhere yields (row, col); encode each pixel as col + row * 1j.
        dxy = xy_text[:, 1] + xy_text[:, 0] * 1j

        x, y = x_pred[score_mask], y_pred[score_mask]
        c = x + y * 1j
        # Add the pixel position to the degree-0 term of each candidate.
        c[:, fourier_degree] = c[:, fourier_degree] + dxy
        c *= scale

        polygons = fourier2poly(c, num_reconstr_points)
        score = score_map[score_mask].reshape(-1, 1)
        polygons = poly_nms(np.hstack((polygons, score)).tolist(), nms_thr)

        boundaries = boundaries + polygons

    boundaries = poly_nms(boundaries, nms_thr)

    if text_repr_type == 'quad':
        new_boundaries = []
        for boundary in boundaries:
            poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32)
            score = boundary[-1]
            points = cv2.boxPoints(cv2.minAreaRect(poly))
            # np.int0 was an alias of np.intp and no longer exists in NumPy 2.
            points = points.astype(np.intp)
            new_boundaries.append(points.reshape(-1).tolist() + [score])
        # Return the rectangles; previously they were built and dropped.
        return new_boundaries

    return boundaries
class FCEPostProcess(object):
    """
    The post process for FCENet.

    Args:
        scales (list[int]): Down-sample scale of each prediction level; must
            have one entry per level in the predictions.
        fourier_degree (int): The maximum Fourier transform degree k.
        num_reconstr_points (int): Points per reconstructed polygon.
        decoding_type (str): Stored only; not read by this class.
        score_thr (float): Score threshold passed to the decoder.
        nms_thr (float): IOU threshold for polygon NMS.
        alpha (float): Exponent passed to the decoder.
        beta (float): Exponent passed to the decoder.
        text_repr_type (str): 'poly' or 'quad'.
        **kwargs: Ignored.
    """

    def __init__(self,
                 scales,
                 fourier_degree=5,
                 num_reconstr_points=50,
                 decoding_type='fcenet',
                 score_thr=0.3,
                 nms_thr=0.1,
                 alpha=1.0,
                 beta=1.0,
                 text_repr_type='poly',
                 **kwargs):

        self.scales = scales
        self.fourier_degree = fourier_degree
        self.num_reconstr_points = num_reconstr_points
        self.decoding_type = decoding_type
        self.score_thr = score_thr
        self.nms_thr = nms_thr
        self.alpha = alpha
        self.beta = beta
        self.text_repr_type = text_repr_type

    def __call__(self, preds, shape_list):
        """Split every level into cls/reg maps and decode the boundaries.

        Args:
            preds (dict): One array or paddle Tensor per level, in level
                order. The first 4 channels are the classification maps and
                the rest the regression maps.
            shape_list (ndarray): Indexed as ``shape_list[0, 2:]``.
                NOTE(review): presumably rows of (src_h, src_w, ratio_h,
                ratio_w) - confirm against the data pipeline.

        Returns:
            list[dict]: A single dict with ``points`` and ``scores``.
        """
        score_maps = []
        for value in preds.values():
            if isinstance(value, paddle.Tensor):
                value = value.numpy()
            cls_res = value[:, :4, :, :]
            reg_res = value[:, 4:, :, :]
            score_maps.append([cls_res, reg_res])

        return self.get_boundary(score_maps, shape_list)

    def resize_boundary(self, boundaries, scale_factor):
        """Rescale boundaries via scale_factor.

        Args:
            boundaries (list[list[float]]): The boundary list. Each boundary
            with size 2k+1 with k>=4.
            scale_factor (ndarray or list): Only the first two entries are
                used: the multipliers for the x and y coordinates.

        Returns:
            tuple: ``(boxes, scores)`` where ``boxes`` is a float32 array of
            (num_points, 2) point matrices and ``scores`` is a list.
        """
        factor = np.asarray(scale_factor[:2])
        boxes = []
        scores = []
        for b in boundaries:
            # The validity check result used to be discarded.
            assert valid_boundary(b, True)
            scores.append(b[-1])
            points = np.array(b[:-1]).reshape([-1, 2])
            # Broadcasting scales every (x, y) pair by (factor_x, factor_y).
            boxes.append(points * factor)

        return np.array(boxes, dtype=np.float32), scores

    def get_boundary(self, score_maps, shape_list):
        """Decode all levels, merge them with NMS and rescale the result.

        Only the first row of ``shape_list`` is used, so the output always
        describes a single image.
        """
        assert len(score_maps) == len(self.scales)
        boundaries = []
        for score_map, scale in zip(score_maps, self.scales):
            boundaries.extend(self._get_boundary_single(score_map, scale))

        # nms
        boundaries = poly_nms(boundaries, self.nms_thr)

        # Invert the last two entries of the first row and reverse them, so
        # the first factor multiplies x and the second multiplies y.
        boundaries, scores = self.resize_boundary(
            boundaries, (1 / shape_list[0, 2:]).tolist()[::-1])

        boxes_batch = [dict(points=boundaries, scores=scores)]
        return boxes_batch

    def _get_boundary_single(self, score_map, scale):
        """Decode the ``[cls, reg]`` maps of one level."""
        assert len(score_map) == 2
        assert score_map[1].shape[1] == 4 * self.fourier_degree + 2

        return fcenet_decode(
            preds=score_map,
            fourier_degree=self.fourier_degree,
            num_reconstr_points=self.num_reconstr_points,
            scale=scale,
            alpha=self.alpha,
            beta=self.beta,
            text_repr_type=self.text_repr_type,
            score_thr=self.score_thr,
            nms_thr=self.nms_thr)
train.sh
View file @
9f62b610
# recommended paddle.__version__ == 2.0.0
# recommended paddle.__version__ == 2.0.0
python3
-m
paddle.distributed.launch
--log_dir
=
./debug/
--gpus
'0,1,2,3,4,5,6,7'
tools/train.py
-c
configs/rec/rec_mv3_none_bilstm_ctc.yml
# python3 -m paddle.distributed.launch --log_dir=./debug/ --gpus '0,1,2,3,4,5,6,7' tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
python
-m
paddle.distributed.launch
--gpus
'7'
tools/train.py
-c
configs/det/det_r50_fce_ctw.yml
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment