Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
de88d73d
Commit
de88d73d
authored
Oct 09, 2021
by
LDOUBLEV
Browse files
add kie
parent
c5f33b00
Changes
21
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1034 additions
and
25 deletions
+1034
-25
configs/e2e/e2e_r50_vd_pg.yml
configs/e2e/e2e_r50_vd_pg.yml
+2
-2
configs/kie/kie_unet_sdmgr.yml
configs/kie/kie_unet_sdmgr.yml
+111
-0
doc/doc_ch/pgnet.md
doc/doc_ch/pgnet.md
+3
-3
doc/doc_en/pgnet_en.md
doc/doc_en/pgnet_en.md
+3
-3
ppocr/data/imaug/label_ops.py
ppocr/data/imaug/label_ops.py
+172
-7
ppocr/data/imaug/operators.py
ppocr/data/imaug/operators.py
+46
-1
ppocr/losses/__init__.py
ppocr/losses/__init__.py
+6
-1
ppocr/losses/kie_sdmgr_loss.py
ppocr/losses/kie_sdmgr_loss.py
+113
-0
ppocr/metrics/__init__.py
ppocr/metrics/__init__.py
+4
-1
ppocr/metrics/kie_metric.py
ppocr/metrics/kie_metric.py
+60
-0
ppocr/modeling/backbones/__init__.py
ppocr/modeling/backbones/__init__.py
+4
-0
ppocr/modeling/backbones/kie_unet_sdmgr.py
ppocr/modeling/backbones/kie_unet_sdmgr.py
+153
-0
ppocr/modeling/heads/__init__.py
ppocr/modeling/heads/__init__.py
+6
-2
ppocr/modeling/heads/kie_sdmgr_head.py
ppocr/modeling/heads/kie_sdmgr_head.py
+206
-0
ppocr/postprocess/__init__.py
ppocr/postprocess/__init__.py
+2
-0
pretrain_model/iter_epoch_60.pdopt
pretrain_model/iter_epoch_60.pdopt
+0
-0
pretrain_model/iter_epoch_60.pdparams
pretrain_model/iter_epoch_60.pdparams
+0
-0
pretrain_model/iter_epoch_60.states
pretrain_model/iter_epoch_60.states
+0
-0
tools/infer_kie.py
tools/infer_kie.py
+137
-0
tools/program.py
tools/program.py
+6
-5
No files found.
configs/e2e/e2e_r50_vd_pg.yml
View file @
de88d73d
...
@@ -69,7 +69,7 @@ Metric:
...
@@ -69,7 +69,7 @@ Metric:
Train
:
Train
:
dataset
:
dataset
:
name
:
PGDataSet
name
:
PGDataSet
label_file_list
:
[
.././train_data/total_text/train/
]
label_file_list
:
[
.././train_data/total_text/train/
total_text.txt
]
ratio_list
:
[
1.0
]
ratio_list
:
[
1.0
]
data_format
:
icdar
#two data format: icdar/textnet
data_format
:
icdar
#two data format: icdar/textnet
transforms
:
transforms
:
...
@@ -93,7 +93,7 @@ Eval:
...
@@ -93,7 +93,7 @@ Eval:
dataset
:
dataset
:
name
:
PGDataSet
name
:
PGDataSet
data_dir
:
./train_data/
data_dir
:
./train_data/
label_file_list
:
[
./train_data/total_text/test/
]
label_file_list
:
[
./train_data/total_text/test/
total_text.txt
]
transforms
:
transforms
:
-
DecodeImage
:
# load image
-
DecodeImage
:
# load image
img_mode
:
RGB
img_mode
:
RGB
...
...
configs/kie/kie_unet_sdmgr.yml
0 → 100644
View file @
de88d73d
Global
:
use_gpu
:
True
epoch_num
:
300
log_smooth_window
:
20
print_batch_step
:
50
save_model_dir
:
./output/kie_5/
save_epoch_step
:
50
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
0
,
80
]
# 1. If pretrained_model is saved in static mode, such as classification pretrained model
# from static branch, load_static_weights must be set as True.
# 2. If you want to finetune the pretrained models we provide in the docs,
# you should set load_static_weights as False.
load_static_weights
:
False
cal_metric_during_train
:
False
pretrained_model
:
./output/kie_4/best_accuracy
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
class_path
:
./train_data/wildreceipt/class_list.txt
infer_img
:
./train_data/wildreceipt/1.txt
save_res_path
:
./output/sdmgr_kie/predicts_kie.txt
img_scale
:
[
1024
,
512
]
Architecture
:
model_type
:
kie
algorithm
:
SDMGR
Transform
:
Backbone
:
name
:
Kie_backbone
Head
:
name
:
SDMGRHead
Loss
:
name
:
SDMGRLoss
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
name
:
Piecewise
learning_rate
:
0.001
decay_epochs
:
[
60
,
80
,
100
]
values
:
[
0.001
,
0.0001
,
0.00001
]
warmup_epoch
:
2
regularizer
:
name
:
'
L2'
factor
:
0.00005
PostProcess
:
name
:
None
Metric
:
name
:
KIEMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/wildreceipt/
label_file_list
:
[
'
./train_data/wildreceipt/wildreceipt_train.txt'
]
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
KieLabelEncode
:
# Class handling label
character_dict_path
:
./train_data/wildreceipt/dict.txt
-
KieResize
:
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
relations'
,
'
texts'
,
'
points'
,
'
labels'
,
'
tag'
,
'
shape'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
4
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/wildreceipt
label_file_list
:
-
./train_data/wildreceipt/wildreceipt_test.txt
# - /paddle/data/PaddleOCR/train_data/wildreceipt/1.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
RGB
channel_first
:
False
-
KieLabelEncode
:
# Class handling label
character_dict_path
:
./train_data/wildreceipt/dict.txt
-
KieResize
:
-
NormalizeImage
:
scale
:
1
mean
:
[
123.675
,
116.28
,
103.53
]
std
:
[
58.395
,
57.12
,
57.375
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
relations'
,
'
texts'
,
'
points'
,
'
labels'
,
'
tag'
,
'
ori_image'
,
'
ori_boxes'
,
'
shape'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
4
\ No newline at end of file
doc/doc_ch/pgnet.md
View file @
de88d73d
...
@@ -87,15 +87,15 @@ python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/im
...
@@ -87,15 +87,15 @@ python3 tools/infer/predict_e2e.py --e2e_algorithm="PGNet" --image_dir="./doc/im
```
```
/PaddleOCR/train_data/total_text/train/
/PaddleOCR/train_data/total_text/train/
|- rgb/ # total_text数据集的训练数据
|- rgb/ # total_text数据集的训练数据
|-
gt_0.pn
g
|-
img11.jp
g
| ...
| ...
|- t
otal_text.txt
# total_text数据集的训练标注
|- t
rain.txt
# total_text数据集的训练标注
```
```
total_text.txt标注文件格式如下,文件名和标注信息中间用"
\t
"分隔:
total_text.txt标注文件格式如下,文件名和标注信息中间用"
\t
"分隔:
```
```
" 图像文件名 json.dumps编码的图像标注信息"
" 图像文件名 json.dumps编码的图像标注信息"
rgb/
gt_0.pn
g [{"transcription": "
EST
", "points": [[1
00
4.0,
689.0],[1019.0,698.0],[1034.0,708.0],[1049.0,718.0],[1064.0,728.0],[1079.0,738.0],[1095.0,748.0],[1094.0,774.0],[1079.0,765.0],[1065.0,756.0],[1050.0,747.0],[1036.0,738.0],[1021.0,729.0],[1007.0,721
.0]]}, {...}]
rgb/
img11.jp
g [{"transcription": "
ASRAMA
", "points": [[
2
14.0,
325.0], [235.0, 308.0], [259.0, 296.0], [286.0, 291.0], [313.0, 295.0], [338.0, 305.0], [362.0, 320.0], [349.0, 347.0], [330.0, 337.0], [310.0, 329.0], [290.0, 324.0], [269.0, 328.0], [249.0, 336.0], [231.0, 346
.0]]}, {...}]
```
```
json.dumps编码前的图像标注信息是包含多个字典的list,字典中的
`points`
表示文本框的四个点的坐标(x, y),从左上角的点开始顺时针排列。
json.dumps编码前的图像标注信息是包含多个字典的list,字典中的
`points`
表示文本框的四个点的坐标(x, y),从左上角的点开始顺时针排列。
`transcription`
表示当前文本框的文字,
**当其内容为“###”时,表示该文本框无效,在训练时会跳过。**
`transcription`
表示当前文本框的文字,
**当其内容为“###”时,表示该文本框无效,在训练时会跳过。**
...
...
doc/doc_en/pgnet_en.md
View file @
de88d73d
...
@@ -80,15 +80,15 @@ Download and unzip [totaltext](https://github.com/cs-chan/Total-Text-Dataset/blo
...
@@ -80,15 +80,15 @@ Download and unzip [totaltext](https://github.com/cs-chan/Total-Text-Dataset/blo
```
```
/PaddleOCR/train_data/total_text/train/
/PaddleOCR/train_data/total_text/train/
|- rgb/ # total_text training data of dataset
|- rgb/ # total_text training data of dataset
|-
gt_0
.png
|-
img11
.png
| ...
| ...
|- t
otal_text.txt
# total_text training annotation of dataset
|- t
rain.txt
# total_text training annotation of dataset
```
```
total_text.txt: the format of dimension file is as follows,the file name and annotation information are separated by "
\t
":
total_text.txt: the format of dimension file is as follows,the file name and annotation information are separated by "
\t
":
```
```
" Image file name Image annotation information encoded by json.dumps"
" Image file name Image annotation information encoded by json.dumps"
rgb/
gt_0.pn
g [{"transcription": "
EST
", "points": [[1
00
4.0,
689.0],[1019.0,698.0],[1034.0,708.0],[1049.0,718.0],[1064.0,728.0],[1079.0,738.0],[1095.0,748.0],[1094.0,774.0],[1079.0,765.0],[1065.0,756.0],[1050.0,747.0],[1036.0,738.0],[1021.0,729.0],[1007.0,721
.0]]}, {...}]
rgb/
img11.jp
g [{"transcription": "
ASRAMA
", "points": [[
2
14.0,
325.0], [235.0, 308.0], [259.0, 296.0], [286.0, 291.0], [313.0, 295.0], [338.0, 305.0], [362.0, 320.0], [349.0, 347.0], [330.0, 337.0], [310.0, 329.0], [290.0, 324.0], [269.0, 328.0], [249.0, 336.0], [231.0, 346
.0]]}, {...}]
```
```
The image annotation after
**json.dumps()**
encoding is a list containing multiple dictionaries.
The image annotation after
**json.dumps()**
encoding is a list containing multiple dictionaries.
...
...
ppocr/data/imaug/label_ops.py
View file @
de88d73d
...
@@ -19,6 +19,7 @@ from __future__ import unicode_literals
...
@@ -19,6 +19,7 @@ from __future__ import unicode_literals
import
numpy
as
np
import
numpy
as
np
import
string
import
string
from
shapely.geometry
import
LineString
,
Point
,
Polygon
class
ClsLabelEncode
(
object
):
class
ClsLabelEncode
(
object
):
...
@@ -200,19 +201,183 @@ class E2ELabelEncode(BaseRecLabelEncode):
...
@@ -200,19 +201,183 @@ class E2ELabelEncode(BaseRecLabelEncode):
self
.
pad_num
=
len
(
self
.
dict
)
# the length to pad
self
.
pad_num
=
len
(
self
.
dict
)
# the length to pad
def
__call__
(
self
,
data
):
def
__call__
(
self
,
data
):
text_label_index_list
,
temp_text
=
[],
[]
texts
=
data
[
'strs'
]
texts
=
data
[
'strs'
]
temp_texts
=
[]
for
text
in
texts
:
for
text
in
texts
:
text
=
text
.
lower
()
text
=
text
.
lower
()
text
=
self
.
encode
(
text
)
temp_text
=
[]
if
text
is
None
:
for
c_
in
text
:
return
None
if
c_
in
self
.
dict
:
text
=
text
+
[
self
.
pad_num
]
*
(
self
.
max_text_len
-
len
(
text
))
temp_text
.
append
(
self
.
dict
[
c_
])
temp_texts
.
append
(
text
)
temp_text
=
temp_text
+
[
self
.
pad_num
]
*
(
self
.
max_text_len
-
data
[
'strs'
]
=
np
.
array
(
temp_texts
)
len
(
temp_text
))
text_label_index_list
.
append
(
temp_text
)
data
[
'strs'
]
=
np
.
array
(
text_label_index_list
)
return
data
return
data
class
KieLabelEncode
(
object
):
def
__init__
(
self
,
character_dict_path
,
norm
=
10
,
directed
=
False
,
**
kwargs
):
super
(
KieLabelEncode
,
self
).
__init__
()
self
.
dict
=
dict
({
''
:
0
})
with
open
(
character_dict_path
,
'r'
)
as
fr
:
idx
=
1
for
line
in
fr
:
char
=
line
.
strip
()
self
.
dict
[
char
]
=
idx
idx
+=
1
self
.
norm
=
norm
self
.
directed
=
directed
def
compute_relation
(
self
,
boxes
):
"""Compute relation between every two boxes."""
x1s
,
y1s
=
boxes
[:,
0
:
1
],
boxes
[:,
1
:
2
]
x2s
,
y2s
=
boxes
[:,
4
:
5
],
boxes
[:,
5
:
6
]
ws
,
hs
=
x2s
-
x1s
+
1
,
np
.
maximum
(
y2s
-
y1s
+
1
,
1
)
dxs
=
(
x1s
[:,
0
][
None
]
-
x1s
)
/
self
.
norm
dys
=
(
y1s
[:,
0
][
None
]
-
y1s
)
/
self
.
norm
xhhs
,
xwhs
=
hs
[:,
0
][
None
]
/
hs
,
ws
[:,
0
][
None
]
/
hs
whs
=
ws
/
hs
+
np
.
zeros_like
(
xhhs
)
relations
=
np
.
stack
([
dxs
,
dys
,
whs
,
xhhs
,
xwhs
],
-
1
)
bboxes
=
np
.
concatenate
([
x1s
,
y1s
,
x2s
,
y2s
],
-
1
).
astype
(
np
.
float32
)
return
relations
,
bboxes
def
pad_text_indices
(
self
,
text_inds
):
"""Pad text index to same length."""
max_len
=
100
recoder_len
=
max
([
len
(
text_ind
)
for
text_ind
in
text_inds
])
padded_text_inds
=
-
np
.
ones
((
len
(
text_inds
),
max_len
),
np
.
int32
)
for
idx
,
text_ind
in
enumerate
(
text_inds
):
padded_text_inds
[
idx
,
:
len
(
text_ind
)]
=
np
.
array
(
text_ind
)
return
padded_text_inds
,
recoder_len
def
list_to_numpy
(
self
,
ann_infos
):
"""Convert bboxes, relations, texts and labels to ndarray."""
boxes
,
text_inds
=
ann_infos
[
'points'
],
ann_infos
[
'text_inds'
]
boxes
=
np
.
array
(
boxes
,
np
.
int32
)
relations
,
bboxes
=
self
.
compute_relation
(
boxes
)
labels
=
ann_infos
.
get
(
'labels'
,
None
)
if
labels
is
not
None
:
labels
=
np
.
array
(
labels
,
np
.
int32
)
edges
=
ann_infos
.
get
(
'edges'
,
None
)
if
edges
is
not
None
:
labels
=
labels
[:,
None
]
edges
=
np
.
array
(
edges
)
edges
=
(
edges
[:,
None
]
==
edges
[
None
,
:]).
astype
(
np
.
int32
)
if
self
.
directed
:
edges
=
(
edges
&
labels
==
1
).
astype
(
np
.
int32
)
np
.
fill_diagonal
(
edges
,
-
1
)
labels
=
np
.
concatenate
([
labels
,
edges
],
-
1
)
padded_text_inds
,
recoder_len
=
self
.
pad_text_indices
(
text_inds
)
max_num
=
100
temp_bboxes
=
np
.
zeros
([
max_num
,
4
])
h
,
_
=
bboxes
.
shape
temp_bboxes
[:
h
,
:
h
]
=
bboxes
temp_relations
=
np
.
zeros
([
max_num
,
max_num
,
5
])
temp_relations
[:
h
,
:
h
,
:]
=
relations
temp_padded_text_inds
=
np
.
zeros
([
max_num
,
100
])
temp_padded_text_inds
[:
h
,
:]
=
padded_text_inds
temp_labels
=
np
.
zeros
([
max_num
,
100
])
temp_labels
[:
h
,
:
h
+
1
]
=
labels
tag
=
np
.
array
([
h
,
recoder_len
])
return
dict
(
image
=
ann_infos
[
'image'
],
points
=
temp_bboxes
,
relations
=
temp_relations
,
texts
=
temp_padded_text_inds
,
labels
=
temp_labels
,
tag
=
tag
)
def
convert_canonical
(
self
,
points_x
,
points_y
):
assert
len
(
points_x
)
==
4
assert
len
(
points_y
)
==
4
points
=
[
Point
(
points_x
[
i
],
points_y
[
i
])
for
i
in
range
(
4
)]
polygon
=
Polygon
([(
p
.
x
,
p
.
y
)
for
p
in
points
])
min_x
,
min_y
,
_
,
_
=
polygon
.
bounds
points_to_lefttop
=
[
LineString
([
points
[
i
],
Point
(
min_x
,
min_y
)])
for
i
in
range
(
4
)
]
distances
=
np
.
array
([
line
.
length
for
line
in
points_to_lefttop
])
sort_dist_idx
=
np
.
argsort
(
distances
)
lefttop_idx
=
sort_dist_idx
[
0
]
if
lefttop_idx
==
0
:
point_orders
=
[
0
,
1
,
2
,
3
]
elif
lefttop_idx
==
1
:
point_orders
=
[
1
,
2
,
3
,
0
]
elif
lefttop_idx
==
2
:
point_orders
=
[
2
,
3
,
0
,
1
]
else
:
point_orders
=
[
3
,
0
,
1
,
2
]
sorted_points_x
=
[
points_x
[
i
]
for
i
in
point_orders
]
sorted_points_y
=
[
points_y
[
j
]
for
j
in
point_orders
]
return
sorted_points_x
,
sorted_points_y
def
sort_vertex
(
self
,
points_x
,
points_y
):
assert
len
(
points_x
)
==
4
assert
len
(
points_y
)
==
4
x
=
np
.
array
(
points_x
)
y
=
np
.
array
(
points_y
)
center_x
=
np
.
sum
(
x
)
*
0.25
center_y
=
np
.
sum
(
y
)
*
0.25
x_arr
=
np
.
array
(
x
-
center_x
)
y_arr
=
np
.
array
(
y
-
center_y
)
angle
=
np
.
arctan2
(
y_arr
,
x_arr
)
*
180.0
/
np
.
pi
sort_idx
=
np
.
argsort
(
angle
)
sorted_points_x
,
sorted_points_y
=
[],
[]
for
i
in
range
(
4
):
sorted_points_x
.
append
(
points_x
[
sort_idx
[
i
]])
sorted_points_y
.
append
(
points_y
[
sort_idx
[
i
]])
return
self
.
convert_canonical
(
sorted_points_x
,
sorted_points_y
)
def
__call__
(
self
,
data
):
import
json
label
=
data
[
'label'
]
annotations
=
json
.
loads
(
label
)
boxes
,
texts
,
text_inds
,
labels
,
edges
=
[],
[],
[],
[],
[]
for
ann
in
annotations
:
box
=
ann
[
'points'
]
x_list
=
[
box
[
i
][
0
]
for
i
in
range
(
4
)]
y_list
=
[
box
[
i
][
1
]
for
i
in
range
(
4
)]
sorted_x_list
,
sorted_y_list
=
self
.
sort_vertex
(
x_list
,
y_list
)
sorted_box
=
[]
for
x
,
y
in
zip
(
sorted_x_list
,
sorted_y_list
):
sorted_box
.
append
(
x
)
sorted_box
.
append
(
y
)
boxes
.
append
(
sorted_box
)
text
=
ann
[
'transcription'
]
texts
.
append
(
ann
[
'transcription'
])
text_ind
=
[
self
.
dict
[
c
]
for
c
in
text
if
c
in
self
.
dict
]
text_inds
.
append
(
text_ind
)
labels
.
append
(
ann
[
'label'
])
edges
.
append
(
ann
.
get
(
'edge'
,
0
))
ann_infos
=
dict
(
image
=
data
[
'image'
],
points
=
boxes
,
texts
=
texts
,
text_inds
=
text_inds
,
edges
=
edges
,
labels
=
labels
)
return
self
.
list_to_numpy
(
ann_infos
)
class
AttnLabelEncode
(
BaseRecLabelEncode
):
class
AttnLabelEncode
(
BaseRecLabelEncode
):
""" Convert between text-label and text-index """
""" Convert between text-label and text-index """
...
...
ppocr/data/imaug/operators.py
View file @
de88d73d
...
@@ -77,7 +77,6 @@ class NormalizeImage(object):
...
@@ -77,7 +77,6 @@ class NormalizeImage(object):
from
PIL
import
Image
from
PIL
import
Image
if
isinstance
(
img
,
Image
.
Image
):
if
isinstance
(
img
,
Image
.
Image
):
img
=
np
.
array
(
img
)
img
=
np
.
array
(
img
)
assert
isinstance
(
img
,
assert
isinstance
(
img
,
np
.
ndarray
),
"invalid input 'img' in NormalizeImage"
np
.
ndarray
),
"invalid input 'img' in NormalizeImage"
data
[
'image'
]
=
(
data
[
'image'
]
=
(
...
@@ -290,3 +289,49 @@ class E2EResizeForTest(object):
...
@@ -290,3 +289,49 @@ class E2EResizeForTest(object):
ratio_w
=
resize_w
/
float
(
w
)
ratio_w
=
resize_w
/
float
(
w
)
return
im
,
(
ratio_h
,
ratio_w
)
return
im
,
(
ratio_h
,
ratio_w
)
class
KieResize
(
object
):
def
__init__
(
self
,
**
kwargs
):
super
(
KieResize
,
self
).
__init__
()
self
.
max_side
,
self
.
min_side
=
kwargs
[
'img_scale'
][
0
],
kwargs
[
'img_scale'
][
1
]
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
points
=
data
[
'points'
]
src_h
,
src_w
,
_
=
img
.
shape
im_resized
,
scale_factor
,
[
ratio_h
,
ratio_w
]
=
self
.
resize_image
(
img
)
resize_points
=
self
.
resize_boxes
(
img
,
points
,
scale_factor
)
data
[
'ori_image'
]
=
img
data
[
'ori_boxes'
]
=
points
data
[
'points'
]
=
resize_points
data
[
'image'
]
=
im_resized
data
[
'shape'
]
=
np
.
array
([
src_h
,
src_w
,
ratio_h
,
ratio_w
])
return
data
def
resize_image
(
self
,
img
):
norm_img
=
np
.
zeros
([
1024
,
512
,
3
],
dtype
=
'float32'
)
scale
=
[
512
,
1024
]
h
,
w
=
img
.
shape
[:
2
]
max_long_edge
=
max
(
scale
)
max_short_edge
=
min
(
scale
)
scale_factor
=
min
(
max_long_edge
/
max
(
h
,
w
),
max_short_edge
/
min
(
h
,
w
))
new_size
=
(
int
(
w
*
float
(
scale_factor
)
+
0.5
),
int
(
h
*
float
(
scale_factor
)
+
0.5
))
im
=
cv2
.
resize
(
img
,
new_size
)
new_h
,
new_w
=
im
.
shape
[:
2
]
w_scale
=
new_w
/
w
h_scale
=
new_h
/
h
scale_factor
=
np
.
array
(
[
w_scale
,
h_scale
,
w_scale
,
h_scale
],
dtype
=
np
.
float32
)
norm_img
[:
new_h
,
:
new_w
,
:]
=
im
return
norm_img
,
scale_factor
,
[
h_scale
,
w_scale
]
def
resize_boxes
(
self
,
im
,
points
,
scale_factor
):
points
=
points
*
scale_factor
img_shape
=
im
.
shape
[:
2
]
points
[:,
0
::
2
]
=
np
.
clip
(
points
[:,
0
::
2
],
0
,
img_shape
[
1
])
points
[:,
1
::
2
]
=
np
.
clip
(
points
[:,
1
::
2
],
0
,
img_shape
[
0
])
return
points
ppocr/losses/__init__.py
View file @
de88d73d
...
@@ -31,9 +31,14 @@ def build_loss(config):
...
@@ -31,9 +31,14 @@ def build_loss(config):
# e2e loss
# e2e loss
from
.e2e_pg_loss
import
PGLoss
from
.e2e_pg_loss
import
PGLoss
# kie loss
from
.kie_sdmgr_loss
import
SDMGRLoss
support_dict
=
[
support_dict
=
[
'DBLoss'
,
'EASTLoss'
,
'SASTLoss'
,
'CTCLoss'
,
'ClsLoss'
,
'AttentionLoss'
,
'DBLoss'
,
'EASTLoss'
,
'SASTLoss'
,
'CTCLoss'
,
'ClsLoss'
,
'AttentionLoss'
,
'SRNLoss'
,
'PGLoss'
]
'SRNLoss'
,
'PGLoss'
,
'SDMGRLoss'
]
config
=
copy
.
deepcopy
(
config
)
config
=
copy
.
deepcopy
(
config
)
module_name
=
config
.
pop
(
'name'
)
module_name
=
config
.
pop
(
'name'
)
...
...
ppocr/losses/kie_sdmgr_loss.py
0 → 100644
View file @
de88d73d
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle
import
nn
import
paddle
class
SDMGRLoss
(
nn
.
Layer
):
def
__init__
(
self
,
node_weight
=
1.0
,
edge_weight
=
1.0
,
ignore
=
0
):
super
().
__init__
()
self
.
loss_node
=
nn
.
CrossEntropyLoss
(
ignore_index
=
ignore
)
self
.
loss_edge
=
nn
.
CrossEntropyLoss
(
ignore_index
=-
1
)
self
.
node_weight
=
node_weight
self
.
edge_weight
=
edge_weight
self
.
ignore
=
ignore
def
pre_process
(
self
,
gts
,
tag
):
gts
,
tag
=
gts
.
numpy
(),
tag
.
numpy
().
tolist
()
temp_gts
=
[]
batch
=
len
(
tag
)
for
i
in
range
(
batch
):
num
,
recoder_len
=
tag
[
i
][
0
],
tag
[
i
][
1
]
temp_gts
.
append
(
paddle
.
to_tensor
(
gts
[
i
,
:
num
,
:
num
+
1
],
dtype
=
'int64'
))
return
temp_gts
def
accuracy
(
self
,
pred
,
target
,
topk
=
1
,
thresh
=
None
):
"""Calculate accuracy according to the prediction and target.
Args:
pred (torch.Tensor): The model prediction, shape (N, num_class)
target (torch.Tensor): The target of each prediction, shape (N, )
topk (int | tuple[int], optional): If the predictions in ``topk``
matches the target, the predictions will be regarded as
correct ones. Defaults to 1.
thresh (float, optional): If not None, predictions with scores under
this threshold are considered incorrect. Default to None.
Returns:
float | tuple[float]: If the input ``topk`` is a single integer,
the function will return a single float as accuracy. If
``topk`` is a tuple containing multiple integers, the
function will return a tuple containing accuracies of
each ``topk`` number.
"""
assert
isinstance
(
topk
,
(
int
,
tuple
))
if
isinstance
(
topk
,
int
):
topk
=
(
topk
,
)
return_single
=
True
else
:
return_single
=
False
maxk
=
max
(
topk
)
if
pred
.
shape
[
0
]
==
0
:
accu
=
[
pred
.
new_tensor
(
0.
)
for
i
in
range
(
len
(
topk
))]
return
accu
[
0
]
if
return_single
else
accu
pred_value
,
pred_label
=
paddle
.
topk
(
pred
,
maxk
,
axis
=
1
)
pred_label
=
pred_label
.
transpose
(
[
1
,
0
])
# transpose to shape (maxk, N)
correct
=
paddle
.
equal
(
pred_label
,
(
target
.
reshape
([
1
,
-
1
]).
expand_as
(
pred_label
)))
res
=
[]
for
k
in
topk
:
correct_k
=
paddle
.
sum
(
correct
[:
k
].
reshape
([
-
1
]).
astype
(
'float32'
),
axis
=
0
,
keepdim
=
True
)
res
.
append
(
paddle
.
multiply
(
correct_k
,
paddle
.
to_tensor
(
100.0
/
pred
.
shape
[
0
])))
return
res
[
0
]
if
return_single
else
res
def
forward
(
self
,
pred
,
batch
):
node_preds
,
edge_preds
=
pred
gts
,
tag
=
batch
[
4
],
batch
[
5
]
gts
=
self
.
pre_process
(
gts
,
tag
)
node_gts
,
edge_gts
=
[],
[]
for
gt
in
gts
:
node_gts
.
append
(
gt
[:,
0
])
edge_gts
.
append
(
gt
[:,
1
:].
reshape
([
-
1
]))
node_gts
=
paddle
.
concat
(
node_gts
)
edge_gts
=
paddle
.
concat
(
edge_gts
)
node_valids
=
paddle
.
nonzero
(
node_gts
!=
self
.
ignore
).
reshape
([
-
1
])
edge_valids
=
paddle
.
nonzero
(
edge_gts
!=
-
1
).
reshape
([
-
1
])
loss_node
=
self
.
loss_node
(
node_preds
,
node_gts
)
loss_edge
=
self
.
loss_edge
(
edge_preds
,
edge_gts
)
loss
=
self
.
node_weight
*
loss_node
+
self
.
edge_weight
*
loss_edge
return
dict
(
loss
=
loss
,
loss_node
=
loss_node
,
loss_edge
=
loss_edge
,
acc_node
=
self
.
accuracy
(
paddle
.
gather
(
node_preds
,
node_valids
),
paddle
.
gather
(
node_gts
,
node_valids
)),
acc_edge
=
self
.
accuracy
(
paddle
.
gather
(
edge_preds
,
edge_valids
),
paddle
.
gather
(
edge_gts
,
edge_valids
)))
ppocr/metrics/__init__.py
View file @
de88d73d
...
@@ -27,8 +27,11 @@ def build_metric(config):
...
@@ -27,8 +27,11 @@ def build_metric(config):
from
.rec_metric
import
RecMetric
from
.rec_metric
import
RecMetric
from
.cls_metric
import
ClsMetric
from
.cls_metric
import
ClsMetric
from
.e2e_metric
import
E2EMetric
from
.e2e_metric
import
E2EMetric
from
.kie_metric
import
KIEMetric
support_dict
=
[
'DetMetric'
,
'RecMetric'
,
'ClsMetric'
,
'E2EMetric'
]
support_dict
=
[
'DetMetric'
,
'RecMetric'
,
'ClsMetric'
,
'E2EMetric'
,
'KIEMetric'
]
config
=
copy
.
deepcopy
(
config
)
config
=
copy
.
deepcopy
(
config
)
module_name
=
config
.
pop
(
'name'
)
module_name
=
config
.
pop
(
'name'
)
...
...
ppocr/metrics/kie_metric.py
0 → 100644
View file @
de88d73d
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
__all__
=
[
'KIEMetric'
]
class
KIEMetric
(
object
):
def
__init__
(
self
,
main_indicator
=
'hmean'
,
**
kwargs
):
self
.
main_indicator
=
main_indicator
self
.
reset
()
def
__call__
(
self
,
preds
,
batch
,
**
kwargs
):
nodes
,
_
=
preds
gts
,
tag
=
batch
[
4
].
squeeze
(
0
),
batch
[
5
].
tolist
()[
0
]
gts
=
gts
[:
tag
[
0
],
:
1
].
reshape
([
-
1
])
result
=
self
.
compute_f1_score
(
nodes
,
gts
)
self
.
results
.
append
(
result
)
def
compute_f1_score
(
self
,
preds
,
gts
):
preds
=
preds
.
numpy
()
ignores
=
[
0
,
2
,
4
,
6
,
8
,
10
,
12
,
14
,
16
,
18
,
20
,
22
,
24
,
25
]
C
=
preds
.
shape
[
1
]
classes
=
np
.
array
(
sorted
(
set
(
range
(
C
))
-
set
(
ignores
)))
hist
=
np
.
bincount
(
(
gts
*
C
).
astype
(
'int64'
)
+
preds
.
argmax
(
1
),
minlength
=
C
**
2
).
reshape
([
C
,
C
]).
astype
(
'float32'
)
diag
=
np
.
diag
(
hist
)
recalls
=
diag
/
hist
.
sum
(
1
).
clip
(
min
=
1
)
precisions
=
diag
/
hist
.
sum
(
0
).
clip
(
min
=
1
)
f1
=
2
*
recalls
*
precisions
/
(
recalls
+
precisions
).
clip
(
min
=
1e-8
)
return
f1
[
classes
]
def
combine_results
(
self
,
results
):
data
=
{
'hmean'
:
np
.
mean
(
results
[
0
])}
return
data
def
get_metric
(
self
):
metircs
=
self
.
combine_results
(
self
.
results
)
self
.
reset
()
return
metircs
def
reset
(
self
):
self
.
results
=
[]
# clear results
ppocr/modeling/backbones/__init__.py
View file @
de88d73d
...
@@ -29,6 +29,10 @@ def build_backbone(config, model_type):
...
@@ -29,6 +29,10 @@ def build_backbone(config, model_type):
elif
model_type
==
'e2e'
:
elif
model_type
==
'e2e'
:
from
.e2e_resnet_vd_pg
import
ResNet
from
.e2e_resnet_vd_pg
import
ResNet
support_dict
=
[
'ResNet'
]
support_dict
=
[
'ResNet'
]
elif
model_type
==
'kie'
:
from
.kie_unet_sdmgr
import
Kie_backbone
support_dict
=
[
'Kie_backbone'
]
else
:
else
:
raise
NotImplementedError
raise
NotImplementedError
...
...
ppocr/modeling/backbones/kie_unet_sdmgr.py
0 → 100644
View file @
de88d73d
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
__all__
=
[
"Kie_backbone"
]
class
Encoder
(
nn
.
Layer
):
def
__init__
(
self
,
num_channels
,
num_filters
):
super
(
Encoder
,
self
).
__init__
()
self
.
conv1
=
nn
.
Conv2D
(
num_channels
,
num_filters
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn1
=
nn
.
BatchNorm
(
num_filters
,
act
=
'relu'
)
self
.
conv2
=
nn
.
Conv2D
(
num_filters
,
num_filters
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn2
=
nn
.
BatchNorm
(
num_filters
,
act
=
'relu'
)
self
.
pool
=
nn
.
MaxPool2D
(
kernel_size
=
3
,
stride
=
2
,
padding
=
1
)
def
forward
(
self
,
inputs
):
x
=
self
.
conv1
(
inputs
)
x
=
self
.
bn1
(
x
)
x
=
self
.
conv2
(
x
)
x
=
self
.
bn2
(
x
)
x_pooled
=
self
.
pool
(
x
)
return
x
,
x_pooled
class
Decoder
(
nn
.
Layer
):
def
__init__
(
self
,
num_channels
,
num_filters
):
super
(
Decoder
,
self
).
__init__
()
self
.
up
=
nn
.
Conv2DTranspose
(
in_channels
=
num_channels
,
out_channels
=
num_filters
,
kernel_size
=
2
,
stride
=
2
)
self
.
conv1
=
nn
.
Conv2D
(
num_channels
,
num_filters
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn1
=
nn
.
BatchNorm
(
num_filters
,
act
=
'relu'
)
self
.
conv2
=
nn
.
Conv2D
(
num_filters
,
num_filters
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn2
=
nn
.
BatchNorm
(
num_filters
,
act
=
'relu'
)
def
forward
(
self
,
inputs_prev
,
inputs
):
x
=
self
.
up
(
inputs
)
x
=
paddle
.
concat
([
inputs_prev
,
x
],
axis
=
1
)
x
=
self
.
conv1
(
x
)
x
=
self
.
bn1
(
x
)
x
=
self
.
conv2
(
x
)
x
=
self
.
bn2
(
x
)
return
x
class
UNet
(
nn
.
Layer
):
def
__init__
(
self
):
super
(
UNet
,
self
).
__init__
()
self
.
down1
=
Encoder
(
num_channels
=
3
,
num_filters
=
16
)
self
.
down2
=
Encoder
(
num_channels
=
16
,
num_filters
=
32
)
self
.
down3
=
Encoder
(
num_channels
=
32
,
num_filters
=
64
)
self
.
down4
=
Encoder
(
num_channels
=
64
,
num_filters
=
128
)
self
.
down5
=
Encoder
(
num_channels
=
128
,
num_filters
=
256
)
self
.
up4
=
Decoder
(
256
,
128
)
self
.
up3
=
Decoder
(
128
,
64
)
self
.
up2
=
Decoder
(
64
,
32
)
self
.
up1
=
Decoder
(
32
,
16
)
self
.
out_channels
=
16
def
forward
(
self
,
inputs
):
x1
,
x
=
self
.
down1
(
inputs
)
x2
,
x
=
self
.
down2
(
x
)
x3
,
x
=
self
.
down3
(
x
)
x4
,
x
=
self
.
down4
(
x
)
x5
,
x
=
self
.
down5
(
x
)
x
=
self
.
up4
(
x4
,
x5
)
x
=
self
.
up3
(
x3
,
x
)
x
=
self
.
up2
(
x2
,
x
)
x
=
self
.
up1
(
x1
,
x
)
return
x
class
Kie_backbone
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
,
**
kwargs
):
super
(
Kie_backbone
,
self
).
__init__
()
self
.
out_channels
=
16
self
.
img_feat
=
UNet
()
self
.
maxpool
=
nn
.
MaxPool2D
(
kernel_size
=
7
)
def
bbox2roi
(
self
,
bbox_list
):
rois_list
=
[]
rois_num
=
[]
for
img_id
,
bboxes
in
enumerate
(
bbox_list
):
rois_num
.
append
(
bboxes
.
shape
[
0
])
rois_list
.
append
(
bboxes
)
rois
=
paddle
.
concat
(
rois_list
,
0
)
rois_num
=
paddle
.
to_tensor
(
rois_num
,
dtype
=
'int32'
)
return
rois
,
rois_num
def
pre_process
(
self
,
relations
,
texts
,
gt_bboxes
,
tag
):
relations
,
texts
,
gt_bboxes
,
tag
=
relations
.
numpy
(),
texts
.
numpy
(
),
gt_bboxes
.
numpy
(),
tag
.
numpy
().
tolist
()
temp_relations
,
temp_texts
,
temp_gt_bboxes
=
[],
[],
[]
batch
=
len
(
tag
)
for
i
in
range
(
batch
):
num
,
recoder_len
=
tag
[
i
][
0
],
tag
[
i
][
1
]
temp_relations
.
append
(
paddle
.
to_tensor
(
relations
[
i
,
:
num
,
:
num
,
:],
dtype
=
'float32'
))
temp_texts
.
append
(
paddle
.
to_tensor
(
texts
[
i
,
:
num
,
:
recoder_len
],
dtype
=
'float32'
))
temp_gt_bboxes
.
append
(
paddle
.
to_tensor
(
gt_bboxes
[
i
,
:
num
,
...],
dtype
=
'float32'
))
return
temp_relations
,
temp_texts
,
temp_gt_bboxes
def
forward
(
self
,
inputs
):
img
,
relations
,
texts
,
gt_bboxes
,
tag
=
inputs
[
0
],
inputs
[
1
],
inputs
[
2
],
inputs
[
3
],
inputs
[
5
]
relations
,
texts
,
gt_bboxes
=
self
.
pre_process
(
relations
,
texts
,
gt_bboxes
,
tag
)
x
=
self
.
img_feat
(
img
)
boxes
,
rois_num
=
self
.
bbox2roi
(
gt_bboxes
)
feats
=
paddle
.
fluid
.
layers
.
roi_align
(
x
,
boxes
,
spatial_scale
=
1.0
,
pooled_height
=
7
,
pooled_width
=
7
,
rois_num
=
rois_num
)
feats
=
self
.
maxpool
(
feats
).
squeeze
(
-
1
).
squeeze
(
-
1
)
return
[
relations
,
texts
,
feats
]
ppocr/modeling/heads/__init__.py
View file @
de88d73d
...
@@ -29,10 +29,14 @@ def build_head(config):
...
@@ -29,10 +29,14 @@ def build_head(config):
# cls head
# cls head
from
.cls_head
import
ClsHead
from
.cls_head
import
ClsHead
#kie head
from
.kie_sdmgr_head
import
SDMGRHead
support_dict
=
[
support_dict
=
[
'DBHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
,
'AttentionHead'
,
'DBHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
,
'AttentionHead'
,
'SRNHead'
,
'PGHead'
]
'SRNHead'
,
'PGHead'
,
'SDMGRHead'
]
module_name
=
config
.
pop
(
'name'
)
module_name
=
config
.
pop
(
'name'
)
assert
module_name
in
support_dict
,
Exception
(
'head only support {}'
.
format
(
assert
module_name
in
support_dict
,
Exception
(
'head only support {}'
.
format
(
...
...
ppocr/modeling/heads/kie_sdmgr_head.py
0 → 100644
View file @
de88d73d
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
math
import
paddle
from
paddle
import
nn
import
paddle.nn.functional
as
F
from
paddle
import
ParamAttr
class
SDMGRHead
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
,
num_chars
=
92
,
visual_dim
=
16
,
fusion_dim
=
1024
,
node_input
=
32
,
node_embed
=
256
,
edge_input
=
5
,
edge_embed
=
256
,
num_gnn
=
2
,
num_classes
=
26
,
bidirectional
=
False
):
super
().
__init__
()
self
.
fusion
=
Block
([
visual_dim
,
node_embed
],
node_embed
,
fusion_dim
)
self
.
node_embed
=
nn
.
Embedding
(
num_chars
,
node_input
,
0
)
hidden
=
node_embed
//
2
if
bidirectional
else
node_embed
self
.
rnn
=
nn
.
LSTM
(
input_size
=
node_input
,
hidden_size
=
hidden
,
num_layers
=
1
)
self
.
edge_embed
=
nn
.
Linear
(
edge_input
,
edge_embed
)
self
.
gnn_layers
=
nn
.
LayerList
(
[
GNNLayer
(
node_embed
,
edge_embed
)
for
_
in
range
(
num_gnn
)])
self
.
node_cls
=
nn
.
Linear
(
node_embed
,
num_classes
)
self
.
edge_cls
=
nn
.
Linear
(
edge_embed
,
2
)
def
forward
(
self
,
input
):
relations
,
texts
,
x
=
input
node_nums
,
char_nums
=
[],
[]
for
text
in
texts
:
node_nums
.
append
(
text
.
shape
[
0
])
char_nums
.
append
(
paddle
.
sum
((
text
>
-
1
).
astype
(
int
),
axis
=-
1
))
max_num
=
max
([
char_num
.
max
()
for
char_num
in
char_nums
])
all_nodes
=
paddle
.
concat
([
paddle
.
concat
(
[
text
,
paddle
.
zeros
(
(
text
.
shape
[
0
],
max_num
-
text
.
shape
[
1
]))],
-
1
)
for
text
in
texts
])
temp
=
paddle
.
clip
(
all_nodes
,
min
=
0
).
astype
(
int
)
embed_nodes
=
self
.
node_embed
(
temp
)
rnn_nodes
,
_
=
self
.
rnn
(
embed_nodes
)
b
,
h
,
w
=
rnn_nodes
.
shape
nodes
=
paddle
.
zeros
([
b
,
w
])
all_nums
=
paddle
.
concat
(
char_nums
)
valid
=
paddle
.
nonzero
((
all_nums
>
0
).
astype
(
int
))
temp_all_nums
=
(
paddle
.
gather
(
all_nums
,
valid
)
-
1
).
unsqueeze
(
-
1
).
unsqueeze
(
-
1
)
temp_all_nums
=
paddle
.
expand
(
temp_all_nums
,
[
temp_all_nums
.
shape
[
0
],
temp_all_nums
.
shape
[
1
],
rnn_nodes
.
shape
[
-
1
]
])
temp_all_nodes
=
paddle
.
gather
(
rnn_nodes
,
valid
)
N
,
C
,
A
=
temp_all_nodes
.
shape
one_hot
=
F
.
one_hot
(
temp_all_nums
[:,
0
,
:],
num_classes
=
C
).
transpose
([
0
,
2
,
1
])
one_hot
=
paddle
.
multiply
(
temp_all_nodes
,
one_hot
.
astype
(
"float32"
)).
sum
(
axis
=
1
,
keepdim
=
True
)
t
=
one_hot
.
expand
([
N
,
1
,
A
]).
squeeze
(
1
)
nodes
=
paddle
.
scatter
(
nodes
,
valid
.
squeeze
(
1
),
t
)
if
x
is
not
None
:
nodes
=
self
.
fusion
([
x
,
nodes
])
all_edges
=
paddle
.
concat
(
[
rel
.
reshape
([
-
1
,
rel
.
shape
[
-
1
]])
for
rel
in
relations
])
embed_edges
=
self
.
edge_embed
(
all_edges
.
astype
(
'float32'
))
embed_edges
=
F
.
normalize
(
embed_edges
)
for
gnn_layer
in
self
.
gnn_layers
:
nodes
,
cat_nodes
=
gnn_layer
(
nodes
,
embed_edges
,
node_nums
)
node_cls
,
edge_cls
=
self
.
node_cls
(
nodes
),
self
.
edge_cls
(
cat_nodes
)
return
node_cls
,
edge_cls
class
GNNLayer
(
nn
.
Layer
):
def
__init__
(
self
,
node_dim
=
256
,
edge_dim
=
256
):
super
().
__init__
()
self
.
in_fc
=
nn
.
Linear
(
node_dim
*
2
+
edge_dim
,
node_dim
)
self
.
coef_fc
=
nn
.
Linear
(
node_dim
,
1
)
self
.
out_fc
=
nn
.
Linear
(
node_dim
,
node_dim
)
self
.
relu
=
nn
.
ReLU
()
def
forward
(
self
,
nodes
,
edges
,
nums
):
start
,
cat_nodes
=
0
,
[]
for
num
in
nums
:
sample_nodes
=
nodes
[
start
:
start
+
num
]
cat_nodes
.
append
(
paddle
.
concat
([
paddle
.
expand
(
sample_nodes
.
unsqueeze
(
1
),
[
-
1
,
num
,
-
1
]),
paddle
.
expand
(
sample_nodes
.
unsqueeze
(
0
),
[
num
,
-
1
,
-
1
])
],
-
1
).
reshape
([
num
**
2
,
-
1
]))
start
+=
num
cat_nodes
=
paddle
.
concat
([
paddle
.
concat
(
cat_nodes
),
edges
],
-
1
)
cat_nodes
=
self
.
relu
(
self
.
in_fc
(
cat_nodes
))
coefs
=
self
.
coef_fc
(
cat_nodes
)
start
,
residuals
=
0
,
[]
for
num
in
nums
:
residual
=
F
.
softmax
(
-
paddle
.
eye
(
num
).
unsqueeze
(
-
1
)
*
1e9
+
coefs
[
start
:
start
+
num
**
2
].
reshape
([
num
,
num
,
-
1
]),
1
)
residuals
.
append
((
residual
*
cat_nodes
[
start
:
start
+
num
**
2
]
.
reshape
([
num
,
num
,
-
1
])).
sum
(
1
))
start
+=
num
**
2
nodes
+=
self
.
relu
(
self
.
out_fc
(
paddle
.
concat
(
residuals
)))
return
[
nodes
,
cat_nodes
]
class
Block
(
nn
.
Layer
):
def
__init__
(
self
,
input_dims
,
output_dim
,
mm_dim
=
1600
,
chunks
=
20
,
rank
=
15
,
shared
=
False
,
dropout_input
=
0.
,
dropout_pre_lin
=
0.
,
dropout_output
=
0.
,
pos_norm
=
'before_cat'
):
super
().
__init__
()
self
.
rank
=
rank
self
.
dropout_input
=
dropout_input
self
.
dropout_pre_lin
=
dropout_pre_lin
self
.
dropout_output
=
dropout_output
assert
(
pos_norm
in
[
'before_cat'
,
'after_cat'
])
self
.
pos_norm
=
pos_norm
# Modules
self
.
linear0
=
nn
.
Linear
(
input_dims
[
0
],
mm_dim
)
self
.
linear1
=
(
self
.
linear0
if
shared
else
nn
.
Linear
(
input_dims
[
1
],
mm_dim
))
self
.
merge_linears0
=
nn
.
LayerList
()
self
.
merge_linears1
=
nn
.
LayerList
()
self
.
chunks
=
self
.
chunk_sizes
(
mm_dim
,
chunks
)
for
size
in
self
.
chunks
:
ml0
=
nn
.
Linear
(
size
,
size
*
rank
)
self
.
merge_linears0
.
append
(
ml0
)
ml1
=
ml0
if
shared
else
nn
.
Linear
(
size
,
size
*
rank
)
self
.
merge_linears1
.
append
(
ml1
)
self
.
linear_out
=
nn
.
Linear
(
mm_dim
,
output_dim
)
def
forward
(
self
,
x
):
x0
=
self
.
linear0
(
x
[
0
])
x1
=
self
.
linear1
(
x
[
1
])
bs
=
x1
.
shape
[
0
]
if
self
.
dropout_input
>
0
:
x0
=
F
.
dropout
(
x0
,
p
=
self
.
dropout_input
,
training
=
self
.
training
)
x1
=
F
.
dropout
(
x1
,
p
=
self
.
dropout_input
,
training
=
self
.
training
)
x0_chunks
=
paddle
.
split
(
x0
,
self
.
chunks
,
-
1
)
x1_chunks
=
paddle
.
split
(
x1
,
self
.
chunks
,
-
1
)
zs
=
[]
for
x0_c
,
x1_c
,
m0
,
m1
in
zip
(
x0_chunks
,
x1_chunks
,
self
.
merge_linears0
,
self
.
merge_linears1
):
m
=
m0
(
x0_c
)
*
m1
(
x1_c
)
# bs x split_size*rank
m
=
m
.
reshape
([
bs
,
self
.
rank
,
-
1
])
z
=
paddle
.
sum
(
m
,
1
)
if
self
.
pos_norm
==
'before_cat'
:
z
=
paddle
.
sqrt
(
F
.
relu
(
z
))
-
paddle
.
sqrt
(
F
.
relu
(
-
z
))
z
=
F
.
normalize
(
z
)
zs
.
append
(
z
)
z
=
paddle
.
concat
(
zs
,
1
)
if
self
.
pos_norm
==
'after_cat'
:
z
=
paddle
.
sqrt
(
F
.
relu
(
z
))
-
paddle
.
sqrt
(
F
.
relu
(
-
z
))
z
=
F
.
normalize
(
z
)
if
self
.
dropout_pre_lin
>
0
:
z
=
F
.
dropout
(
z
,
p
=
self
.
dropout_pre_lin
,
training
=
self
.
training
)
z
=
self
.
linear_out
(
z
)
if
self
.
dropout_output
>
0
:
z
=
F
.
dropout
(
z
,
p
=
self
.
dropout_output
,
training
=
self
.
training
)
return
z
def
chunk_sizes
(
self
,
dim
,
chunks
):
split_size
=
(
dim
+
chunks
-
1
)
//
chunks
sizes_list
=
[
split_size
]
*
chunks
sizes_list
[
-
1
]
=
sizes_list
[
-
1
]
-
(
sum
(
sizes_list
)
-
dim
)
return
sizes_list
ppocr/postprocess/__init__.py
View file @
de88d73d
...
@@ -37,6 +37,8 @@ def build_post_process(config, global_config=None):
...
@@ -37,6 +37,8 @@ def build_post_process(config, global_config=None):
config
=
copy
.
deepcopy
(
config
)
config
=
copy
.
deepcopy
(
config
)
module_name
=
config
.
pop
(
'name'
)
module_name
=
config
.
pop
(
'name'
)
if
module_name
==
"None"
:
return
if
global_config
is
not
None
:
if
global_config
is
not
None
:
config
.
update
(
global_config
)
config
.
update
(
global_config
)
assert
module_name
in
support_dict
,
Exception
(
assert
module_name
in
support_dict
,
Exception
(
...
...
pretrain_model/iter_epoch_60.pdopt
0 → 100644
View file @
de88d73d
File added
pretrain_model/iter_epoch_60.pdparams
0 → 100644
View file @
de88d73d
File added
pretrain_model/iter_epoch_60.states
0 → 100644
View file @
de88d73d
File added
tools/infer_kie.py
0 → 100755
View file @
de88d73d
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
paddle.nn.functional
as
F
import
os
import
sys
__dir__
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
sys
.
path
.
append
(
__dir__
)
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
__dir__
,
'..'
)))
os
.
environ
[
"FLAGS_allocator_strategy"
]
=
'auto_growth'
import
cv2
import
paddle
from
ppocr.data
import
create_operators
,
transform
from
ppocr.modeling.architectures
import
build_model
from
ppocr.utils.save_load
import
init_model
import
tools.program
as
program
def
read_class_list
(
filepath
):
dict
=
{}
with
open
(
filepath
,
"r"
)
as
f
:
lines
=
f
.
readlines
()
for
line
in
lines
:
key
,
value
=
line
.
split
(
" "
)
dict
[
key
]
=
value
.
rstrip
()
return
dict
def
draw_kie_result
(
batch
,
node
,
idx_to_cls
):
img
=
batch
[
-
2
]
boxes
=
batch
[
-
1
]
h
,
w
=
img
.
shape
[:
2
]
pred_img
=
np
.
ones
((
h
,
w
*
2
,
3
),
dtype
=
np
.
uint8
)
*
255
max_value
,
max_idx
=
paddle
.
max
(
node
,
-
1
),
paddle
.
argmax
(
node
,
-
1
)
node_pred_label
=
max_idx
.
numpy
().
tolist
()
node_pred_score
=
max_value
.
numpy
().
tolist
()
for
i
,
box
in
enumerate
(
boxes
):
if
i
>=
len
(
node_pred_label
):
break
new_box
=
[[
box
[
0
],
box
[
1
]],
[
box
[
2
],
box
[
1
]],
[
box
[
2
],
box
[
3
]],
[
box
[
0
],
box
[
3
]]]
Pts
=
np
.
array
([
new_box
],
np
.
int32
)
cv2
.
polylines
(
img
,
[
Pts
.
reshape
((
-
1
,
1
,
2
))],
True
,
color
=
(
255
,
255
,
0
),
thickness
=
1
)
x_min
=
int
(
min
([
point
[
0
]
for
point
in
new_box
]))
y_min
=
int
(
min
([
point
[
1
]
for
point
in
new_box
]))
pred_label
=
str
(
node_pred_label
[
i
])
if
pred_label
in
idx_to_cls
:
pred_label
=
idx_to_cls
[
pred_label
]
pred_score
=
'{:.2f}'
.
format
(
node_pred_score
[
i
])
text
=
pred_label
+
'('
+
pred_score
+
')'
cv2
.
putText
(
pred_img
,
text
,
(
x_min
*
2
,
y_min
),
cv2
.
FONT_HERSHEY_SIMPLEX
,
0.5
,
(
255
,
0
,
0
),
1
)
vis_img
=
np
.
ones
((
h
,
w
*
3
,
3
),
dtype
=
np
.
uint8
)
*
255
vis_img
[:,
:
w
]
=
img
vis_img
[:,
w
:]
=
pred_img
return
vis_img
def
main
():
global_config
=
config
[
'Global'
]
# build model
model
=
build_model
(
config
[
'Architecture'
])
init_model
(
config
,
model
,
logger
)
# create data ops
transforms
=
[]
for
op
in
config
[
'Eval'
][
'dataset'
][
'transforms'
]:
transforms
.
append
(
op
)
ops
=
create_operators
(
transforms
,
global_config
)
save_res_path
=
config
[
'Global'
][
'save_res_path'
]
class_path
=
config
[
'Global'
][
'class_path'
]
idx_to_cls
=
read_class_list
(
class_path
)
if
not
os
.
path
.
exists
(
os
.
path
.
dirname
(
save_res_path
)):
os
.
makedirs
(
os
.
path
.
dirname
(
save_res_path
))
model
.
eval
()
with
open
(
save_res_path
,
"wb"
)
as
fout
:
with
open
(
config
[
'Global'
][
'infer_img'
],
"rb"
)
as
f
:
lines
=
f
.
readlines
()
for
data_line
in
lines
:
data_line
=
data_line
.
decode
(
'utf-8'
)
substr
=
data_line
.
strip
(
"
\n
"
).
split
(
"
\t
"
)
img_path
,
label
=
"/Users/hongyongjie/project/PaddleOCR/train_data/wildreceipt/"
+
substr
[
0
],
substr
[
1
]
data
=
{
'img_path'
:
img_path
,
'label'
:
label
}
with
open
(
data
[
'img_path'
],
'rb'
)
as
f
:
img
=
f
.
read
()
data
[
'image'
]
=
img
batch
=
transform
(
data
,
ops
)
batch_pred
=
[
0
]
*
len
(
batch
)
for
i
in
range
(
len
(
batch
)):
batch_pred
[
i
]
=
paddle
.
to_tensor
(
np
.
expand_dims
(
batch
[
i
],
axis
=
0
))
node
,
edge
=
model
(
batch_pred
)
node
=
F
.
softmax
(
node
,
-
1
)
img
=
draw_kie_result
(
batch
,
node
,
idx_to_cls
)
cv2
.
imwrite
(
'1.png'
,
img
)
exit
()
logger
.
info
(
"success!"
)
if
__name__
==
'__main__'
:
config
,
device
,
logger
,
vdl_writer
=
program
.
preprocess
()
main
()
tools/program.py
View file @
de88d73d
...
@@ -44,6 +44,7 @@ class ArgsParser(ArgumentParser):
...
@@ -44,6 +44,7 @@ class ArgsParser(ArgumentParser):
def
parse_args
(
self
,
argv
=
None
):
def
parse_args
(
self
,
argv
=
None
):
args
=
super
(
ArgsParser
,
self
).
parse_args
(
argv
)
args
=
super
(
ArgsParser
,
self
).
parse_args
(
argv
)
# args.config = "/Users/hongyongjie/project/PaddleOCR/configs/kie/kie_unet_sdmgr.yml"
assert
args
.
config
is
not
None
,
\
assert
args
.
config
is
not
None
,
\
"Please specify --config=configure_file_path."
"Please specify --config=configure_file_path."
args
.
opt
=
self
.
_parse_opt
(
args
.
opt
)
args
.
opt
=
self
.
_parse_opt
(
args
.
opt
)
...
@@ -207,7 +208,7 @@ def train(config,
...
@@ -207,7 +208,7 @@ def train(config,
preds
=
model
(
images
,
others
)
preds
=
model
(
images
,
others
)
model_average
=
True
model_average
=
True
else
:
else
:
preds
=
model
(
images
)
preds
=
model
(
batch
)
loss
=
loss_class
(
preds
,
batch
)
loss
=
loss_class
(
preds
,
batch
)
avg_loss
=
loss
[
'loss'
]
avg_loss
=
loss
[
'loss'
]
avg_loss
.
backward
()
avg_loss
.
backward
()
...
@@ -345,14 +346,14 @@ def eval(model, valid_dataloader, post_process_class, eval_class,
...
@@ -345,14 +346,14 @@ def eval(model, valid_dataloader, post_process_class, eval_class,
others
=
batch
[
-
4
:]
others
=
batch
[
-
4
:]
preds
=
model
(
images
,
others
)
preds
=
model
(
images
,
others
)
else
:
else
:
preds
=
model
(
images
)
preds
=
model
(
batch
)
batch
=
[
item
.
numpy
()
for
item
in
batch
]
batch
=
[
item
.
numpy
()
for
item
in
batch
]
# Obtain usable results from post-processing methods
# Obtain usable results from post-processing methods
post_result
=
post_process_class
(
preds
,
batch
[
1
])
#
post_result = post_process_class(preds, batch[1])
total_time
+=
time
.
time
()
-
start
total_time
+=
time
.
time
()
-
start
# Evaluate the results of the current batch
# Evaluate the results of the current batch
eval_class
(
p
ost_result
,
batch
)
eval_class
(
p
reds
,
batch
)
pbar
.
update
(
1
)
pbar
.
update
(
1
)
total_frame
+=
len
(
images
)
total_frame
+=
len
(
images
)
# Get final metric,eg. acc or hmean
# Get final metric,eg. acc or hmean
...
@@ -376,7 +377,7 @@ def preprocess(is_train=False):
...
@@ -376,7 +377,7 @@ def preprocess(is_train=False):
alg
=
config
[
'Architecture'
][
'algorithm'
]
alg
=
config
[
'Architecture'
][
'algorithm'
]
assert
alg
in
[
assert
alg
in
[
'EAST'
,
'DB'
,
'SAST'
,
'Rosetta'
,
'CRNN'
,
'STARNet'
,
'RARE'
,
'SRN'
,
'EAST'
,
'DB'
,
'SAST'
,
'Rosetta'
,
'CRNN'
,
'STARNet'
,
'RARE'
,
'SRN'
,
'CLS'
,
'PGNet'
'CLS'
,
'PGNet'
,
'SDMGR'
]
]
device
=
'gpu:{}'
.
format
(
dist
.
ParallelEnv
().
dev_id
)
if
use_gpu
else
'cpu'
device
=
'gpu:{}'
.
format
(
dist
.
ParallelEnv
().
dev_id
)
if
use_gpu
else
'cpu'
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment