"sgl-router/tests/vscode:/vscode.git/clone" did not exist on "72392f290812e207a91025b669c4dbb40aa05e49"
Unverified Commit adc62fcd authored by topduke's avatar topduke Committed by GitHub
Browse files

Merge branch 'dygraph' into dygraph

parents 8227ad1b a81b88a0
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
class TableFPN(nn.Layer):
def __init__(self, in_channels, out_channels, **kwargs):
super(TableFPN, self).__init__()
self.out_channels = 512
weight_attr = paddle.nn.initializer.KaimingUniform()
self.in2_conv = nn.Conv2D(
in_channels=in_channels[0],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.in3_conv = nn.Conv2D(
in_channels=in_channels[1],
out_channels=self.out_channels,
kernel_size=1,
stride = 1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.in4_conv = nn.Conv2D(
in_channels=in_channels[2],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.in5_conv = nn.Conv2D(
in_channels=in_channels[3],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p5_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p4_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p3_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p2_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.fuse_conv = nn.Conv2D(
in_channels=self.out_channels * 4,
out_channels=512,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr), bias_attr=False)
def forward(self, x):
c2, c3, c4, c5 = x
in5 = self.in5_conv(c5)
in4 = self.in4_conv(c4)
in3 = self.in3_conv(c3)
in2 = self.in2_conv(c2)
out4 = in4 + F.upsample(
in5, size=in4.shape[2:4], mode="nearest", align_mode=1) # 1/16
out3 = in3 + F.upsample(
out4, size=in3.shape[2:4], mode="nearest", align_mode=1) # 1/8
out2 = in2 + F.upsample(
out3, size=in2.shape[2:4], mode="nearest", align_mode=1) # 1/4
p4 = F.upsample(out4, size=in5.shape[2:4], mode="nearest", align_mode=1)
p3 = F.upsample(out3, size=in5.shape[2:4], mode="nearest", align_mode=1)
p2 = F.upsample(out2, size=in5.shape[2:4], mode="nearest", align_mode=1)
fuse = paddle.concat([in5, p4, p3, p2], axis=1)
fuse_conv = self.fuse_conv(fuse) * 0.005
return [c5 + fuse_conv]
...@@ -230,15 +230,8 @@ class GridGenerator(nn.Layer): ...@@ -230,15 +230,8 @@ class GridGenerator(nn.Layer):
def build_inv_delta_C_paddle(self, C): def build_inv_delta_C_paddle(self, C):
""" Return inv_delta_C which is needed to calculate T """ """ Return inv_delta_C which is needed to calculate T """
F = self.F F = self.F
hat_C = paddle.zeros((F, F), dtype='float64') # F x F hat_eye = paddle.eye(F, dtype='float64') # F x F
for i in range(0, F): hat_C = paddle.norm(C.reshape([1, F, 2]) - C.reshape([F, 1, 2]), axis=2) + hat_eye
for j in range(i, F):
if i == j:
hat_C[i, j] = 1
else:
r = paddle.norm(C[i] - C[j])
hat_C[i, j] = r
hat_C[j, i] = r
hat_C = (hat_C**2) * paddle.log(hat_C) hat_C = (hat_C**2) * paddle.log(hat_C)
delta_C = paddle.concat( # F+3 x F+3 delta_C = paddle.concat( # F+3 x F+3
[ [
......
...@@ -21,10 +21,13 @@ import copy ...@@ -21,10 +21,13 @@ import copy
__all__ = ['build_post_process'] __all__ = ['build_post_process']
from .db_postprocess import DBPostProcess from .db_postprocess import DBPostProcess, DistillationDBPostProcess
from .east_postprocess import EASTPostProcess from .east_postprocess import EASTPostProcess
from .sast_postprocess import SASTPostProcess from .sast_postprocess import SASTPostProcess
from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, NRTRLabelDecode
from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, NRTRLabelDecode, \
TableLabelDecode
from .cls_postprocess import ClsPostProcess from .cls_postprocess import ClsPostProcess
from .pg_postprocess import PGPostProcess from .pg_postprocess import PGPostProcess
...@@ -32,7 +35,7 @@ def build_post_process(config, global_config=None): ...@@ -32,7 +35,7 @@ def build_post_process(config, global_config=None):
support_dict = [ support_dict = [
'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode', 'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess', 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
'DistillationCTCLabelDecode', 'NRTRLabelDecode' 'DistillationCTCLabelDecode', 'NRTRLabelDecode', 'TableLabelDecode', 'DistillationDBPostProcess'
] ]
config = copy.deepcopy(config) config = copy.deepcopy(config)
......
...@@ -187,3 +187,29 @@ class DBPostProcess(object): ...@@ -187,3 +187,29 @@ class DBPostProcess(object):
boxes_batch.append({'points': boxes}) boxes_batch.append({'points': boxes})
return boxes_batch return boxes_batch
class DistillationDBPostProcess(object):
def __init__(self, model_name=["student"],
key=None,
thresh=0.3,
box_thresh=0.6,
max_candidates=1000,
unclip_ratio=1.5,
use_dilation=False,
score_mode="fast",
**kwargs):
self.model_name = model_name
self.key = key
self.post_process = DBPostProcess(thresh=thresh,
box_thresh=box_thresh,
max_candidates=max_candidates,
unclip_ratio=unclip_ratio,
use_dilation=use_dilation,
score_mode=score_mode)
def __call__(self, predicts, shape_list):
results = {}
for k in self.model_name:
results[k] = self.post_process(predicts[k], shape_list=shape_list)
return results
...@@ -44,16 +44,16 @@ class BaseRecLabelDecode(object): ...@@ -44,16 +44,16 @@ class BaseRecLabelDecode(object):
self.character_str = string.printable[:-6] self.character_str = string.printable[:-6]
dict_character = list(self.character_str) dict_character = list(self.character_str)
elif character_type in support_character_type: elif character_type in support_character_type:
self.character_str = "" self.character_str = []
assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format( assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
character_type) character_type)
with open(character_dict_path, "rb") as fin: with open(character_dict_path, "rb") as fin:
lines = fin.readlines() lines = fin.readlines()
for line in lines: for line in lines:
line = line.decode('utf-8').strip("\n").strip("\r\n") line = line.decode('utf-8').strip("\n").strip("\r\n")
self.character_str += line self.character_str.append(line)
if use_space_char: if use_space_char:
self.character_str += " " self.character_str.append(" ")
dict_character = list(self.character_str) dict_character = list(self.character_str)
else: else:
...@@ -381,3 +381,138 @@ class SRNLabelDecode(BaseRecLabelDecode): ...@@ -381,3 +381,138 @@ class SRNLabelDecode(BaseRecLabelDecode):
assert False, "unsupport type %s in get_beg_end_flag_idx" \ assert False, "unsupport type %s in get_beg_end_flag_idx" \
% beg_or_end % beg_or_end
return idx return idx
class TableLabelDecode(object):
""" """
def __init__(self,
character_dict_path,
**kwargs):
list_character, list_elem = self.load_char_elem_dict(character_dict_path)
list_character = self.add_special_char(list_character)
list_elem = self.add_special_char(list_elem)
self.dict_character = {}
self.dict_idx_character = {}
for i, char in enumerate(list_character):
self.dict_idx_character[i] = char
self.dict_character[char] = i
self.dict_elem = {}
self.dict_idx_elem = {}
for i, elem in enumerate(list_elem):
self.dict_idx_elem[i] = elem
self.dict_elem[elem] = i
def load_char_elem_dict(self, character_dict_path):
list_character = []
list_elem = []
with open(character_dict_path, "rb") as fin:
lines = fin.readlines()
substr = lines[0].decode('utf-8').strip("\n").strip("\r\n").split("\t")
character_num = int(substr[0])
elem_num = int(substr[1])
for cno in range(1, 1 + character_num):
character = lines[cno].decode('utf-8').strip("\n").strip("\r\n")
list_character.append(character)
for eno in range(1 + character_num, 1 + character_num + elem_num):
elem = lines[eno].decode('utf-8').strip("\n").strip("\r\n")
list_elem.append(elem)
return list_character, list_elem
def add_special_char(self, list_character):
self.beg_str = "sos"
self.end_str = "eos"
list_character = [self.beg_str] + list_character + [self.end_str]
return list_character
def __call__(self, preds):
structure_probs = preds['structure_probs']
loc_preds = preds['loc_preds']
if isinstance(structure_probs,paddle.Tensor):
structure_probs = structure_probs.numpy()
if isinstance(loc_preds,paddle.Tensor):
loc_preds = loc_preds.numpy()
structure_idx = structure_probs.argmax(axis=2)
structure_probs = structure_probs.max(axis=2)
structure_str, structure_pos, result_score_list, result_elem_idx_list = self.decode(structure_idx,
structure_probs, 'elem')
res_html_code_list = []
res_loc_list = []
batch_num = len(structure_str)
for bno in range(batch_num):
res_loc = []
for sno in range(len(structure_str[bno])):
text = structure_str[bno][sno]
if text in ['<td>', '<td']:
pos = structure_pos[bno][sno]
res_loc.append(loc_preds[bno, pos])
res_html_code = ''.join(structure_str[bno])
res_loc = np.array(res_loc)
res_html_code_list.append(res_html_code)
res_loc_list.append(res_loc)
return {'res_html_code': res_html_code_list, 'res_loc': res_loc_list, 'res_score_list': result_score_list,
'res_elem_idx_list': result_elem_idx_list,'structure_str_list':structure_str}
def decode(self, text_index, structure_probs, char_or_elem):
"""convert text-label into text-index.
"""
if char_or_elem == "char":
current_dict = self.dict_idx_character
else:
current_dict = self.dict_idx_elem
ignored_tokens = self.get_ignored_tokens('elem')
beg_idx, end_idx = ignored_tokens
result_list = []
result_pos_list = []
result_score_list = []
result_elem_idx_list = []
batch_size = len(text_index)
for batch_idx in range(batch_size):
char_list = []
elem_pos_list = []
elem_idx_list = []
score_list = []
for idx in range(len(text_index[batch_idx])):
tmp_elem_idx = int(text_index[batch_idx][idx])
if idx > 0 and tmp_elem_idx == end_idx:
break
if tmp_elem_idx in ignored_tokens:
continue
char_list.append(current_dict[tmp_elem_idx])
elem_pos_list.append(idx)
score_list.append(structure_probs[batch_idx, idx])
elem_idx_list.append(tmp_elem_idx)
result_list.append(char_list)
result_pos_list.append(elem_pos_list)
result_score_list.append(score_list)
result_elem_idx_list.append(elem_idx_list)
return result_list, result_pos_list, result_score_list, result_elem_idx_list
def get_ignored_tokens(self, char_or_elem):
beg_idx = self.get_beg_end_flag_idx("beg", char_or_elem)
end_idx = self.get_beg_end_flag_idx("end", char_or_elem)
return [beg_idx, end_idx]
def get_beg_end_flag_idx(self, beg_or_end, char_or_elem):
if char_or_elem == "char":
if beg_or_end == "beg":
idx = self.dict_character[self.beg_str]
elif beg_or_end == "end":
idx = self.dict_character[self.end_str]
else:
assert False, "Unsupport type %s in get_beg_end_flag_idx of char" \
% beg_or_end
elif char_or_elem == "elem":
if beg_or_end == "beg":
idx = self.dict_elem[self.beg_str]
elif beg_or_end == "end":
idx = self.dict_elem[self.end_str]
else:
assert False, "Unsupport type %s in get_beg_end_flag_idx of elem" \
% beg_or_end
else:
assert False, "Unsupport type %s in char_or_elem" \
% char_or_elem
return idx
</overline>
α

$
ω
ψ
χ
(
υ
σ
,
ρ
ε
0
4
8
b
<
Ψ
Ω
D
3
Π
H
</strike>
L
Φ
Χ
θ
P
κ
λ
μ
T
ξ
X
β
γ
δ
\
ζ
η
`
d
<strike>
h
f
l
Θ
p
t
</sub>
x
Β
Γ
Δ
|
ǂ
ɛ
j
̧
̌
«
#
</b>
'
Ι
+
/
·
7
;
?
C
÷
G
K
<sup>
O
S
С
W
Α
[
_
c
z
g
<i>
o
<sub>
s
w
φ
ʹ
{
»
̆
e
ˆ
τ
ι
Ø
ß
×
˃
˂
"
i
&
π
*
æ
.
ø
Q
6
:
>
a
B
F
J
̄
N
R
V
<overline>
Z
^
¤
¥
§
<underline>
¢
£
­
Λ
©
n
r
°
±
v
<b>
k
~
̇
@
ł
®
!
</sup>
%
)
-
1
5
9
=
А
A
Σ
E
I
M
m
̨
</i>
U
Y
]
̸
2
̂
̀
́
̊
̈
q
u
ı
y
</underline>
̃
}
ν
277 28 1267 1186
<b>
V
a
r
i
b
l
e
</b>
H
z
d
t
o
9
5
%
C
I
<i>
p
</i>
v
u
*
A
g
(
m
n
)
0
.
7
1
6
>
8
3
2
G
4
M
F
T
y
f
s
L
w
c
U
h
D
S
Q
R
x
P
-
E
O
/
k
,
+
N
K
q
[
]
<
<sup>
</sup>
μ
±
J
j
W
_
Δ
B
:
Y
α
λ
;
<sub>
</sub>
?
=
°
#
̊
̈
̂
Z
X
β
'
~
@
"
γ
&
χ
σ
§
|
×
$
\
π
®
^
<underline>
</underline>
́
·
£
φ
Ψ
ß
η
̃
Φ
ρ
̄
δ
̧
Ω
{
}
̀
ø
κ
ε
¥
`
ω
Σ
Β
̸
Χ
Α
ψ
ǂ
ζ
!
Γ
θ
υ
τ
Ø
©
С
˂
ɛ
¢
˃
­
Π
̌
<overline>
</overline>
¤
̆
ξ
÷

ι
ν
<strike>
</strike>
«
»
ł
ı
Θ
̇
æ
ʹ
ˆ
̨
Ι
Λ
А
<thead>
<tr>
<td>
</td>
</tr>
</thead>
<tbody>
</tbody>
<td
colspan="5"
>
colspan="2"
colspan="3"
rowspan="2"
colspan="4"
colspan="6"
rowspan="3"
colspan="9"
colspan="10"
colspan="7"
rowspan="4"
rowspan="5"
rowspan="9"
colspan="8"
rowspan="8"
rowspan="6"
rowspan="7"
rowspan="10"
0 2924682
1 3405345
2 2363468
3 2709165
4 4078680
5 3250792
6 1923159
7 1617890
8 1450532
9 1717624
10 1477550
11 1489223
12 915528
13 819193
14 593660
15 518924
16 682065
17 494584
18 400591
19 396421
20 340994
21 280688
22 250328
23 226786
24 199927
25 182707
26 164629
27 141613
28 127554
29 116286
30 107682
31 96367
32 88002
33 79234
34 72186
35 65921
36 60374
37 55976
38 52166
39 47414
40 44932
41 41279
42 38232
43 35463
44 33703
45 30557
46 29639
47 27000
48 25447
49 23186
50 22093
51 20412
52 19844
53 18261
54 17561
55 16499
56 15597
57 14558
58 14372
59 13445
60 13514
61 12058
62 11145
63 10767
64 10370
65 9630
66 9337
67 8881
68 8727
69 8060
70 7994
71 7740
72 7189
73 6729
74 6749
75 6548
76 6321
77 5957
78 5740
79 5407
80 5370
81 5035
82 4921
83 4656
84 4600
85 4519
86 4277
87 4023
88 3939
89 3910
90 3861
91 3560
92 3483
93 3406
94 3346
95 3229
96 3122
97 3086
98 3001
99 2884
100 2822
101 2677
102 2670
103 2610
104 2452
105 2446
106 2400
107 2300
108 2316
109 2196
110 2089
111 2083
112 2041
113 1881
114 1838
115 1896
116 1795
117 1786
118 1743
119 1765
120 1750
121 1683
122 1563
123 1499
124 1513
125 1462
126 1388
127 1441
128 1417
129 1392
130 1306
131 1321
132 1274
133 1294
134 1240
135 1126
136 1157
137 1130
138 1084
139 1130
140 1083
141 1040
142 980
143 1031
144 974
145 980
146 932
147 898
148 960
149 907
150 852
151 912
152 859
153 847
154 876
155 792
156 791
157 765
158 788
159 787
160 744
161 673
162 683
163 697
164 666
165 680
166 632
167 677
168 657
169 618
170 587
171 585
172 567
173 549
174 562
175 548
176 542
177 539
178 542
179 549
180 547
181 526
182 525
183 514
184 512
185 505
186 515
187 467
188 475
189 458
190 435
191 443
192 427
193 424
194 404
195 389
196 429
197 404
198 386
199 351
200 388
201 408
202 361
203 346
204 324
205 361
206 363
207 364
208 323
209 336
210 342
211 315
212 325
213 328
214 314
215 327
216 320
217 300
218 295
219 315
220 310
221 295
222 275
223 248
224 274
225 232
226 293
227 259
228 286
229 263
230 242
231 214
232 261
233 231
234 211
235 250
236 233
237 206
238 224
239 210
240 233
241 223
242 216
243 222
244 207
245 212
246 196
247 205
248 201
249 202
250 211
251 201
252 215
253 179
254 163
255 179
256 191
257 188
258 196
259 150
260 154
261 176
262 211
263 166
264 171
265 165
266 149
267 182
268 159
269 161
270 164
271 161
272 141
273 151
274 127
275 129
276 142
277 158
278 148
279 135
280 127
281 134
282 138
283 131
284 126
285 125
286 130
287 126
288 135
289 125
290 135
291 131
292 95
293 135
294 106
295 117
296 136
297 128
298 128
299 118
300 109
301 112
302 117
303 108
304 120
305 100
306 95
307 108
308 112
309 77
310 120
311 104
312 109
313 89
314 98
315 82
316 98
317 93
318 77
319 93
320 77
321 98
322 93
323 86
324 89
325 73
326 70
327 71
328 77
329 87
330 77
331 93
332 100
333 83
334 72
335 74
336 69
337 77
338 68
339 78
340 90
341 98
342 75
343 80
344 63
345 71
346 83
347 66
348 71
349 70
350 62
351 62
352 59
353 63
354 62
355 52
356 64
357 64
358 56
359 49
360 57
361 63
362 60
363 68
364 62
365 55
366 54
367 40
368 75
369 70
370 53
371 58
372 57
373 55
374 69
375 57
376 53
377 43
378 45
379 47
380 56
381 51
382 59
383 51
384 43
385 34
386 57
387 49
388 39
389 46
390 48
391 43
392 40
393 54
394 50
395 41
396 43
397 33
398 27
399 49
400 44
401 44
402 38
403 30
404 32
405 37
406 39
407 42
408 53
409 39
410 34
411 31
412 32
413 52
414 27
415 41
416 34
417 36
418 50
419 35
420 32
421 33
422 45
423 35
424 40
425 29
426 41
427 40
428 39
429 32
430 31
431 34
432 29
433 27
434 26
435 22
436 34
437 28
438 30
439 38
440 35
441 36
442 36
443 27
444 24
445 33
446 31
447 25
448 33
449 27
450 32
451 46
452 31
453 35
454 35
455 34
456 26
457 21
458 25
459 26
460 24
461 27
462 33
463 30
464 35
465 21
466 32
467 19
468 27
469 16
470 28
471 26
472 27
473 26
474 25
475 25
476 27
477 20
478 28
479 22
480 23
481 16
482 25
483 27
484 19
485 23
486 19
487 15
488 15
489 23
490 24
491 19
492 20
493 18
494 17
495 30
496 28
497 20
498 29
499 17
500 19
501 21
502 15
503 24
504 15
505 19
506 25
507 16
508 23
509 26
510 21
511 15
512 12
513 16
514 18
515 24
516 26
517 18
518 8
519 25
520 14
521 8
522 24
523 20
524 18
525 15
526 13
527 17
528 18
529 22
530 21
531 9
532 16
533 17
534 13
535 17
536 15
537 13
538 20
539 13
540 19
541 29
542 10
543 8
544 18
545 13
546 9
547 18
548 10
549 18
550 18
551 9
552 9
553 15
554 13
555 15
556 14
557 14
558 18
559 8
560 13
561 9
562 7
563 12
564 6
565 9
566 9
567 18
568 9
569 10
570 13
571 14
572 13
573 21
574 8
575 16
576 12
577 9
578 16
579 17
580 22
581 6
582 14
583 13
584 15
585 11
586 13
587 5
588 12
589 13
590 15
591 13
592 15
593 12
594 7
595 18
596 12
597 13
598 13
599 13
600 12
601 12
602 10
603 11
604 6
605 6
606 2
607 9
608 8
609 12
610 9
611 12
612 13
613 12
614 14
615 9
616 8
617 9
618 14
619 13
620 12
621 6
622 8
623 8
624 8
625 12
626 8
627 7
628 5
629 8
630 12
631 6
632 10
633 10
634 7
635 8
636 9
637 6
638 9
639 4
640 12
641 4
642 3
643 11
644 10
645 6
646 12
647 12
648 4
649 4
650 9
651 8
652 6
653 5
654 14
655 10
656 11
657 8
658 5
659 5
660 9
661 13
662 4
663 5
664 9
665 11
666 12
667 7
668 13
669 2
670 1
671 7
672 7
673 7
674 10
675 9
676 6
677 5
678 7
679 6
680 3
681 3
682 4
683 9
684 8
685 5
686 3
687 11
688 9
689 2
690 6
691 5
692 9
693 5
694 6
695 5
696 9
697 8
698 3
699 7
700 5
701 9
702 8
703 7
704 2
705 3
706 7
707 6
708 6
709 10
710 2
711 10
712 6
713 7
714 5
715 6
716 4
717 6
718 8
719 4
720 6
721 7
722 5
723 7
724 3
725 10
726 10
727 3
728 7
729 7
730 5
731 2
732 1
733 5
734 1
735 5
736 6
737 2
738 2
739 3
740 7
741 2
742 7
743 4
744 5
745 4
746 5
747 3
748 1
749 4
750 4
751 2
752 4
753 6
754 6
755 6
756 3
757 2
758 5
759 5
760 3
761 4
762 2
763 1
764 8
765 3
766 4
767 3
768 1
769 5
770 3
771 3
772 4
773 4
774 1
775 3
776 2
777 2
778 3
779 3
780 1
781 4
782 3
783 4
784 6
785 3
786 5
787 4
788 2
789 4
790 5
791 4
792 6
794 4
795 1
796 1
797 4
798 2
799 3
800 3
801 1
802 5
803 5
804 3
805 3
806 3
807 4
808 4
809 2
811 5
812 4
813 6
814 3
815 2
816 2
817 3
818 5
819 3
820 1
821 1
822 4
823 3
824 4
825 8
826 3
827 5
828 5
829 3
830 6
831 3
832 4
833 8
834 5
835 3
836 3
837 2
838 4
839 2
840 1
841 3
842 2
843 1
844 3
846 4
847 4
848 3
849 3
850 2
851 3
853 1
854 4
855 4
856 2
857 4
858 1
859 2
860 5
861 1
862 1
863 4
864 2
865 2
867 5
868 1
869 4
870 1
871 1
872 1
873 2
875 5
876 3
877 1
878 3
879 3
880 3
881 2
882 1
883 6
884 2
885 2
886 1
887 1
888 3
889 2
890 2
891 3
892 1
893 3
894 1
895 5
896 1
897 3
899 2
900 2
902 1
903 2
904 4
905 4
906 3
907 1
908 1
909 2
910 5
911 2
912 3
914 1
915 1
916 2
918 2
919 2
920 4
921 4
922 1
923 1
924 4
925 5
926 1
928 2
929 1
930 1
931 1
932 1
933 1
934 2
935 1
936 1
937 1
938 2
939 1
941 1
942 4
944 2
945 2
946 2
947 1
948 1
950 1
951 2
953 1
954 2
955 1
956 1
957 2
958 1
960 3
962 4
963 1
964 1
965 3
966 2
967 2
968 1
969 3
970 3
972 1
974 4
975 3
976 3
977 2
979 2
980 1
981 1
983 5
984 1
985 3
986 1
987 2
988 4
989 2
991 2
992 2
993 1
994 1
996 2
997 2
998 1
999 3
1000 2
1001 1
1002 3
1003 3
1004 2
1005 3
1006 1
1007 2
1009 1
1011 1
1013 3
1014 1
1016 2
1017 1
1018 1
1019 1
1020 4
1021 1
1022 2
1025 1
1026 1
1027 2
1028 1
1030 1
1031 2
1032 4
1034 3
1035 2
1036 1
1038 1
1039 1
1040 1
1041 1
1042 2
1043 1
1044 2
1045 4
1048 1
1050 1
1051 1
1052 2
1054 1
1055 3
1056 2
1057 1
1059 1
1061 2
1063 1
1064 1
1065 1
1066 1
1067 1
1068 1
1069 2
1074 1
1075 1
1077 1
1078 1
1079 1
1082 1
1085 1
1088 1
1090 1
1091 1
1092 2
1094 2
1097 2
1098 1
1099 2
1101 2
1102 1
1104 1
1105 1
1107 1
1109 1
1111 2
1112 1
1114 2
1115 2
1116 2
1117 1
1118 1
1119 1
1120 1
1122 1
1123 1
1127 1
1128 3
1132 2
1138 3
1142 1
1145 4
1150 1
1153 2
1154 1
1158 1
1159 1
1163 1
1165 1
1169 2
1174 1
1176 1
1177 1
1178 2
1179 1
1180 2
1181 1
1182 1
1183 2
1185 1
1187 1
1191 2
1193 1
1195 3
1196 1
1201 3
1203 1
1206 1
1210 1
1213 1
1214 1
1215 2
1218 1
1220 1
1221 1
1225 1
1226 1
1233 2
1241 1
1243 1
1249 1
1250 2
1251 1
1254 1
1255 2
1260 1
1268 1
1270 1
1273 1
1274 1
1277 1
1284 1
1287 1
1291 1
1292 2
1294 1
1295 2
1297 1
1298 1
1301 1
1307 1
1308 3
1311 2
1313 1
1316 1
1321 1
1324 1
1325 1
1330 1
1333 1
1334 1
1338 2
1340 1
1341 1
1342 1
1343 1
1345 1
1355 1
1357 1
1360 2
1375 1
1376 1
1380 1
1383 1
1387 1
1389 1
1393 1
1394 1
1396 1
1398 1
1410 1
1414 1
1419 1
1425 1
1434 1
1435 1
1438 1
1439 1
1447 1
1455 2
1460 1
1461 1
1463 1
1466 1
1470 1
1473 1
1478 1
1480 1
1483 1
1484 1
1485 2
1492 2
1499 1
1509 1
1512 1
1513 1
1523 1
1524 1
1525 2
1529 1
1539 1
1544 1
1568 1
1584 1
1591 1
1598 1
1600 1
1604 1
1614 1
1617 1
1621 1
1622 1
1626 1
1638 1
1648 1
1658 1
1661 1
1679 1
1682 1
1693 1
1700 1
1705 1
1707 1
1722 1
1728 1
1758 1
1762 1
1763 1
1775 1
1776 1
1801 1
1810 1
1812 1
1827 1
1834 1
1846 1
1847 1
1848 1
1851 1
1862 1
1866 1
1877 2
1884 1
1888 1
1903 1
1912 1
1925 1
1938 1
1955 1
1998 1
2054 1
2058 1
2065 1
2069 1
2076 1
2089 1
2104 1
2111 1
2133 1
2138 1
2156 1
2204 1
2212 1
2237 1
2246 2
2298 1
2304 1
2360 1
2400 1
2481 1
2544 1
2586 1
2622 1
2666 1
2682 1
2725 1
2920 1
3997 1
4019 1
5211 1
12 19
14 1
16 401
18 2
20 421
22 557
24 625
26 50
28 4481
30 52
32 550
34 5840
36 4644
38 87
40 5794
41 33
42 571
44 11805
46 4711
47 7
48 597
49 12
50 678
51 2
52 14715
53 3
54 7322
55 3
56 508
57 39
58 3486
59 11
60 8974
61 45
62 1276
63 4
64 15693
65 15
66 657
67 13
68 6409
69 10
70 3188
71 25
72 1889
73 27
74 10370
75 9
76 12432
77 23
78 520
79 15
80 1534
81 29
82 2944
83 23
84 12071
85 36
86 1502
87 10
88 10978
89 11
90 889
91 16
92 4571
93 17
94 7855
95 21
96 2271
97 33
98 1423
99 15
100 11096
101 21
102 4082
103 13
104 5442
105 25
106 2113
107 26
108 3779
109 43
110 1294
111 29
112 7860
113 29
114 4965
115 22
116 7898
117 25
118 1772
119 28
120 1149
121 38
122 1483
123 32
124 10572
125 25
126 1147
127 31
128 1699
129 22
130 5533
131 22
132 4669
133 34
134 3777
135 10
136 5412
137 21
138 855
139 26
140 2485
141 46
142 1970
143 27
144 6565
145 40
146 933
147 15
148 7923
149 16
150 735
151 23
152 1111
153 33
154 3714
155 27
156 2445
157 30
158 3367
159 10
160 4646
161 27
162 990
163 23
164 5679
165 25
166 2186
167 17
168 899
169 32
170 1034
171 22
172 6185
173 32
174 2685
175 17
176 1354
177 38
178 1460
179 15
180 3478
181 20
182 958
183 20
184 6055
185 23
186 2180
187 15
188 1416
189 30
190 1284
191 22
192 1341
193 21
194 2413
195 18
196 4984
197 13
198 830
199 22
200 1834
201 19
202 2238
203 9
204 3050
205 22
206 616
207 17
208 2892
209 22
210 711
211 30
212 2631
213 19
214 3341
215 21
216 987
217 26
218 823
219 9
220 3588
221 20
222 692
223 7
224 2925
225 31
226 1075
227 16
228 2909
229 18
230 673
231 20
232 2215
233 14
234 1584
235 21
236 1292
237 29
238 1647
239 25
240 1014
241 30
242 1648
243 19
244 4465
245 10
246 787
247 11
248 480
249 25
250 842
251 15
252 1219
253 23
254 1508
255 8
256 3525
257 16
258 490
259 12
260 1678
261 14
262 822
263 16
264 1729
265 28
266 604
267 11
268 2572
269 7
270 1242
271 15
272 725
273 18
274 1983
275 13
276 1662
277 19
278 491
279 12
280 1586
281 14
282 563
283 10
284 2363
285 10
286 656
287 14
288 725
289 28
290 871
291 9
292 2606
293 12
294 961
295 9
296 478
297 13
298 1252
299 10
300 736
301 19
302 466
303 13
304 2254
305 12
306 486
307 14
308 1145
309 13
310 955
311 13
312 1235
313 13
314 931
315 14
316 1768
317 11
318 330
319 10
320 539
321 23
322 570
323 12
324 1789
325 13
326 884
327 5
328 1422
329 14
330 317
331 11
332 509
333 13
334 1062
335 12
336 577
337 27
338 378
339 10
340 2313
341 9
342 391
343 13
344 894
345 17
346 664
347 9
348 453
349 6
350 363
351 15
352 1115
353 13
354 1054
355 8
356 1108
357 12
358 354
359 7
360 363
361 16
362 344
363 11
364 1734
365 12
366 265
367 10
368 969
369 16
370 316
371 12
372 757
373 7
374 563
375 15
376 857
377 9
378 469
379 9
380 385
381 12
382 921
383 15
384 764
385 14
386 246
387 6
388 1108
389 14
390 230
391 8
392 266
393 11
394 641
395 8
396 719
397 9
398 243
399 4
400 1108
401 7
402 229
403 7
404 903
405 7
406 257
407 12
408 244
409 3
410 541
411 6
412 744
413 8
414 419
415 8
416 388
417 19
418 470
419 14
420 612
421 6
422 342
423 3
424 1179
425 3
426 116
427 14
428 207
429 6
430 255
431 4
432 288
433 12
434 343
435 6
436 1015
437 3
438 538
439 10
440 194
441 6
442 188
443 15
444 524
445 7
446 214
447 7
448 574
449 6
450 214
451 5
452 635
453 9
454 464
455 5
456 205
457 9
458 163
459 2
460 558
461 4
462 171
463 14
464 444
465 11
466 543
467 5
468 388
469 6
470 141
471 4
472 647
473 3
474 210
475 4
476 193
477 7
478 195
479 7
480 443
481 10
482 198
483 3
484 816
485 6
486 128
487 9
488 215
489 9
490 328
491 7
492 158
493 11
494 335
495 8
496 435
497 6
498 174
499 1
500 373
501 5
502 140
503 7
504 330
505 9
506 149
507 5
508 642
509 3
510 179
511 3
512 159
513 8
514 204
515 7
516 306
517 4
518 110
519 5
520 326
521 6
522 305
523 6
524 294
525 7
526 268
527 5
528 149
529 4
530 133
531 2
532 513
533 10
534 116
535 5
536 258
537 4
538 113
539 4
540 138
541 6
542 116
544 485
545 4
546 93
547 9
548 299
549 3
550 256
551 6
552 92
553 3
554 175
555 6
556 253
557 7
558 95
559 2
560 128
561 4
562 206
563 2
564 465
565 3
566 69
567 3
568 157
569 7
570 97
571 8
572 118
573 5
574 130
575 4
576 301
577 6
578 177
579 2
580 397
581 3
582 80
583 1
584 128
585 5
586 52
587 2
588 72
589 1
590 84
591 6
592 323
593 11
594 77
595 5
596 205
597 1
598 244
599 4
600 69
601 3
602 89
603 5
604 254
605 6
606 147
607 3
608 83
609 3
610 77
611 3
612 194
613 1
614 98
615 3
616 243
617 3
618 50
619 8
620 188
621 4
622 67
623 4
624 123
625 2
626 50
627 1
628 239
629 2
630 51
631 4
632 65
633 5
634 188
636 81
637 3
638 46
639 3
640 103
641 1
642 136
643 3
644 188
645 3
646 58
648 122
649 4
650 47
651 2
652 155
653 4
654 71
655 1
656 71
657 3
658 50
659 2
660 177
661 5
662 66
663 2
664 183
665 3
666 50
667 2
668 53
669 2
670 115
672 66
673 2
674 47
675 1
676 197
677 2
678 46
679 3
680 95
681 3
682 46
683 3
684 107
685 1
686 86
687 2
688 158
689 4
690 51
691 1
692 80
694 56
695 4
696 40
698 43
699 3
700 95
701 2
702 51
703 2
704 133
705 1
706 100
707 2
708 121
709 2
710 15
711 3
712 35
713 2
714 20
715 3
716 37
717 2
718 78
720 55
721 1
722 42
723 2
724 218
725 3
726 23
727 2
728 26
729 1
730 64
731 2
732 65
734 24
735 2
736 53
737 1
738 32
739 1
740 60
742 81
743 1
744 77
745 1
746 47
747 1
748 62
749 1
750 19
751 1
752 86
753 3
754 40
756 55
757 2
758 38
759 1
760 101
761 1
762 22
764 67
765 2
766 35
767 1
768 38
769 1
770 22
771 1
772 82
773 1
774 73
776 29
777 1
778 55
780 23
781 1
782 16
784 84
785 3
786 28
788 59
789 1
790 33
791 3
792 24
794 13
795 1
796 110
797 2
798 15
800 22
801 3
802 29
803 1
804 87
806 21
808 29
810 48
812 28
813 1
814 58
815 1
816 48
817 1
818 31
819 1
820 66
822 17
823 2
824 58
826 10
827 2
828 25
829 1
830 29
831 1
832 63
833 1
834 26
835 3
836 52
837 1
838 18
840 27
841 2
842 12
843 1
844 83
845 1
846 7
847 1
848 10
850 26
852 25
853 1
854 15
856 27
858 32
859 1
860 15
862 43
864 32
865 1
866 6
868 39
870 11
872 25
873 1
874 10
875 1
876 20
877 2
878 19
879 1
880 30
882 11
884 53
886 25
887 1
888 28
890 6
892 36
894 10
896 13
898 14
900 31
902 14
903 2
904 43
906 25
908 9
910 11
911 1
912 16
913 1
914 24
916 27
918 6
920 15
922 27
923 1
924 23
926 13
928 42
929 1
930 3
932 27
934 17
936 8
937 1
938 11
940 33
942 4
943 1
944 18
946 15
948 13
950 18
952 12
954 11
956 21
958 10
960 13
962 5
964 32
966 13
968 8
970 8
971 1
972 23
973 2
974 12
975 1
976 22
978 7
979 1
980 14
982 8
984 22
985 1
986 6
988 17
989 1
990 6
992 13
994 19
996 11
998 4
1000 9
1002 2
1004 14
1006 5
1008 3
1010 9
1012 29
1014 6
1016 22
1017 1
1018 8
1019 1
1020 7
1022 6
1023 1
1024 10
1026 2
1028 8
1030 11
1031 2
1032 8
1034 9
1036 13
1038 12
1040 12
1042 3
1044 12
1046 3
1048 11
1050 2
1051 1
1052 2
1054 11
1056 6
1058 8
1059 1
1060 23
1062 6
1063 1
1064 8
1066 3
1068 6
1070 8
1071 1
1072 5
1074 3
1076 5
1078 3
1080 11
1081 1
1082 7
1084 18
1086 4
1087 1
1088 3
1090 3
1092 7
1094 3
1096 12
1098 6
1099 1
1100 2
1102 6
1104 14
1106 3
1108 6
1110 5
1112 2
1114 8
1116 3
1118 3
1120 7
1122 10
1124 6
1126 8
1128 1
1130 4
1132 3
1134 2
1136 5
1138 5
1140 8
1142 3
1144 7
1146 3
1148 11
1150 1
1152 5
1154 1
1156 5
1158 1
1160 5
1162 3
1164 6
1165 1
1166 1
1168 4
1169 1
1170 3
1171 1
1172 2
1174 5
1176 3
1177 1
1180 8
1182 2
1184 4
1186 2
1188 3
1190 2
1192 5
1194 6
1196 1
1198 2
1200 2
1204 10
1206 2
1208 9
1210 1
1214 6
1216 3
1218 4
1220 9
1221 2
1222 1
1224 5
1226 4
1228 8
1230 1
1232 1
1234 3
1236 5
1240 3
1242 1
1244 3
1245 1
1246 4
1248 6
1250 2
1252 7
1256 3
1258 2
1260 2
1262 3
1264 4
1265 1
1266 1
1270 1
1271 1
1272 2
1274 3
1276 3
1278 1
1280 3
1284 1
1286 1
1290 1
1292 3
1294 1
1296 7
1300 2
1302 4
1304 3
1306 2
1308 2
1312 1
1314 1
1316 3
1318 2
1320 1
1324 8
1326 1
1330 1
1331 1
1336 2
1338 1
1340 3
1341 1
1344 1
1346 2
1347 1
1348 3
1352 1
1354 2
1356 1
1358 1
1360 3
1362 1
1364 4
1366 1
1370 1
1372 3
1380 2
1384 2
1388 2
1390 2
1392 2
1394 1
1396 1
1398 1
1400 2
1402 1
1404 1
1406 1
1410 1
1412 5
1418 1
1420 1
1424 1
1432 2
1434 2
1442 3
1444 5
1448 1
1454 1
1456 1
1460 3
1462 4
1468 1
1474 1
1476 1
1478 2
1480 1
1486 2
1488 1
1492 1
1496 1
1500 3
1503 1
1506 1
1512 2
1516 1
1522 1
1524 2
1534 4
1536 1
1538 1
1540 2
1544 2
1548 1
1556 1
1560 1
1562 1
1564 2
1566 1
1568 1
1570 1
1572 1
1576 1
1590 1
1594 1
1604 1
1608 1
1614 1
1622 1
1624 2
1628 1
1629 1
1636 1
1642 1
1654 2
1660 1
1664 1
1670 1
1684 4
1698 1
1732 3
1742 1
1752 1
1760 1
1764 1
1772 2
1798 1
1808 1
1820 1
1852 1
1856 1
1874 1
1902 1
1908 1
1952 1
2004 1
2018 1
2020 1
2028 1
2174 1
2233 1
2244 1
2280 1
2290 1
2352 1
2604 1
4190 1
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
import os import os
import argparse import argparse
import json import json
...@@ -31,7 +31,9 @@ def gen_det_label(root_path, input_dir, out_label): ...@@ -31,7 +31,9 @@ def gen_det_label(root_path, input_dir, out_label):
for label_file in os.listdir(input_dir): for label_file in os.listdir(input_dir):
img_path = root_path + label_file[3:-4] + ".jpg" img_path = root_path + label_file[3:-4] + ".jpg"
label = [] label = []
with open(os.path.join(input_dir, label_file), 'r') as f: with open(
os.path.join(input_dir, label_file), 'r',
encoding='utf-8-sig') as f:
for line in f.readlines(): for line in f.readlines():
tmp = line.strip("\n\r").replace("\xef\xbb\xbf", tmp = line.strip("\n\r").replace("\xef\xbb\xbf",
"").split(',') "").split(',')
......
...@@ -22,7 +22,7 @@ logger_initialized = {} ...@@ -22,7 +22,7 @@ logger_initialized = {}
@functools.lru_cache() @functools.lru_cache()
def get_logger(name='root', log_file=None, log_level=logging.INFO): def get_logger(name='root', log_file=None, log_level=logging.DEBUG):
"""Initialize and get a logger by name. """Initialize and get a logger by name.
If the logger has not been initialized, this method will initialize the If the logger has not been initialized, this method will initialize the
logger by adding one or two handlers, otherwise the initialized logger will logger by adding one or two handlers, otherwise the initialized logger will
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import tarfile
import requests
from tqdm import tqdm
from ppocr.utils.logging import get_logger
def download_with_progressbar(url, save_path):
logger = get_logger()
response = requests.get(url, stream=True)
total_size_in_bytes = int(response.headers.get('content-length', 0))
block_size = 1024 # 1 Kibibyte
progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
with open(save_path, 'wb') as file:
for data in response.iter_content(block_size):
progress_bar.update(len(data))
file.write(data)
progress_bar.close()
if total_size_in_bytes == 0 or progress_bar.n != total_size_in_bytes:
logger.error("Something went wrong while downloading models")
sys.exit(0)
def maybe_download(model_storage_directory, url):
# using custom model
tar_file_name_list = [
'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel'
]
if not os.path.exists(
os.path.join(model_storage_directory, 'inference.pdiparams')
) or not os.path.exists(
os.path.join(model_storage_directory, 'inference.pdmodel')):
assert url.endswith('.tar'), 'Only supports tar compressed package'
tmp_path = os.path.join(model_storage_directory, url.split('/')[-1])
print('download {} to {}'.format(url, tmp_path))
os.makedirs(model_storage_directory, exist_ok=True)
download_with_progressbar(url, tmp_path)
with tarfile.open(tmp_path, 'r') as tarObj:
for member in tarObj.getmembers():
filename = None
for tar_file_name in tar_file_name_list:
if tar_file_name in member.name:
filename = tar_file_name
if filename is None:
continue
file = tarObj.extractfile(member)
with open(
os.path.join(model_storage_directory, filename),
'wb') as f:
f.write(file.read())
os.remove(tmp_path)
def is_link(s):
return s is not None and s.startswith('http')
def confirm_model_dir_url(model_dir, default_model_dir, default_url):
url = default_url
if model_dir is None or is_link(model_dir):
if is_link(model_dir):
url = model_dir
file_name = url.split('/')[-1][:-4]
model_dir = default_model_dir
model_dir = os.path.join(model_dir, file_name)
return model_dir, url
...@@ -25,7 +25,7 @@ import paddle ...@@ -25,7 +25,7 @@ import paddle
from ppocr.utils.logging import get_logger from ppocr.utils.logging import get_logger
__all__ = ['init_model', 'save_model', 'load_dygraph_pretrain'] __all__ = ['init_model', 'save_model', 'load_dygraph_params']
def _mkdir_if_not_exist(path, logger): def _mkdir_if_not_exist(path, logger):
...@@ -89,6 +89,55 @@ def init_model(config, model, optimizer=None, lr_scheduler=None): ...@@ -89,6 +89,55 @@ def init_model(config, model, optimizer=None, lr_scheduler=None):
return best_model_dict return best_model_dict
def load_dygraph_params(config, model, logger, optimizer):
ckp = config['Global']['checkpoints']
if ckp and os.path.exists(ckp + ".pdparams"):
pre_best_model_dict = init_model(config, model, optimizer)
return pre_best_model_dict
else:
pm = config['Global']['pretrained_model']
if pm is None:
return {}
if not os.path.exists(pm) and not os.path.exists(pm + ".pdparams"):
logger.info(f"The pretrained_model {pm} does not exists!")
return {}
pm = pm if pm.endswith('.pdparams') else pm + '.pdparams'
params = paddle.load(pm)
state_dict = model.state_dict()
new_state_dict = {}
for k1, k2 in zip(state_dict.keys(), params.keys()):
if list(state_dict[k1].shape) == list(params[k2].shape):
new_state_dict[k1] = params[k2]
else:
logger.info(
f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !"
)
model.set_state_dict(new_state_dict)
logger.info(f"loaded pretrained_model successful from {pm}")
return {}
def load_pretrained_params(model, path):
if path is None:
return False
if not os.path.exists(path) and not os.path.exists(path + ".pdparams"):
print(f"The pretrained_model {path} does not exists!")
return False
path = path if path.endswith('.pdparams') else path + '.pdparams'
params = paddle.load(path)
state_dict = model.state_dict()
new_state_dict = {}
for k1, k2 in zip(state_dict.keys(), params.keys()):
if list(state_dict[k1].shape) == list(params[k2].shape):
new_state_dict[k1] = params[k2]
else:
print(
f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !"
)
model.set_state_dict(new_state_dict)
print(f"load pretrain successful from {path}")
return model
def save_model(model, def save_model(model,
optimizer, optimizer,
model_path, model_path,
......
English | [简体中文](README_ch.md)
# PP-Structure
PP-Structure is an OCR toolkit that can be used for complex documents analysis. The main features are as follows:
- Support the layout analysis of documents, divide the documents into 5 types of areas **text, title, table, image and list** (conjunction with Layout-Parser)
- Support to extract the texts from the text, title, picture and list areas (used in conjunction with PP-OCR)
- Support to extract excel files from the table areas
- Support python whl package and command line usage, easy to use
- Support custom training for layout analysis and table structure tasks
## 1. Visualization
<img src="../doc/table/ppstructure.GIF" width="100%"/>
## 2. Installation
### 2.1 Install requirements
- **(1) Install PaddlePaddle**
```bash
pip3 install --upgrade pip
# GPU
python3 -m pip install paddlepaddle-gpu==2.1.1 -i https://mirror.baidu.com/pypi/simple
# CPU
python3 -m pip install paddlepaddle==2.1.1 -i https://mirror.baidu.com/pypi/simple
# For more,refer[Installation](https://www.paddlepaddle.org.cn/install/quick)。
```
- **(2) Install Layout-Parser**
```bash
pip3 install -U premailer paddleocr https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
```
### 2.2 Install PaddleOCR(including PP-OCR and PP-Structure)
- **(1) PIP install PaddleOCR whl package(inference only)**
```bash
pip install "paddleocr>=2.2"
```
- **(2) Clone PaddleOCR(Inference+training)**
```bash
git clone https://github.com/PaddlePaddle/PaddleOCR
```
## 3. Quick Start
### 3.1 Use by command line
```bash
paddleocr --image_dir=../doc/table/1.png --type=structure
```
### 3.2 Use by python API
```python
import os
import cv2
from paddleocr import PPStructure,draw_structure_result,save_structure_res
table_engine = PPStructure(show_log=True)
save_folder = './output/table'
img_path = '../doc/table/1.png'
img = cv2.imread(img_path)
result = table_engine(img)
save_structure_res(result, save_folder,os.path.basename(img_path).split('.')[0])
for line in result:
line.pop('img')
print(line)
from PIL import Image
font_path = '../doc/fonts/simfang.ttf'
image = Image.open(img_path).convert('RGB')
im_show = draw_structure_result(image, result,font_path=font_path)
im_show = Image.fromarray(im_show)
im_show.save('result.jpg')
```
### 3.3 Returned results format
The returned results of PP-Structure is a list composed of a dict, an example is as follows
```shell
[
{ 'type': 'Text',
'bbox': [34, 432, 345, 462],
'res': ([[36.0, 437.0, 341.0, 437.0, 341.0, 446.0, 36.0, 447.0], [41.0, 454.0, 125.0, 453.0, 125.0, 459.0, 41.0, 460.0]],
[('Tigure-6. The performance of CNN and IPT models using difforen', 0.90060663), ('Tent ', 0.465441)])
}
]
```
The description of each field in dict is as follows
| Parameter | Description |
| --------------- | -------------|
|type|Type of image area|
|bbox|The coordinates of the image area in the original image, respectively [left upper x, left upper y, right bottom x, right bottom y]|
|res|OCR or table recognition result of image area。<br> Table: HTML string of the table; <br> OCR: A tuple containing the detection coordinates and recognition results of each single line of text|
### 3.4 Parameter description:
| Parameter | Description | Default value |
| --------------- | ---------------------------------------- | ------------------------------------------- |
| output | The path where excel and recognition results are saved | ./output/table |
| table_max_len | The long side of the image is resized in table structure model | 488 |
| table_model_dir | inference model path of table structure model | None |
| table_char_type | dict path of table structure model | ../ppocr/utils/dict/table_structure_dict.tx |
Most of the parameters are consistent with the paddleocr whl package, see [doc of whl](../doc/doc_en/whl_en.md)
After running, each image will have a directory with the same name under the directory specified in the output field. Each table in the picture will be stored as an excel and figure area will be cropped and saved, the excel and image file name will be the coordinates of the table in the image.
## 4. PP-Structure Pipeline
the process is as follows
![pipeline](../doc/table/pipeline_en.jpg)
In PP-Structure, the image will be analyzed by layoutparser first. In the layout analysis, the area in the image will be classified, including **text, title, image, list and table** 5 categories. For the first 4 types of areas, directly use the PP-OCR to complete the text detection and recognition. The table area will be converted to an excel file of the same table style via Table OCR.
### 4.1 LayoutParser
Layout analysis divides the document data into regions, including the use of Python scripts for layout analysis tools, extraction of special category detection boxes, performance indicators, and custom training layout analysis models. For details, please refer to [document](layout/README_en.md).
### 4.2 Table Recognition
Table Recognition converts table image into excel documents, which include the detection and recognition of table text and the prediction of table structure and cell coordinates. For detailed, please refer to [document](table/README.md)
## 5. Prediction by inference engine
Use the following commands to complete the inference.
```python
cd PaddleOCR/ppstructure
# download model
mkdir inference && cd inference
# Download the detection model of the ultra-lightweight Chinese OCR model and uncompress it
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar
# Download the recognition model of the ultra-lightweight Chinese OCR model and uncompress it
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
# Download the table structure model of the ultra-lightweight Chinese OCR model and uncompress it
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
cd ..
python3 predict_system.py --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=ch --output=../output/table --vis_font_path=../doc/fonts/simfang.ttf
```
After running, each image will have a directory with the same name under the directory specified in the output field. Each table in the picture will be stored as an excel and figure area will be cropped and saved, the excel and image file name will be the coordinates of the table in the image.
**Model List**
|model name|description|config|model size|download|
| --- | --- | --- | --- | --- |
|en_ppocr_mobile_v2.0_table_structure|Table structure prediction for English table scenarios|[table_mv3.yml](../configs/table/table_mv3.yml)|18.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) |
**Model List**
LayoutParser model
|model name|description|download|
| --- | --- | --- |
| ppyolov2_r50vd_dcn_365e_publaynet | The layout analysis model trained on the PubLayNet data set can be divided into 5 types of areas **text, title, table, picture and list** | [PubLayNet](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) |
| ppyolov2_r50vd_dcn_365e_tableBank_word | The layout analysis model trained on the TableBank Word dataset can only detect tables | [TableBank Word](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_word.tar) |
| ppyolov2_r50vd_dcn_365e_tableBank_latex | The layout analysis model trained on the TableBank Latex dataset can only detect tables | [TableBank Latex](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_latex.tar) |
OCR and table recognition model
|model name|description|model size|download|
| --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_det|Slim pruned lightweight model, supporting Chinese, English, multilingual text detection|2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar) |
|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) |
|en_ppocr_mobile_v2.0_table_det|Text detection of English table scenes trained on PubLayNet dataset|4.7M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) |
|en_ppocr_mobile_v2.0_table_rec|Text recognition of English table scene trained on PubLayNet dataset|6.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) |
|en_ppocr_mobile_v2.0_table_structure|Table structure prediction of English table scene trained on PubLayNet dataset|18.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) |
If you need to use other models, you can download the model in [model_list](../doc/doc_en/models_list_en.md) or use your own trained model to configure it to the three fields of `det_model_dir`, `rec_model_dir`, `table_model_dir` .
[English](README.md) | 简体中文
# PP-Structure
PP-Structure是一个可用于复杂文档结构分析和处理的OCR工具包,主要特性如下:
- 支持对图片形式的文档进行版面分析,可以划分**文字、标题、表格、图片以及列表**5类区域(与Layout-Parser联合使用)
- 支持文字、标题、图片以及列表区域提取为文字字段(与PP-OCR联合使用)
- 支持表格区域进行结构化分析,最终结果输出Excel文件
- 支持python whl包和命令行两种方式,简单易用
- 支持版面分析和表格结构化两类任务自定义训练
## 1. 效果展示
<img src="../doc/table/ppstructure.GIF" width="100%"/>
## 2. 安装
### 2.1 安装依赖
- **(1) 安装PaddlePaddle**
```bash
pip3 install --upgrade pip
# GPU安装
python3 -m pip install paddlepaddle-gpu==2.1.1 -i https://mirror.baidu.com/pypi/simple
# CPU安装
python3 -m pip install paddlepaddle==2.1.1 -i https://mirror.baidu.com/pypi/simple
# 更多需求,请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。
```
- **(2) 安装 Layout-Parser**
```bash
pip3 install -U premailer paddleocr https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
```
### 2.2 安装PaddleOCR(包含PP-OCR和PP-Structure)
- **(1) PIP快速安装PaddleOCR whl包(仅预测)**
```bash
pip install "paddleocr>=2.2" # 推荐使用2.2+版本
```
- **(2) 完整克隆PaddleOCR源码(预测+训练)**
```bash
【推荐】git clone https://github.com/PaddlePaddle/PaddleOCR
#如果因为网络问题无法pull成功,也可选择使用码云上的托管:
git clone https://gitee.com/paddlepaddle/PaddleOCR
#注:码云托管代码可能无法实时同步本github项目更新,存在3~5天延时,请优先使用推荐方式。
```
## 3. PP-Structure 快速开始
### 3.1 命令行使用(默认参数,极简)
```bash
paddleocr --image_dir=../doc/table/1.png --type=structure
```
### 3.2 Python脚本使用(自定义参数,灵活)
```python
import os
import cv2
from paddleocr import PPStructure,draw_structure_result,save_structure_res
table_engine = PPStructure(show_log=True)
save_folder = './output/table'
img_path = '../doc/table/1.png'
img = cv2.imread(img_path)
result = table_engine(img)
save_structure_res(result, save_folder,os.path.basename(img_path).split('.')[0])
for line in result:
line.pop('img')
print(line)
from PIL import Image
font_path = '../doc/fonts/simfang.ttf' # PaddleOCR下提供字体包
image = Image.open(img_path).convert('RGB')
im_show = draw_structure_result(image, result,font_path=font_path)
im_show = Image.fromarray(im_show)
im_show.save('result.jpg')
```
### 3.3 返回结果说明
PP-Structure的返回结果为一个dict组成的list,示例如下
```shell
[
{ 'type': 'Text',
'bbox': [34, 432, 345, 462],
'res': ([[36.0, 437.0, 341.0, 437.0, 341.0, 446.0, 36.0, 447.0], [41.0, 454.0, 125.0, 453.0, 125.0, 459.0, 41.0, 460.0]],
[('Tigure-6. The performance of CNN and IPT models using difforen', 0.90060663), ('Tent ', 0.465441)])
}
]
```
dict 里各个字段说明如下
| 字段 | 说明 |
| --------------- | -------------|
|type|图片区域的类型|
|bbox|图片区域的在原图的坐标,分别[左上角x,左上角y,右下角x,右下角y]|
|res|图片区域的OCR或表格识别结果。<br> 表格: 表格的HTML字符串; <br> OCR: 一个包含各个单行文字的检测坐标和识别结果的元组|
### 3.4 参数说明
| 字段 | 说明 | 默认值 |
| --------------- | ---------------------------------------- | ------------------------------------------- |
| output | excel和识别结果保存的地址 | ./output/table |
| table_max_len | 表格结构模型预测时,图像的长边resize尺度 | 488 |
| table_model_dir | 表格结构模型 inference 模型地址 | None |
| table_char_type | 表格结构模型所用字典地址 | ../ppocr/utils/dict/table_structure_dict.tx |
大部分参数和paddleocr whl包保持一致,见 [whl包文档](../doc/doc_ch/whl.md)
运行完成后,每张图片会在`output`字段指定的目录下有一个同名目录,图片里的每个表格会存储为一个excel,图片区域会被裁剪之后保存下来,excel文件和图片名名为表格在图片里的坐标。
## 4. PP-Structure Pipeline介绍
![pipeline](../doc/table/pipeline.jpg)
在PP-Structure中,图片会先经由Layout-Parser进行版面分析,在版面分析中,会对图片里的区域进行分类,包括**文字、标题、图片、列表和表格**5类。对于前4类区域,直接使用PP-OCR完成对应区域文字检测与识别。对于表格类区域,经过表格结构化处理后,表格图片转换为相同表格样式的Excel文件。
### 4.1 版面分析
版面分析对文档数据进行区域分类,其中包括版面分析工具的Python脚本使用、提取指定类别检测框、性能指标以及自定义训练版面分析模型,详细内容可以参考[文档](layout/README_ch.md)
### 4.2 表格识别
表格识别将表格图片转换为excel文档,其中包含对于表格文本的检测和识别以及对于表格结构和单元格坐标的预测,详细说明参考[文档](table/README_ch.md)
## 5. 预测引擎推理(与whl包效果相同)
使用如下命令即可完成预测引擎的推理
```python
cd ppstructure
# 下载模型
mkdir inference && cd inference
# 下载超轻量级中文OCR模型的检测模型并解压
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar
# 下载超轻量级中文OCR模型的识别模型并解压
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
# 下载超轻量级英文表格英寸模型并解压
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
cd ..
python3 predict_system.py --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=ch --output=../output/table --vis_font_path=../doc/fonts/simfang.ttf
```
运行完成后,每张图片会在`output`字段指定的目录下有一个同名目录,图片里的每个表格会存储为一个excel,图片区域会被裁剪之后保存下来,excel文件和图片名名为表格在图片里的坐标。
**Model List**
LayoutParser 模型
|模型名称|模型简介|下载地址|
| --- | --- | --- |
| ppyolov2_r50vd_dcn_365e_publaynet | PubLayNet 数据集训练的版面分析模型,可以划分**文字、标题、表格、图片以及列表**5类区域 | [PubLayNet](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) |
| ppyolov2_r50vd_dcn_365e_tableBank_word | TableBank Word 数据集训练的版面分析模型,只能检测表格 | [TableBank Word](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_word.tar) |
| ppyolov2_r50vd_dcn_365e_tableBank_latex | TableBank Latex 数据集训练的版面分析模型,只能检测表格 | [TableBank Latex](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_latex.tar) |
OCR和表格识别模型
|模型名称|模型简介|推理模型大小|下载地址|
| --- | --- | --- | --- |
|ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型,支持中英文、多语种文本检测|2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar) |
|ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型,支持中英文、数字识别|6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) |
|en_ppocr_mobile_v2.0_table_det|PubLayNet数据集训练的英文表格场景的文字检测|4.7M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) |
|en_ppocr_mobile_v2.0_table_rec|PubLayNet数据集训练的英文表格场景的文字识别|6.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) |
|en_ppocr_mobile_v2.0_table_structure|PubLayNet数据集训练的英文表格场景的表格结构预测|18.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) |
如需要使用其他模型,可以在 [model_list](../doc/doc_ch/models_list.md) 下载模型或者使用自己训练好的模型配置到`det_model_dir`,`rec_model_dir`,`table_model_dir`三个字段即可。
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
English | [简体中文](README_ch.md)
# Getting Started
[1. Install whl package](#Install)
[2. Quick Start](#QuickStart)
[3. PostProcess](#PostProcess)
[4. Results](#Results)
[5. Training](#Training)
<a name="Install"></a>
## 1. Install whl package
```bash
wget https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
pip install -U layoutparser-0.0.0-py3-none-any.whl
```
<a name="QuickStart"></a>
## 2. Quick Start
Use LayoutParser to identify the layout of a document:
```python
import cv2
import layoutparser as lp
image = cv2.imread("doc/table/layout.jpg")
image = image[..., ::-1]
# load model
model = lp.PaddleDetectionLayoutModel(config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
threshold=0.5,
label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},
enforce_cpu=False,
enable_mkldnn=True)
# detect
layout = model.detect(image)
# show result
show_img = lp.draw_box(image, layout, box_width=3, show_element_type=True)
show_img.show()
```
The following figure shows the result, with different colored detection boxes representing different categories and displaying specific categories in the upper left corner of the box with `show_element_type`
<div align="center">
<img src="../../doc/table/result_all.jpg" width = "600" />
</div>
`PaddleDetectionLayoutModel`parameters are described as follows:
| parameter | description | default | remark |
| :------------: | :------------------------------------------------------: | :---------: | :----------------------------------------------------------: |
| config_path | model config path | None | Specify config_ path will automatically download the model (only for the first time,the model will exist and will not be downloaded again) |
| model_path | model path | None | local model path, config_ path and model_ path must be set to one, cannot be none at the same time |
| threshold | threshold of prediction score | 0.5 | \ |
| input_shape | picture size of reshape | [3,640,640] | \ |
| batch_size | testing batch size | 1 | \ |
| label_map | category mapping table | None | Setting config_ path, it can be none, and the label is automatically obtained according to the dataset name_ map |
| enforce_cpu | whether to use CPU | False | False to use GPU, and True to force the use of CPU |
| enforce_mkldnn | whether mkldnn acceleration is enabled in CPU prediction | True | \ |
| thread_num | the number of CPU threads | 10 | \ |
The following model configurations and label maps are currently supported, which you can use by modifying '--config_path' and '--label_map' to detect different types of content:
| dataset | config_path | label_map |
| ------------------------------------------------------------ | ------------------------------------------------------------ | --------------------------------------------------------- |
| [TableBank](https://doc-analysis.github.io/tablebank-page/index.html) word | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_word/config | {0:"Table"} |
| TableBank latex | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_latex/config | {0:"Table"} |
| [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) | lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config | {0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"} |
* TableBank word and TableBank latex are trained on datasets of word documents and latex documents respectively;
* Download TableBank dataset contains both word and latex。
<a name="PostProcess"></a>
## 3. PostProcess
Layout parser contains multiple categories, if you only want to get the detection box for a specific category (such as the "Text" category), you can use the following code:
```python
# follow the above code
# filter areas for a specific text type
text_blocks = lp.Layout([b for b in layout if b.type=='Text'])
figure_blocks = lp.Layout([b for b in layout if b.type=='Figure'])
# text areas may be detected within the image area, delete these areas
text_blocks = lp.Layout([b for b in text_blocks \
if not any(b.is_in(b_fig) for b_fig in figure_blocks)])
# sort text areas and assign ID
h, w = image.shape[:2]
left_interval = lp.Interval(0, w/2*1.05, axis='x').put_on_canvas(image)
left_blocks = text_blocks.filter_by(left_interval, center=True)
left_blocks.sort(key = lambda b:b.coordinates[1])
right_blocks = [b for b in text_blocks if b not in left_blocks]
right_blocks.sort(key = lambda b:b.coordinates[1])
# the two lists are merged and the indexes are added in order
text_blocks = lp.Layout([b.set(id = idx) for idx, b in enumerate(left_blocks + right_blocks)])
# display result
show_img = lp.draw_box(image, text_blocks,
box_width=3,
show_element_id=True)
show_img.show()
```
Displays results with only the "Text" category:
<div align="center">
<img src="../../doc/table/result_text.jpg" width = "600" />
</div>
<a name="Results"></a>
## 4. Results
| Dataset | mAP | CPU time cost | GPU time cost |
| --------- | ---- | ------------- | ------------- |
| PubLayNet | 93.6 | 1713.7ms | 66.6ms |
| TableBank | 96.2 | 1968.4ms | 65.1ms |
**Envrionment:**
**CPU:** Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz,24core
**GPU:** a single NVIDIA Tesla P40
<a name="Training"></a>
## 5. Training
The above model is based on [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection). If you want to train your own layout parser model,please refer to:[train_layoutparser_model](train_layoutparser_model.md)
[English](README.md) | 简体中文
# 版面分析使用说明
[1. 安装whl包](#安装whl包)
[2. 使用](#使用)
[3. 后处理](#后处理)
[4. 指标](#指标)
[5. 训练版面分析模型](#训练版面分析模型)
<a name="安装whl包"></a>
## 1. 安装whl包
```bash
pip install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
```
<a name="使用"></a>
## 2. 使用
使用layoutparser识别给定文档的布局:
```python
import cv2
import layoutparser as lp
image = cv2.imread("doc/table/layout.jpg")
image = image[..., ::-1]
# 加载模型
model = lp.PaddleDetectionLayoutModel(config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
threshold=0.5,
label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},
enforce_cpu=False,
enable_mkldnn=True)
# 检测
layout = model.detect(image)
# 显示结果
show_img = lp.draw_box(image, layout, box_width=3, show_element_type=True)
show_img.show()
```
下图展示了结果,不同颜色的检测框表示不同的类别,并通过`show_element_type`在框的左上角显示具体类别:
<div align="center">
<img src="../../doc/table/result_all.jpg" width = "600" />
</div>
`PaddleDetectionLayoutModel`函数参数说明如下:
| 参数 | 含义 | 默认值 | 备注 |
| :------------: | :-------------------------: | :---------: | :----------------------------------------------------------: |
| config_path | 模型配置路径 | None | 指定config_path会自动下载模型(仅第一次,之后模型存在,不会再下载) |
| model_path | 模型路径 | None | 本地模型路径,config_path和model_path必须设置一个,不能同时为None |
| threshold | 预测得分的阈值 | 0.5 | \ |
| input_shape | reshape之后图片尺寸 | [3,640,640] | \ |
| batch_size | 测试batch size | 1 | \ |
| label_map | 类别映射表 | None | 设置config_path时,可以为None,根据数据集名称自动获取label_map |
| enforce_cpu | 代码是否使用CPU运行 | False | 设置为False表示使用GPU,True表示强制使用CPU |
| enforce_mkldnn | CPU预测中是否开启MKLDNN加速 | True | \ |
| thread_num | 设置CPU线程数 | 10 | \ |
目前支持以下几种模型配置和label map,您可以通过修改 `--config_path``--label_map`使用这些模型,从而检测不同类型的内容:
| dataset | config_path | label_map |
| ------------------------------------------------------------ | ------------------------------------------------------------ | --------------------------------------------------------- |
| [TableBank](https://doc-analysis.github.io/tablebank-page/index.html) word | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_word/config | {0:"Table"} |
| TableBank latex | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_latex/config | {0:"Table"} |
| [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) | lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config | {0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"} |
* TableBank word和TableBank latex分别在word文档、latex文档数据集训练;
* 下载的TableBank数据集里同时包含word和latex。
<a name="后处理"></a>
## 3. 后处理
版面分析检测包含多个类别,如果只想获取指定类别(如"Text"类别)的检测框、可以使用下述代码:
```python
# 接上面代码
# 首先过滤特定文本类型的区域
text_blocks = lp.Layout([b for b in layout if b.type=='Text'])
figure_blocks = lp.Layout([b for b in layout if b.type=='Figure'])
# 因为在图像区域内可能检测到文本区域,所以只需要删除它们
text_blocks = lp.Layout([b for b in text_blocks \
if not any(b.is_in(b_fig) for b_fig in figure_blocks)])
# 对文本区域排序并分配id
h, w = image.shape[:2]
left_interval = lp.Interval(0, w/2*1.05, axis='x').put_on_canvas(image)
left_blocks = text_blocks.filter_by(left_interval, center=True)
left_blocks.sort(key = lambda b:b.coordinates[1])
right_blocks = [b for b in text_blocks if b not in left_blocks]
right_blocks.sort(key = lambda b:b.coordinates[1])
# 最终合并两个列表,并按顺序添加索引
text_blocks = lp.Layout([b.set(id = idx) for idx, b in enumerate(left_blocks + right_blocks)])
# 显示结果
show_img = lp.draw_box(image, text_blocks,
box_width=3,
show_element_id=True)
show_img.show()
```
显示只有"Text"类别的结果:
<div align="center">
<img src="../../doc/table/result_text.jpg" width = "600" />
</div>
<a name="指标"></a>
## 4. 指标
| Dataset | mAP | CPU time cost | GPU time cost |
| --------- | ---- | ------------- | ------------- |
| PubLayNet | 93.6 | 1713.7ms | 66.6ms |
| TableBank | 96.2 | 1968.4ms | 65.1ms |
**Envrionment:**
**CPU:** Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz,24core
**GPU:** a single NVIDIA Tesla P40
<a name="训练版面分析模型"></a>
## 5. 训练版面分析模型
上述模型基于[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection) 训练,如果您想训练自己的版面分析模型,请参考:[train_layoutparser_model](train_layoutparser_model_ch.md)
# Training layout-parse
[1. Installation](#Installation)
[1.1 Requirements](#Requirements)
[1.2 Install PaddleDetection](#Install PaddleDetection)
[2. Data preparation](#Data preparation)
[3. Configuration](#Configuration)
[4. Training](#Training)
[5. Prediction](#Prediction)
[6. Deployment](#Deployment)
[6.1 Export model](#Export model)
[6.2 Inference](#Inference)
<a name="Installation"></a>
## 1. Installation
<a name="Requirements"></a>
### 1.1 Requirements
- PaddlePaddle 2.1
- OS 64 bit
- Python 3(3.5.1+/3.6/3.7/3.8/3.9),64 bit
- pip/pip3(9.0.1+), 64 bit
- CUDA >= 10.1
- cuDNN >= 7.6
<a name="Install PaddleDetection"></a>
### 1.2 Install PaddleDetection
```bash
# Clone PaddleDetection repository
cd <path/to/clone/PaddleDetection>
git clone https://github.com/PaddlePaddle/PaddleDetection.git
cd PaddleDetection
# Install other dependencies
pip install -r requirements.txt
```
For more installation tutorials, please refer to: [Install doc](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/INSTALL_cn.md)
<a name="Data preparation"></a>
## 2. Data preparation
Download the [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) dataset
```bash
cd PaddleDetection/dataset/
mkdir publaynet
# execute the command,download PubLayNet
wget -O publaynet.tar.gz https://dax-cdn.cdn.appdomain.cloud/dax-publaynet/1.0.0/publaynet.tar.gz?_ga=2.104193024.1076900768.1622560733-649911202.1622560733
# unpack
tar -xvf publaynet.tar.gz
```
PubLayNet directory structure after decompressing :
| File or Folder | Description | num |
| :------------- | :----------------------------------------------- | ------- |
| `train/` | Images in the training subset | 335,703 |
| `val/` | Images in the validation subset | 11,245 |
| `test/` | Images in the testing subset | 11,405 |
| `train.json` | Annotations for training images | 1 |
| `val.json` | Annotations for validation images | 1 |
| `LICENSE.txt` | Plaintext version of the CDLA-Permissive license | 1 |
| `README.txt` | Text file with the file names and description | 1 |
For other datasets,please refer to [the PrepareDataSet]((https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/PrepareDataSet.md) )
<a name="Configuration"></a>
## 3. Configuration
We use the `configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml` configuration for training,the configuration file is as follows
```bash
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'./_base_/ppyolov2_r50vd_dcn.yml',
'./_base_/optimizer_365e.yml',
'./_base_/ppyolov2_reader.yml',
]
snapshot_epoch: 8
weights: output/ppyolov2_r50vd_dcn_365e_coco/model_final
```
The `ppyolov2_r50vd_dcn_365e_coco.yml` configuration depends on other configuration files, in this case:
- coco_detection.yml:mainly explains the path of training data and verification data
- runtime.yml:mainly describes the common parameters, such as whether to use the GPU and how many epoch to save model etc.
- optimizer_365e.yml:mainly explains the learning rate and optimizer configuration
- ppyolov2_r50vd_dcn.yml:mainly describes the model and the network
- ppyolov2_reader.yml:mainly describes the configuration of data readers, such as batch size and number of concurrent loading child processes, and also includes post preprocessing, such as resize and data augmention etc.
Modify the preceding files, such as the dataset path and batch size etc.
<a name="Training"></a>
## 4. Training
PaddleDetection provides single-card/multi-card training mode to meet various training needs of users:
* GPU single card training
```bash
export CUDA_VISIBLE_DEVICES=0 #Don't need to run this command on Windows and Mac
python tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml
```
* GPU multi-card training
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval
```
--eval: training while verifying
* Model recovery training
During the daily training, if training is interrupted due to some reasons, you can use the -r command to resume the training:
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval -r output/ppyolov2_r50vd_dcn_365e_coco/10000
```
Note: If you encounter "`Out of memory error`" , try reducing `batch_size` in the `ppyolov2_reader.yml` file
prediction<a name="Prediction"></a>
## 5. Prediction
Set parameters and use PaddleDetection to predict:
```bash
export CUDA_VISIBLE_DEVICES=0
python tools/infer.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --infer_img=images/paper-image.jpg --output_dir=infer_output/ --draw_threshold=0.5 -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final --use_vdl=Ture
```
`--draw_threshold` is an optional parameter. According to the calculation of [NMS](https://ieeexplore.ieee.org/document/1699659), different threshold will produce different results, ` keep_top_k ` represent the maximum amount of output target, the default value is 10. You can set different value according to your own actual situation。
<a name="Deployment"></a>
## 6. Deployment
Use your trained model in Layout Parser
<a name="Export model"></a>
### 6.1 Export model
n the process of model training, the model file saved contains the process of forward prediction and back propagation. In the actual industrial deployment, there is no need for back propagation. Therefore, the model should be translated into the model format required by the deployment. The `tools/export_model.py` script is provided in PaddleDetection to export the model.
The exported model name defaults to `model.*`, Layout Parser's code model is `inference.*`, So change [PaddleDetection/ppdet/engine/trainer. Py ](https://github.com/PaddlePaddle/PaddleDetection/blob/b87a1ea86fa18ce69e44a17ad1b49c1326f19ff9/ppdet/engine/trainer.py# L512) (click on the link to see the detailed line of code), change 'model' to 'inference'.
Execute the script to export model:
```bash
python tools/export_model.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --output_dir=./inference -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final.pdparams
```
The prediction model is exported to `inference/ppyolov2_r50vd_dcn_365e_coco` ,including:`infer_cfg.yml`(prediction not required), `inference.pdiparams`, `inference.pdiparams.info`,`inference.pdmodel`
More model export tutorials, please refer to:[EXPORT_MODEL](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md)
<a name="Inference"></a>
### 6.2 Inference
`model_path` represent the trained model path, and layoutparser is used to predict:
```bash
import layoutparser as lp
model = lp.PaddleDetectionLayoutModel(model_path="inference/ppyolov2_r50vd_dcn_365e_coco", threshold=0.5,label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},enforce_cpu=True,enable_mkldnn=True)
```
***
More PaddleDetection training tutorials,please reference:[PaddleDetection Training](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/GETTING_STARTED_cn.md)
***
# 训练版面分析
[1. 安装](#安装)
[1.1 环境要求](#环境要求)
[1.2 安装PaddleDetection](#安装PaddleDetection)
[2. 准备数据](#准备数据)
[3. 配置文件改动和说明](#配置文件改动和说明)
[4. PaddleDetection训练](#训练)
[5. PaddleDetection预测](#预测)
[6. 预测部署](#预测部署)
[6.1 模型导出](#模型导出)
[6.2 layout parser预测](#layout_parser预测)
<a name="安装"></a>
## 1. 安装
<a name="环境要求"></a>
### 1.1 环境要求
- PaddlePaddle 2.1
- OS 64 bit
- Python 3(3.5.1+/3.6/3.7/3.8/3.9),64 bit
- pip/pip3(9.0.1+), 64 bit
- CUDA >= 10.1
- cuDNN >= 7.6
<a name="安装PaddleDetection"></a>
### 1.2 安装PaddleDetection
```bash
# 克隆PaddleDetection仓库
cd <path/to/clone/PaddleDetection>
git clone https://github.com/PaddlePaddle/PaddleDetection.git
cd PaddleDetection
# 安装其他依赖
pip install -r requirements.txt
```
更多安装教程,请参考: [Install doc](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/INSTALL_cn.md)
<a name="数据准备"></a>
## 2. 准备数据
下载 [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) 数据集:
```bash
cd PaddleDetection/dataset/
mkdir publaynet
# 执行命令,下载
wget -O publaynet.tar.gz https://dax-cdn.cdn.appdomain.cloud/dax-publaynet/1.0.0/publaynet.tar.gz?_ga=2.104193024.1076900768.1622560733-649911202.1622560733
# 解压
tar -xvf publaynet.tar.gz
```
解压之后PubLayNet目录结构:
| File or Folder | Description | num |
| :------------- | :----------------------------------------------- | ------- |
| `train/` | Images in the training subset | 335,703 |
| `val/` | Images in the validation subset | 11,245 |
| `test/` | Images in the testing subset | 11,405 |
| `train.json` | Annotations for training images | 1 |
| `val.json` | Annotations for validation images | 1 |
| `LICENSE.txt` | Plaintext version of the CDLA-Permissive license | 1 |
| `README.txt` | Text file with the file names and description | 1 |
如果使用其它数据集,请参考[准备训练数据](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/PrepareDataSet.md)
<a name="配置文件改动和说明"></a>
## 3. 配置文件改动和说明
我们使用 `configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml`配置进行训练,配置文件摘要如下:
```bash
_BASE_: [
'../datasets/coco_detection.yml',
'../runtime.yml',
'./_base_/ppyolov2_r50vd_dcn.yml',
'./_base_/optimizer_365e.yml',
'./_base_/ppyolov2_reader.yml',
]
snapshot_epoch: 8
weights: output/ppyolov2_r50vd_dcn_365e_coco/model_final
```
从中可以看到 `ppyolov2_r50vd_dcn_365e_coco.yml` 配置需要依赖其他的配置文件,在该例子中需要依赖:
- coco_detection.yml:主要说明了训练数据和验证数据的路径
- runtime.yml:主要说明了公共的运行参数,比如是否使用GPU、每多少个epoch存储checkpoint等
- optimizer_365e.yml:主要说明了学习率和优化器的配置
- ppyolov2_r50vd_dcn.yml:主要说明模型和主干网络的情况
- ppyolov2_reader.yml:主要说明数据读取器配置,如batch size,并发加载子进程数等,同时包含读取后预处理操作,如resize、数据增强等等
根据实际情况,修改上述文件,比如数据集路径、batch size等。
<a name="训练"></a>
## 4. PaddleDetection训练
PaddleDetection提供了单卡/多卡训练模式,满足用户多种训练需求
* GPU 单卡训练
```bash
export CUDA_VISIBLE_DEVICES=0 #windows和Mac下不需要执行该命令
python tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml
```
* GPU多卡训练
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval
```
--eval:表示边训练边验证
* 模型恢复训练
在日常训练过程中,有的用户由于一些原因导致训练中断,用户可以使用-r的命令恢复训练:
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval -r output/ppyolov2_r50vd_dcn_365e_coco/10000
```
注意:如果遇到 "`Out of memory error`" 问题, 尝试在 `ppyolov2_reader.yml` 文件中调小`batch_size`
<a name="预测"></a>
## 5. PaddleDetection预测
设置参数,使用PaddleDetection预测:
```bash
export CUDA_VISIBLE_DEVICES=0
python tools/infer.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --infer_img=images/paper-image.jpg --output_dir=infer_output/ --draw_threshold=0.5 -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final --use_vdl=Ture
```
`--draw_threshold` 是个可选参数. 根据 [NMS](https://ieeexplore.ieee.org/document/1699659) 的计算,不同阈值会产生不同的结果 `keep_top_k`表示设置输出目标的最大数量,默认值为100,用户可以根据自己的实际情况进行设定。
<a name="预测部署"></a>
## 6. 预测部署
在layout parser中使用自己训练好的模型。
<a name="模型导出"></a>
### 6.1 模型导出
在模型训练过程中保存的模型文件是包含前向预测和反向传播的过程,在实际的工业部署则不需要反向传播,因此需要将模型进行导成部署需要的模型格式。 在PaddleDetection中提供了 `tools/export_model.py`脚本来导出模型。
导出模型名称默认是`model.*`,layout parser代码模型名称是`inference.*`, 所以修改[PaddleDetection/ppdet/engine/trainer.py ](https://github.com/PaddlePaddle/PaddleDetection/blob/b87a1ea86fa18ce69e44a17ad1b49c1326f19ff9/ppdet/engine/trainer.py#L512) (点开链接查看详细代码行),将`model`改为`inference`即可。
执行导出模型脚本:
```bash
python tools/export_model.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --output_dir=./inference -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final.pdparams
```
预测模型会导出到`inference/ppyolov2_r50vd_dcn_365e_coco`目录下,分别为`infer_cfg.yml`(预测不需要), `inference.pdiparams`, `inference.pdiparams.info`,`inference.pdmodel`
更多模型导出教程,请参考:[EXPORT_MODEL](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md)
<a name="layout parser预测"></a>
### 6.2 layout_parser预测
`model_path`指定训练好的模型路径,使用layout parser进行预测:
```bash
import layoutparser as lp
model = lp.PaddleDetectionLayoutModel(model_path="inference/ppyolov2_r50vd_dcn_365e_coco", threshold=0.5,label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},enforce_cpu=True,enable_mkldnn=True)
```
***
更多PaddleDetection训练教程,请参考:[PaddleDetection训练](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/GETTING_STARTED_cn.md)
***
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import subprocess
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
import cv2
import numpy as np
import time
import logging
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.utils.logging import get_logger
from tools.infer.predict_system import TextSystem
from ppstructure.table.predict_table import TableSystem, to_excel
from ppstructure.utility import parse_args, draw_structure_result
logger = get_logger()
class OCRSystem(object):
def __init__(self, args):
import layoutparser as lp
# args.det_limit_type = 'resize_long'
args.drop_score = 0
if not args.show_log:
logger.setLevel(logging.INFO)
self.text_system = TextSystem(args)
self.table_system = TableSystem(args, self.text_system.text_detector, self.text_system.text_recognizer)
config_path = None
model_path = None
if os.path.isdir(args.layout_path_model):
model_path = args.layout_path_model
else:
config_path = args.layout_path_model
self.table_layout = lp.PaddleDetectionLayoutModel(config_path=config_path,
model_path=model_path,
threshold=0.5, enable_mkldnn=args.enable_mkldnn,
enforce_cpu=not args.use_gpu, thread_num=args.cpu_threads)
self.use_angle_cls = args.use_angle_cls
self.drop_score = args.drop_score
def __call__(self, img):
ori_im = img.copy()
layout_res = self.table_layout.detect(img[..., ::-1])
res_list = []
for region in layout_res:
x1, y1, x2, y2 = region.coordinates
x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
roi_img = ori_im[y1:y2, x1:x2, :]
if region.type == 'Table':
res = self.table_system(roi_img)
else:
filter_boxes, filter_rec_res = self.text_system(roi_img)
filter_boxes = [x + [x1, y1] for x in filter_boxes]
filter_boxes = [x.reshape(-1).tolist() for x in filter_boxes]
# remove style char
style_token = ['<strike>', '<strike>', '<sup>', '</sub>', '<b>', '</b>', '<sub>', '</sup>',
'<overline>', '</overline>', '<underline>', '</underline>', '<i>', '</i>']
filter_rec_res_tmp = []
for rec_res in filter_rec_res:
rec_str, rec_conf = rec_res
for token in style_token:
if token in rec_str:
rec_str = rec_str.replace(token, '')
filter_rec_res_tmp.append((rec_str, rec_conf))
res = (filter_boxes, filter_rec_res_tmp)
res_list.append({'type': region.type, 'bbox': [x1, y1, x2, y2], 'img': roi_img, 'res': res})
return res_list
def save_structure_res(res, save_folder, img_name):
excel_save_folder = os.path.join(save_folder, img_name)
os.makedirs(excel_save_folder, exist_ok=True)
# save res
with open(os.path.join(excel_save_folder, 'res.txt'), 'w', encoding='utf8') as f:
for region in res:
if region['type'] == 'Table':
excel_path = os.path.join(excel_save_folder, '{}.xlsx'.format(region['bbox']))
to_excel(region['res'], excel_path)
if region['type'] == 'Figure':
roi_img = region['img']
img_path = os.path.join(excel_save_folder, '{}.jpg'.format(region['bbox']))
cv2.imwrite(img_path, roi_img)
else:
for box, rec_res in zip(region['res'][0], region['res'][1]):
f.write('{}\t{}\n'.format(np.array(box).reshape(-1).tolist(), rec_res))
def main(args):
image_file_list = get_image_file_list(args.image_dir)
image_file_list = image_file_list
image_file_list = image_file_list[args.process_id::args.total_process_num]
save_folder = args.output
os.makedirs(save_folder, exist_ok=True)
structure_sys = OCRSystem(args)
img_num = len(image_file_list)
for i, image_file in enumerate(image_file_list):
logger.info("[{}/{}] {}".format(i, img_num, image_file))
img, flag = check_and_read_gif(image_file)
img_name = os.path.basename(image_file).split('.')[0]
if not flag:
img = cv2.imread(image_file)
if img is None:
logger.error("error in loading image:{}".format(image_file))
continue
starttime = time.time()
res = structure_sys(img)
save_structure_res(res, save_folder, img_name)
draw_img = draw_structure_result(img, res, args.vis_font_path)
cv2.imwrite(os.path.join(save_folder, img_name, 'show.jpg'), draw_img)
logger.info('result save to {}'.format(os.path.join(save_folder, img_name)))
elapse = time.time() - starttime
logger.info("Predict time : {:.3f}s".format(elapse))
if __name__ == "__main__":
args = parse_args()
if args.use_mp:
p_list = []
total_process_num = args.total_process_num
for process_id in range(total_process_num):
cmd = [sys.executable, "-u"] + sys.argv + [
"--process_id={}".format(process_id),
"--use_mp={}".format(False)
]
p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout)
p_list.append(p)
for p in p_list:
p.wait()
else:
main(args)
# Table Recognition
## 1. pipeline
The table recognition mainly contains three models
1. Single line text detection-DB
2. Single line text recognition-CRNN
3. Table structure and cell coordinate prediction-RARE
The table recognition flow chart is as follows
![tableocr_pipeline](../../doc/table/tableocr_pipeline_en.jpg)
1. The coordinates of single-line text is detected by DB model, and then sends it to the recognition model to get the recognition result.
2. The table structure and cell coordinates is predicted by RARE model.
3. The recognition result of the cell is combined by the coordinates, recognition result of the single line and the coordinates of the cell.
4. The cell recognition result and the table structure together construct the html string of the table.
## 2. Performance
We evaluated the algorithm on the PubTabNet<sup>[1]</sup> eval dataset, and the performance is as follows:
|Method|[TEDS(Tree-Edit-Distance-based Similarity)](https://github.com/ibm-aur-nlp/PubTabNet/tree/master/src)|
| --- | --- |
| EDD<sup>[2]</sup> | 88.3 |
| Ours | 93.32 |
## 3. How to use
### 3.1 quick start
```python
cd PaddleOCR/ppstructure
# download model
mkdir inference && cd inference
# Download the detection model of the ultra-lightweight table English OCR model and unzip it
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar
# Download the recognition model of the ultra-lightweight table English OCR model and unzip it
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar
# Download the ultra-lightweight English table inch model and unzip it
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
cd ..
# run
python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/table.jpg --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=ch --det_limit_side_len=736 --det_limit_type=min --output ../output/table
```
Note: The above model is trained on the PubLayNet dataset and only supports English scanning scenarios. If you need to identify other scenarios, you need to train the model yourself and replace the three fields `det_model_dir`, `rec_model_dir`, `table_model_dir`.
After running, the excel sheet of each picture will be saved in the directory specified by the output field
### 3.2 Train
In this chapter, we only introduce the training of the table structure model, For model training of [text detection](../../doc/doc_en/detection_en.md) and [text recognition](../../doc/doc_en/recognition_en.md), please refer to the corresponding documents
#### data preparation
The training data uses public data set [PubTabNet](https://arxiv.org/abs/1911.10683 ), Can be downloaded from the official [website](https://github.com/ibm-aur-nlp/PubTabNet) 。The PubTabNet data set contains about 500,000 images, as well as annotations in html format。
#### Start training
*If you are installing the cpu version of paddle, please modify the `use_gpu` field in the configuration file to false*
```shell
# single GPU training
python3 tools/train.py -c configs/table/table_mv3.yml
# multi-GPU training
# Set the GPU ID used by the '--gpus' parameter.
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/table/table_mv3.yml
```
In the above instruction, use `-c` to select the training to use the `configs/table/table_mv3.yml` configuration file.
For a detailed explanation of the configuration file, please refer to [config](../../doc/doc_en/config_en.md).
#### load trained model and continue training
If you expect to load trained model and continue the training again, you can specify the parameter `Global.checkpoints` as the model path to be loaded.
```shell
python3 tools/train.py -c configs/table/table_mv3.yml -o Global.checkpoints=./your/trained/model
```
**Note**: The priority of `Global.checkpoints` is higher than that of `Global.pretrain_weights`, that is, when two parameters are specified at the same time, the model specified by `Global.checkpoints` will be loaded first. If the model path specified by `Global.checkpoints` is wrong, the one specified by `Global.pretrain_weights` will be loaded.
### 3.3 Eval
The table uses [TEDS(Tree-Edit-Distance-based Similarity)](https://github.com/ibm-aur-nlp/PubTabNet/tree/master/src) as the evaluation metric of the model. Before the model evaluation, the three models in the pipeline need to be exported as inference models (we have provided them), and the gt for evaluation needs to be prepared. Examples of gt are as follows:
```json
{"PMC4289340_004_00.png": [
["<html>", "<body>", "<table>", "<thead>", "<tr>", "<td>", "</td>", "<td>", "</td>", "<td>", "</td>", "</tr>", "</thead>", "<tbody>", "<tr>", "<td>", "</td>", "<td>", "</td>", "<td>", "</td>", "</tr>", "</tbody>", "</table>", "</body>", "</html>"],
[[1, 4, 29, 13], [137, 4, 161, 13], [215, 4, 236, 13], [1, 17, 30, 27], [137, 17, 147, 27], [215, 17, 225, 27]],
[["<b>", "F", "e", "a", "t", "u", "r", "e", "</b>"], ["<b>", "G", "b", "3", " ", "+", "</b>"], ["<b>", "G", "b", "3", " ", "-", "</b>"], ["<b>", "P", "a", "t", "i", "e", "n", "t", "s", "</b>"], ["6", "2"], ["4", "5"]]
]}
```
In gt json, the key is the image name, the value is the corresponding gt, and gt is a list composed of four items, and each item is
1. HTML string list of table structure
2. The coordinates of each cell (not including the empty text in the cell)
3. The text information in each cell (not including the empty text in the cell)
Use the following command to evaluate. After the evaluation is completed, the teds indicator will be output.
```python
cd PaddleOCR/ppstructure
python3 table/eval_table.py --det_model_dir=path/to/det_model_dir --rec_model_dir=path/to/rec_model_dir --table_model_dir=path/to/table_model_dir --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --gt_path=path/to/gt.json
```
If the PubLatNet eval dataset is used, it will be output
```bash
teds: 93.32
```
### 3.4 Inference
```python
cd PaddleOCR/ppstructure
python3 table/predict_table.py --det_model_dir=path/to/det_model_dir --rec_model_dir=path/to/rec_model_dir --table_model_dir=path/to/table_model_dir --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --output ../output/table
```
After running, the excel sheet of each picture will be saved in the directory specified by the output field
Reference
1. https://github.com/ibm-aur-nlp/PubTabNet
2. https://arxiv.org/pdf/1911.10683
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment