Merge branch 'dygraph' into dygraph

adc62fcd · topduke · GitHub · 8227ad1b · a81b88a0 · adc62fcd
Unverified Commit adc62fcd authored Aug 17, 2021 by topduke Committed by GitHub Aug 17, 2021
20 changed files
--- a/ppocr/modeling/necks/table_fpn.py
+++ b/ppocr/modeling/necks/table_fpn.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+class TableFPN(nn.Layer):
+    """Feature-pyramid neck that fuses four backbone feature maps into one.
+    Each input is projected to ``self.out_channels`` channels by a bias-free
+    1x1 convolution, the projections are merged top-down with nearest
+    upsampling, the merged maps are resized to the spatial size of the
+    deepest projection, concatenated, passed through a 3x3 convolution and
+    added to ``c5`` after scaling by 0.005.
+    Args:
+        in_channels: indexable with the channel counts of the four inputs,
+            in the order (c2, c3, c4, c5).
+        out_channels: accepted but not read; the width is always 512.
+        **kwargs: accepted and ignored.
+    """
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(TableFPN, self).__init__()
+        # The width is hard-coded; the ``out_channels`` argument is not used.
+        self.out_channels = 512
+        width = self.out_channels
+        kaiming = paddle.nn.initializer.KaimingUniform()
+        def bias_free_conv(cin, cout, ksize, **extra):
+            # All layers share one initializer object and carry no bias.
+            return nn.Conv2D(
+                in_channels=cin,
+                out_channels=cout,
+                kernel_size=ksize,
+                weight_attr=ParamAttr(initializer=kaiming),
+                bias_attr=False,
+                **extra)
+        # 1x1 lateral projections; sub-layers are created in the order
+        # in2, in3, in4, in5 so parameter registration order is unchanged.
+        self.in2_conv = bias_free_conv(in_channels[0], width, 1)
+        self.in3_conv = bias_free_conv(in_channels[1], width, 1, stride=1)
+        self.in4_conv = bias_free_conv(in_channels[2], width, 1)
+        self.in5_conv = bias_free_conv(in_channels[3], width, 1)
+        # 3x3 convolutions p5..p2: registered as sub-layers (so they own
+        # parameters) but never called by forward().
+        self.p5_conv = bias_free_conv(width, width // 4, 3, padding=1)
+        self.p4_conv = bias_free_conv(width, width // 4, 3, padding=1)
+        self.p3_conv = bias_free_conv(width, width // 4, 3, padding=1)
+        self.p2_conv = bias_free_conv(width, width // 4, 3, padding=1)
+        # Fuses the concatenation of the four resized maps (4 * width channels).
+        self.fuse_conv = bias_free_conv(width * 4, 512, 3, padding=1)
+    def forward(self, x):
+        """Fuse the four feature maps in ``x`` and return a one-element list.
+        ``x`` is unpacked as (c2, c3, c4, c5). The returned list holds
+        ``c5 + 0.005 * fuse_conv(...)``, so ``c5`` must be addable to the
+        fuse convolution output.
+        """
+        c2, c3, c4, c5 = x
+        lat5 = self.in5_conv(c5)
+        lat4 = self.in4_conv(c4)
+        lat3 = self.in3_conv(c3)
+        lat2 = self.in2_conv(c2)
+        # Top-down pathway: upsample the coarser map to the finer one and add.
+        merged4 = lat4 + F.upsample(
+            lat5, size=lat4.shape[2:4], mode="nearest", align_mode=1)  # 1/16
+        merged3 = lat3 + F.upsample(
+            merged4, size=lat3.shape[2:4], mode="nearest", align_mode=1)  # 1/8
+        merged2 = lat2 + F.upsample(
+            merged3, size=lat2.shape[2:4], mode="nearest", align_mode=1)  # 1/4
+        # Bring every merged map to the spatial size of the deepest projection.
+        p4 = F.upsample(
+            merged4, size=lat5.shape[2:4], mode="nearest", align_mode=1)
+        p3 = F.upsample(
+            merged3, size=lat5.shape[2:4], mode="nearest", align_mode=1)
+        p2 = F.upsample(
+            merged2, size=lat5.shape[2:4], mode="nearest", align_mode=1)
+        stacked = paddle.concat([lat5, p4, p3, p2], axis=1)
+        # The fused features are strongly down-weighted before the residual add.
+        residual = self.fuse_conv(stacked) * 0.005
+        return [c5 + residual]
--- a/ppocr/modeling/transforms/tps.py
+++ b/ppocr/modeling/transforms/tps.py
@@ -230,15 +230,8 @@ class GridGenerator(nn.Layer):
    def build_inv_delta_C_paddle(self, C):
        """ Return inv_delta_C which is needed to calculate T """
        F = self.F
-        hat_C = paddle.zeros((F, F), dtype='float64')  # F x F
+        hat_eye = paddle.eye(F, dtype='float64')  # F x F
-        for i in range(0, F):
+        hat_C = paddle.norm(C.reshape([1, F, 2]) - C.reshape([F, 1, 2]), axis=2) + hat_eye
-            for j in range(i, F):
-                if i == j:
-                    hat_C[i, j] = 1
-                else:
-                    r = paddle.norm(C[i] - C[j])
-                    hat_C[i, j] = r
-                    hat_C[j, i] = r
        hat_C = (hat_C**2) * paddle.log(hat_C)
        delta_C = paddle.concat(  # F+3 x F+3
            [

--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -21,10 +21,13 @@ import copy
 __all__ = ['build_post_process']
-from .db_postprocess import DBPostProcess
+from .db_postprocess import DBPostProcess, DistillationDBPostProcess
 from .east_postprocess import EASTPostProcess
 from .sast_postprocess import SASTPostProcess
-from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, NRTRLabelDecode
+from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, NRTRLabelDecode, \
+    TableLabelDecode
 from .cls_postprocess import ClsPostProcess
 from .pg_postprocess import PGPostProcess
@@ -32,7 +35,7 @@ def build_post_process(config, global_config=None):
    support_dict = [
        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
        'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
-        'DistillationCTCLabelDecode', 'NRTRLabelDecode'
+        'DistillationCTCLabelDecode', 'NRTRLabelDecode', 'TableLabelDecode', 'DistillationDBPostProcess'
    ]
    config = copy.deepcopy(config)

--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@@ -187,3 +187,29 @@ class DBPostProcess(object):
            boxes_batch.append({'points': boxes})
        return boxes_batch
+class DistillationDBPostProcess(object):
+    """Run ``DBPostProcess`` on the outputs of several named sub-models.
+    Args:
+        model_name: name, or iterable of names, of the entries of
+            ``predicts`` to post-process. A bare string is treated as a
+            single name.
+        key: stored on the instance; not read by this class.
+        thresh, box_thresh, max_candidates, unclip_ratio, use_dilation,
+        score_mode: forwarded unchanged to ``DBPostProcess``.
+        **kwargs: accepted and ignored.
+    """
+    def __init__(self, model_name=["student"],
+                 key=None,
+                 thresh=0.3,
+                 box_thresh=0.6,
+                 max_candidates=1000,
+                 unclip_ratio=1.5,
+                 use_dilation=False,
+                 score_mode="fast",
+                 **kwargs):
+        # A bare string would otherwise be iterated character by character.
+        if isinstance(model_name, str):
+            model_name = [model_name]
+        # Copy so that neither the shared default list nor a caller's list is
+        # aliased by this instance.
+        self.model_name = list(model_name)
+        self.key = key
+        self.post_process = DBPostProcess(thresh=thresh,
+                 box_thresh=box_thresh,
+                 max_candidates=max_candidates,
+                 unclip_ratio=unclip_ratio,
+                 use_dilation=use_dilation,
+                 score_mode=score_mode)
+    def __call__(self, predicts, shape_list):
+        """Return ``{name: post_process(predicts[name], shape_list)}``.
+        Only the names listed in ``self.model_name`` are processed; a name
+        missing from ``predicts`` raises ``KeyError``.
+        """
+        results = {}
+        for k in self.model_name:
+            results[k] = self.post_process(predicts[k], shape_list=shape_list)
+        return results
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -44,16 +44,16 @@ class BaseRecLabelDecode(object):
            self.character_str = string.printable[:-6]
            dict_character = list(self.character_str)
        elif character_type in support_character_type:
-            self.character_str = ""
+            self.character_str = []
            assert character_dict_path is not None, "character_dict_path should not be None when character_type is {}".format(
                character_type)
            with open(character_dict_path, "rb") as fin:
                lines = fin.readlines()
                for line in lines:
                    line = line.decode('utf-8').strip("\n").strip("\r\n")
-                    self.character_str += line
+                    self.character_str.append(line)
            if use_space_char:
-                self.character_str += " "
+                self.character_str.append(" ")
            dict_character = list(self.character_str)
        else:
@@ -381,3 +381,138 @@ class SRNLabelDecode(BaseRecLabelDecode):
            assert False, "unsupport type %s in get_beg_end_flag_idx" \
                          % beg_or_end
        return idx
+class TableLabelDecode(object):
+    """Decode table-structure predictions into HTML tokens and cell locations.
+    Two vocabularies are read from one dictionary file: character tokens and
+    structure-element tokens. Each gets "sos" prepended and "eos" appended
+    before the token <-> index tables are built.
+    Args:
+        character_dict_path: path of the dictionary file, in the layout
+            expected by ``load_char_elem_dict``.
+        **kwargs: accepted and ignored.
+    """
+    def __init__(self,
+                 character_dict_path,
+                 **kwargs):
+        list_character, list_elem = self.load_char_elem_dict(character_dict_path)
+        list_character = self.add_special_char(list_character)
+        list_elem = self.add_special_char(list_elem)
+        # token -> index and index -> token tables for the character vocabulary.
+        self.dict_character = {}
+        self.dict_idx_character = {}
+        for i, char in enumerate(list_character):
+            self.dict_idx_character[i] = char
+            self.dict_character[char] = i
+        # The same pair of tables for the structure-element vocabulary.
+        self.dict_elem = {}
+        self.dict_idx_elem = {}
+        for i, elem in enumerate(list_elem):
+            self.dict_idx_elem[i] = elem
+            self.dict_elem[elem] = i
+    def load_char_elem_dict(self, character_dict_path):
+        """Read the character and element token lists from the dictionary file.
+        The first line is tab-separated; its first two fields are the number
+        of character lines and the number of element lines that follow, in
+        that order. Any further fields and any later lines are not read.
+        Returns:
+            (list_character, list_elem), both lists of str.
+        """
+        list_character = []
+        list_elem = []
+        with open(character_dict_path, "rb") as fin:
+            lines = fin.readlines()
+            substr = lines[0].decode('utf-8').strip("\n").strip("\r\n").split("\t")
+            character_num = int(substr[0])
+            elem_num = int(substr[1])
+            for cno in range(1, 1 + character_num):
+                character = lines[cno].decode('utf-8').strip("\n").strip("\r\n")
+                list_character.append(character)
+            for eno in range(1 + character_num, 1 + character_num + elem_num):
+                elem = lines[eno].decode('utf-8').strip("\n").strip("\r\n")
+                list_elem.append(elem)
+        return list_character, list_elem
+    def add_special_char(self, list_character):
+        """Return a new list with "sos" prepended and "eos" appended.
+        Also (re)sets ``self.beg_str`` and ``self.end_str`` as a side effect.
+        """
+        self.beg_str = "sos"
+        self.end_str = "eos"
+        list_character = [self.beg_str] + list_character + [self.end_str]
+        return list_character
+    def __call__(self, preds):
+        """Turn raw predictions into HTML strings and per-cell locations.
+        Args:
+            preds: mapping with 'structure_probs' (reduced over axis 2 to get
+                the predicted element index and its score) and 'loc_preds'
+                (indexed as ``loc_preds[batch, position]``). paddle Tensors
+                are converted with ``.numpy()`` first.
+        Returns:
+            dict with the joined HTML code per sample, the locations taken at
+            the positions of '<td>' / '<td' tokens, and the score, index and
+            token lists produced by ``decode``.
+        """
+        structure_probs = preds['structure_probs']
+        loc_preds = preds['loc_preds']
+        if isinstance(structure_probs,paddle.Tensor):
+            structure_probs = structure_probs.numpy()
+        if isinstance(loc_preds,paddle.Tensor):
+            loc_preds = loc_preds.numpy()
+        structure_idx = structure_probs.argmax(axis=2)
+        structure_probs = structure_probs.max(axis=2)
+        structure_str, structure_pos, result_score_list, result_elem_idx_list = self.decode(structure_idx,
+                                                                                            structure_probs, 'elem')
+        res_html_code_list = []
+        res_loc_list = []
+        batch_num = len(structure_str)
+        for bno in range(batch_num):
+            res_loc = []
+            for sno in range(len(structure_str[bno])):
+                text = structure_str[bno][sno]
+                # Only cell-opening tokens carry a location prediction.
+                if text in ['<td>', '<td']:
+                    pos = structure_pos[bno][sno]
+                    res_loc.append(loc_preds[bno, pos])
+            res_html_code = ''.join(structure_str[bno])
+            res_loc = np.array(res_loc)
+            res_html_code_list.append(res_html_code)
+            res_loc_list.append(res_loc)
+        return {'res_html_code': res_html_code_list, 'res_loc': res_loc_list, 'res_score_list': result_score_list,
+                'res_elem_idx_list': result_elem_idx_list,'structure_str_list':structure_str}
+    def decode(self, text_index, structure_probs, char_or_elem):
+        """Convert predicted indices into tokens, one list per sample.
+        Decoding of a sample stops at the first end token found after
+        position 0; begin/end tokens are never emitted.
+        Args:
+            text_index: per-sample sequences of predicted indices.
+            structure_probs: scores, indexed as ``[batch, position]``.
+            char_or_elem: "char" selects the character vocabulary; any other
+                value selects the structure-element vocabulary.
+        Returns:
+            (tokens, positions, scores, indices), each a list with one inner
+            list per sample.
+        """
+        if char_or_elem == "char":
+            current_dict = self.dict_idx_character
+            # Previously unbound in this branch, which made "char" decoding crash.
+            ignored_tokens = self.get_ignored_tokens('char')
+        else:
+            current_dict = self.dict_idx_elem
+            ignored_tokens = self.get_ignored_tokens('elem')
+        end_idx = ignored_tokens[1]
+        result_list = []
+        result_pos_list = []
+        result_score_list = []
+        result_elem_idx_list = []
+        batch_size = len(text_index)
+        for batch_idx in range(batch_size):
+            char_list = []
+            elem_pos_list = []
+            elem_idx_list = []
+            score_list = []
+            for idx in range(len(text_index[batch_idx])):
+                tmp_elem_idx = int(text_index[batch_idx][idx])
+                if idx > 0 and tmp_elem_idx == end_idx:
+                    break
+                if tmp_elem_idx in ignored_tokens:
+                    continue
+                char_list.append(current_dict[tmp_elem_idx])
+                elem_pos_list.append(idx)
+                score_list.append(structure_probs[batch_idx, idx])
+                elem_idx_list.append(tmp_elem_idx)
+            result_list.append(char_list)
+            result_pos_list.append(elem_pos_list)
+            result_score_list.append(score_list)
+            result_elem_idx_list.append(elem_idx_list)
+        return result_list, result_pos_list, result_score_list, result_elem_idx_list
+    def get_ignored_tokens(self, char_or_elem):
+        """Return ``[beg_idx, end_idx]`` for the selected vocabulary."""
+        beg_idx = self.get_beg_end_flag_idx("beg", char_or_elem)
+        end_idx = self.get_beg_end_flag_idx("end", char_or_elem)
+        return [beg_idx, end_idx]
+    def get_beg_end_flag_idx(self, beg_or_end, char_or_elem):
+        """Return the index of the begin ("beg") or end ("end") token.
+        Raises:
+            AssertionError: if ``char_or_elem`` is not "char"/"elem" or
+                ``beg_or_end`` is not "beg"/"end". Raised explicitly so the
+                check is not removed when Python runs with ``-O``.
+        """
+        if char_or_elem == "char":
+            vocab = self.dict_character
+        elif char_or_elem == "elem":
+            vocab = self.dict_elem
+        else:
+            raise AssertionError("Unsupport type %s in char_or_elem"
+                                 % char_or_elem)
+        if beg_or_end == "beg":
+            return vocab[self.beg_str]
+        if beg_or_end == "end":
+            return vocab[self.end_str]
+        raise AssertionError("Unsupport type %s in get_beg_end_flag_idx of %s"
+                             % (beg_or_end, char_or_elem))
--- a/ppocr/utils/dict/table_dict.txt
+++ b/ppocr/utils/dict/table_dict.txt
+←
+</overline>
+☆
+─
+α
+⋅
+$
+ω
+ψ
+χ
+(
+υ
+≥
+σ
+,
+ρ
+ε
+0
+■
+4
+8
+✗
+b
+<
+✓
+Ψ
+Ω
+€
+D
+3
+Π
+H
+║
+</strike>
+L
+Φ
+Χ
+θ
+P
+κ
+λ
+μ
+T
+ξ
+X
+β
+γ
+δ
+\
+ζ
+η
+`
+d
+<strike>
+h
+f
+l
+Θ
+p
+√
+t
+</sub>
+x
+Β
+Γ
+Δ
+|
+ǂ
+ɛ
+j
+̧
+➢
+⁡
+̌
+′
+«
+△
+▲
+#
+</b>
+'
+Ι
+
+¶
+/
+▼
+⇑
+□
+·
+7
+▪
+;
+?
+➔
+∩
+C
+÷
+G
+⇒
+K
+<sup>
+O
+S
+С
+W
+Α
+[
+○
+_
+●
+‡
+c
+z
+g
+<i>
+o
+<sub>
+〈
+〉
+s
+⩽
+w
+φ
+ʹ
+{
+»
+∣
+̆
+e
+ˆ
+∈
+τ
+◆
+ι
+∅
+∆
+∙
+∘
+Ø
+ß
+✔
+∞
+∑
+−
+×
+◊
+∗
+∖
+˃
+˂
+∫
+"
+i
+&
+π
+↔
+*
+∥
+æ
+∧
+.
+⁄
+ø
+Q
+∼
+6
+⁎
+:
+★
+>
+a
+B
+≈
+F
+J
+̄
+N
+♯
+R
+V
+<overline>
+―
+Z
+♣
+^
+¤
+¥
+§
+<underline>
+¢
+£
+≦
+≤
+‖
+Λ
+©
+n
+↓
+→
+↑
+r
+°
+±
+v
+<b>
+♂
+k
+♀
+~
+ᅟ
+̇
+@
+”
+♦
+ł
+®
+⊕
+„
+!
+</sup>
+%
+⇓
+)
+-
+1
+5
+9
+=
+А
+A
+‰
+⋆
+Σ
+E
+◦
+I
+※
+M
+m
+̨
+⩾
+†
+</i>
+•
+U
+Y
+]
+̸
+2
+‐
+–
+‒
+̂
+—
+̀
+́
+’
+‘
+⋮
+⋯
+̊
+“
+̈
+≧
+q
+u
+ı
+y
+</underline>
+̃
+}
+ν
--- a/ppocr/utils/dict/table_structure_dict.txt
+++ b/ppocr/utils/dict/table_structure_dict.txt
+277	28	1267	1186
+<b>
+V
+a
+r
+i
+b
+l
+e
+</b>
+H
+z
+d
+t
+o
+9
+5
+%
+C
+I
+<i>
+p
+</i>
+v
+u
+*
+A
+g
+(
+m
+n
+)
+0
+.
+7
+1
+6
+≤
+>
+8
+3
+–
+2
+G
+4
+M
+F
+T
+y
+f
+s
+L
+w
+c
+U
+h
+D
+S
+Q
+R
+x
+P
+-
+E
+O
+/
+k
+,
+
+N
+K
+q
+′
+[
+]
+<
+≥
+<sup>
+−
+</sup>
+μ
+±
+J
+j
+W
+_
+Δ
+B
+“
+:
+Y
+α
+λ
+;
+<sub>
+</sub>
+?
+∼
+=
+°
+#
+̊
+̈
+̂
+’
+Z
+X
+∗
+—
+β
+'
+†
+~
+@
+"
+γ
+↓
+↑
+&
+‡
+χ
+”
+σ
+§
+|
+¶
+‐
+×
+$
+→
+√
+✓
+‘
+\
+∞
+π
+•
+®
+^
+∆
+≧
+<underline>
+</underline>
+́
+♀
+♂
+‒
+⁎
+▲
+·
+£
+φ
+Ψ
+ß
+△
+☆
+▪
+η
+€
+∧
+̃
+Φ
+ρ
+̄
+δ
+‰
+̧
+Ω
+♦
+{
+}
+̀
+∑
+∫
+ø
+κ
+ε
+¥
+※
+`
+ω
+Σ
+➔
+‖
+Β
+̸
+─
+●
+⩾
+Χ
+Α
+⋅
+◆
+★
+■
+ψ
+ǂ
+□
+ζ
+!
+Γ
+↔
+θ
+⁄
+〈
+〉
+―
+υ
+τ
+⋆
+Ø
+©
+∥
+С
+˂
+➢
+ɛ
+⁡
+✗
+←
+○
+¢
+⩽
+∖
+˃
+≈
+Π
+̌
+≦
+∅
+ᅟ
+<overline>
+</overline>
+∣
+¤
+♯
+̆
+ξ
+÷
+▼
+ι
+ν
+║
+<strike>
+</strike>
+◦
+◊
+∙
+«
+»
+ł
+ı
+Θ
+∈
+„
+∘
+✔
+̇
+æ
+ʹ
+ˆ
+♣
+⇓
+∩
+⊕
+⇒
+⇑
+̨
+Ι
+Λ
+⋯
+А
+⋮
+<thead>
+<tr>
+<td>
+</td>
+</tr>
+</thead>
+<tbody>
+</tbody>
+<td
+ colspan="5"
+>
+ colspan="2"
+ colspan="3"
+ rowspan="2"
+ colspan="4"
+ colspan="6"
+ rowspan="3"
+ colspan="9"
+ colspan="10"
+ colspan="7"
+ rowspan="4"
+ rowspan="5"
+ rowspan="9"
+ colspan="8"
+ rowspan="8"
+ rowspan="6"
+ rowspan="7"
+ rowspan="10"
+0	2924682
+1	3405345
+2	2363468
+3	2709165
+4	4078680
+5	3250792
+6	1923159
+7	1617890
+8	1450532
+9	1717624
+10	1477550
+11	1489223
+12	915528
+13	819193
+14	593660
+15	518924
+16	682065
+17	494584
+18	400591
+19	396421
+20	340994
+21	280688
+22	250328
+23	226786
+24	199927
+25	182707
+26	164629
+27	141613
+28	127554
+29	116286
+30	107682
+31	96367
+32	88002
+33	79234
+34	72186
+35	65921
+36	60374
+37	55976
+38	52166
+39	47414
+40	44932
+41	41279
+42	38232
+43	35463
+44	33703
+45	30557
+46	29639
+47	27000
+48	25447
+49	23186
+50	22093
+51	20412
+52	19844
+53	18261
+54	17561
+55	16499
+56	15597
+57	14558
+58	14372
+59	13445
+60	13514
+61	12058
+62	11145
+63	10767
+64	10370
+65	9630
+66	9337
+67	8881
+68	8727
+69	8060
+70	7994
+71	7740
+72	7189
+73	6729
+74	6749
+75	6548
+76	6321
+77	5957
+78	5740
+79	5407
+80	5370
+81	5035
+82	4921
+83	4656
+84	4600
+85	4519
+86	4277
+87	4023
+88	3939
+89	3910
+90	3861
+91	3560
+92	3483
+93	3406
+94	3346
+95	3229
+96	3122
+97	3086
+98	3001
+99	2884
+100	2822
+101	2677
+102	2670
+103	2610
+104	2452
+105	2446
+106	2400
+107	2300
+108	2316
+109	2196
+110	2089
+111	2083
+112	2041
+113	1881
+114	1838
+115	1896
+116	1795
+117	1786
+118	1743
+119	1765
+120	1750
+121	1683
+122	1563
+123	1499
+124	1513
+125	1462
+126	1388
+127	1441
+128	1417
+129	1392
+130	1306
+131	1321
+132	1274
+133	1294
+134	1240
+135	1126
+136	1157
+137	1130
+138	1084
+139	1130
+140	1083
+141	1040
+142	980
+143	1031
+144	974
+145	980
+146	932
+147	898
+148	960
+149	907
+150	852
+151	912
+152	859
+153	847
+154	876
+155	792
+156	791
+157	765
+158	788
+159	787
+160	744
+161	673
+162	683
+163	697
+164	666
+165	680
+166	632
+167	677
+168	657
+169	618
+170	587
+171	585
+172	567
+173	549
+174	562
+175	548
+176	542
+177	539
+178	542
+179	549
+180	547
+181	526
+182	525
+183	514
+184	512
+185	505
+186	515
+187	467
+188	475
+189	458
+190	435
+191	443
+192	427
+193	424
+194	404
+195	389
+196	429
+197	404
+198	386
+199	351
+200	388
+201	408
+202	361
+203	346
+204	324
+205	361
+206	363
+207	364
+208	323
+209	336
+210	342
+211	315
+212	325
+213	328
+214	314
+215	327
+216	320
+217	300
+218	295
+219	315
+220	310
+221	295
+222	275
+223	248
+224	274
+225	232
+226	293
+227	259
+228	286
+229	263
+230	242
+231	214
+232	261
+233	231
+234	211
+235	250
+236	233
+237	206
+238	224
+239	210
+240	233
+241	223
+242	216
+243	222
+244	207
+245	212
+246	196
+247	205
+248	201
+249	202
+250	211
+251	201
+252	215
+253	179
+254	163
+255	179
+256	191
+257	188
+258	196
+259	150
+260	154
+261	176
+262	211
+263	166
+264	171
+265	165
+266	149
+267	182
+268	159
+269	161
+270	164
+271	161
+272	141
+273	151
+274	127
+275	129
+276	142
+277	158
+278	148
+279	135
+280	127
+281	134
+282	138
+283	131
+284	126
+285	125
+286	130
+287	126
+288	135
+289	125
+290	135
+291	131
+292	95
+293	135
+294	106
+295	117
+296	136
+297	128
+298	128
+299	118
+300	109
+301	112
+302	117
+303	108
+304	120
+305	100
+306	95
+307	108
+308	112
+309	77
+310	120
+311	104
+312	109
+313	89
+314	98
+315	82
+316	98
+317	93
+318	77
+319	93
+320	77
+321	98
+322	93
+323	86
+324	89
+325	73
+326	70
+327	71
+328	77
+329	87
+330	77
+331	93
+332	100
+333	83
+334	72
+335	74
+336	69
+337	77
+338	68
+339	78
+340	90
+341	98
+342	75
+343	80
+344	63
+345	71
+346	83
+347	66
+348	71
+349	70
+350	62
+351	62
+352	59
+353	63
+354	62
+355	52
+356	64
+357	64
+358	56
+359	49
+360	57
+361	63
+362	60
+363	68
+364	62
+365	55
+366	54
+367	40
+368	75
+369	70
+370	53
+371	58
+372	57
+373	55
+374	69
+375	57
+376	53
+377	43
+378	45
+379	47
+380	56
+381	51
+382	59
+383	51
+384	43
+385	34
+386	57
+387	49
+388	39
+389	46
+390	48
+391	43
+392	40
+393	54
+394	50
+395	41
+396	43
+397	33
+398	27
+399	49
+400	44
+401	44
+402	38
+403	30
+404	32
+405	37
+406	39
+407	42
+408	53
+409	39
+410	34
+411	31
+412	32
+413	52
+414	27
+415	41
+416	34
+417	36
+418	50
+419	35
+420	32
+421	33
+422	45
+423	35
+424	40
+425	29
+426	41
+427	40
+428	39
+429	32
+430	31
+431	34
+432	29
+433	27
+434	26
+435	22
+436	34
+437	28
+438	30
+439	38
+440	35
+441	36
+442	36
+443	27
+444	24
+445	33
+446	31
+447	25
+448	33
+449	27
+450	32
+451	46
+452	31
+453	35
+454	35
+455	34
+456	26
+457	21
+458	25
+459	26
+460	24
+461	27
+462	33
+463	30
+464	35
+465	21
+466	32
+467	19
+468	27
+469	16
+470	28
+471	26
+472	27
+473	26
+474	25
+475	25
+476	27
+477	20
+478	28
+479	22
+480	23
+481	16
+482	25
+483	27
+484	19
+485	23
+486	19
+487	15
+488	15
+489	23
+490	24
+491	19
+492	20
+493	18
+494	17
+495	30
+496	28
+497	20
+498	29
+499	17
+500	19
+501	21
+502	15
+503	24
+504	15
+505	19
+506	25
+507	16
+508	23
+509	26
+510	21
+511	15
+512	12
+513	16
+514	18
+515	24
+516	26
+517	18
+518	8
+519	25
+520	14
+521	8
+522	24
+523	20
+524	18
+525	15
+526	13
+527	17
+528	18
+529	22
+530	21
+531	9
+532	16
+533	17
+534	13
+535	17
+536	15
+537	13
+538	20
+539	13
+540	19
+541	29
+542	10
+543	8
+544	18
+545	13
+546	9
+547	18
+548	10
+549	18
+550	18
+551	9
+552	9
+553	15
+554	13
+555	15
+556	14
+557	14
+558	18
+559	8
+560	13
+561	9
+562	7
+563	12
+564	6
+565	9
+566	9
+567	18
+568	9
+569	10
+570	13
+571	14
+572	13
+573	21
+574	8
+575	16
+576	12
+577	9
+578	16
+579	17
+580	22
+581	6
+582	14
+583	13
+584	15
+585	11
+586	13
+587	5
+588	12
+589	13
+590	15
+591	13
+592	15
+593	12
+594	7
+595	18
+596	12
+597	13
+598	13
+599	13
+600	12
+601	12
+602	10
+603	11
+604	6
+605	6
+606	2
+607	9
+608	8
+609	12
+610	9
+611	12
+612	13
+613	12
+614	14
+615	9
+616	8
+617	9
+618	14
+619	13
+620	12
+621	6
+622	8
+623	8
+624	8
+625	12
+626	8
+627	7
+628	5
+629	8
+630	12
+631	6
+632	10
+633	10
+634	7
+635	8
+636	9
+637	6
+638	9
+639	4
+640	12
+641	4
+642	3
+643	11
+644	10
+645	6
+646	12
+647	12
+648	4
+649	4
+650	9
+651	8
+652	6
+653	5
+654	14
+655	10
+656	11
+657	8
+658	5
+659	5
+660	9
+661	13
+662	4
+663	5
+664	9
+665	11
+666	12
+667	7
+668	13
+669	2
+670	1
+671	7
+672	7
+673	7
+674	10
+675	9
+676	6
+677	5
+678	7
+679	6
+680	3
+681	3
+682	4
+683	9
+684	8
+685	5
+686	3
+687	11
+688	9
+689	2
+690	6
+691	5
+692	9
+693	5
+694	6
+695	5
+696	9
+697	8
+698	3
+699	7
+700	5
+701	9
+702	8
+703	7
+704	2
+705	3
+706	7
+707	6
+708	6
+709	10
+710	2
+711	10
+712	6
+713	7
+714	5
+715	6
+716	4
+717	6
+718	8
+719	4
+720	6
+721	7
+722	5
+723	7
+724	3
+725	10
+726	10
+727	3
+728	7
+729	7
+730	5
+731	2
+732	1
+733	5
+734	1
+735	5
+736	6
+737	2
+738	2
+739	3
+740	7
+741	2
+742	7
+743	4
+744	5
+745	4
+746	5
+747	3
+748	1
+749	4
+750	4
+751	2
+752	4
+753	6
+754	6
+755	6
+756	3
+757	2
+758	5
+759	5
+760	3
+761	4
+762	2
+763	1
+764	8
+765	3
+766	4
+767	3
+768	1
+769	5
+770	3
+771	3
+772	4
+773	4
+774	1
+775	3
+776	2
+777	2
+778	3
+779	3
+780	1
+781	4
+782	3
+783	4
+784	6
+785	3
+786	5
+787	4
+788	2
+789	4
+790	5
+791	4
+792	6
+794	4
+795	1
+796	1
+797	4
+798	2
+799	3
+800	3
+801	1
+802	5
+803	5
+804	3
+805	3
+806	3
+807	4
+808	4
+809	2
+811	5
+812	4
+813	6
+814	3
+815	2
+816	2
+817	3
+818	5
+819	3
+820	1
+821	1
+822	4
+823	3
+824	4
+825	8
+826	3
+827	5
+828	5
+829	3
+830	6
+831	3
+832	4
+833	8
+834	5
+835	3
+836	3
+837	2
+838	4
+839	2
+840	1
+841	3
+842	2
+843	1
+844	3
+846	4
+847	4
+848	3
+849	3
+850	2
+851	3
+853	1
+854	4
+855	4
+856	2
+857	4
+858	1
+859	2
+860	5
+861	1
+862	1
+863	4
+864	2
+865	2
+867	5
+868	1
+869	4
+870	1
+871	1
+872	1
+873	2
+875	5
+876	3
+877	1
+878	3
+879	3
+880	3
+881	2
+882	1
+883	6
+884	2
+885	2
+886	1
+887	1
+888	3
+889	2
+890	2
+891	3
+892	1
+893	3
+894	1
+895	5
+896	1
+897	3
+899	2
+900	2
+902	1
+903	2
+904	4
+905	4
+906	3
+907	1
+908	1
+909	2
+910	5
+911	2
+912	3
+914	1
+915	1
+916	2
+918	2
+919	2
+920	4
+921	4
+922	1
+923	1
+924	4
+925	5
+926	1
+928	2
+929	1
+930	1
+931	1
+932	1
+933	1
+934	2
+935	1
+936	1
+937	1
+938	2
+939	1
+941	1
+942	4
+944	2
+945	2
+946	2
+947	1
+948	1
+950	1
+951	2
+953	1
+954	2
+955	1
+956	1
+957	2
+958	1
+960	3
+962	4
+963	1
+964	1
+965	3
+966	2
+967	2
+968	1
+969	3
+970	3
+972	1
+974	4
+975	3
+976	3
+977	2
+979	2
+980	1
+981	1
+983	5
+984	1
+985	3
+986	1
+987	2
+988	4
+989	2
+991	2
+992	2
+993	1
+994	1
+996	2
+997	2
+998	1
+999	3
+1000	2
+1001	1
+1002	3
+1003	3
+1004	2
+1005	3
+1006	1
+1007	2
+1009	1
+1011	1
+1013	3
+1014	1
+1016	2
+1017	1
+1018	1
+1019	1
+1020	4
+1021	1
+1022	2
+1025	1
+1026	1
+1027	2
+1028	1
+1030	1
+1031	2
+1032	4
+1034	3
+1035	2
+1036	1
+1038	1
+1039	1
+1040	1
+1041	1
+1042	2
+1043	1
+1044	2
+1045	4
+1048	1
+1050	1
+1051	1
+1052	2
+1054	1
+1055	3
+1056	2
+1057	1
+1059	1
+1061	2
+1063	1
+1064	1
+1065	1
+1066	1
+1067	1
+1068	1
+1069	2
+1074	1
+1075	1
+1077	1
+1078	1
+1079	1
+1082	1
+1085	1
+1088	1
+1090	1
+1091	1
+1092	2
+1094	2
+1097	2
+1098	1
+1099	2
+1101	2
+1102	1
+1104	1
+1105	1
+1107	1
+1109	1
+1111	2
+1112	1
+1114	2
+1115	2
+1116	2
+1117	1
+1118	1
+1119	1
+1120	1
+1122	1
+1123	1
+1127	1
+1128	3
+1132	2
+1138	3
+1142	1
+1145	4
+1150	1
+1153	2
+1154	1
+1158	1
+1159	1
+1163	1
+1165	1
+1169	2
+1174	1
+1176	1
+1177	1
+1178	2
+1179	1
+1180	2
+1181	1
+1182	1
+1183	2
+1185	1
+1187	1
+1191	2
+1193	1
+1195	3
+1196	1
+1201	3
+1203	1
+1206	1
+1210	1
+1213	1
+1214	1
+1215	2
+1218	1
+1220	1
+1221	1
+1225	1
+1226	1
+1233	2
+1241	1
+1243	1
+1249	1
+1250	2
+1251	1
+1254	1
+1255	2
+1260	1
+1268	1
+1270	1
+1273	1
+1274	1
+1277	1
+1284	1
+1287	1
+1291	1
+1292	2
+1294	1
+1295	2
+1297	1
+1298	1
+1301	1
+1307	1
+1308	3
+1311	2
+1313	1
+1316	1
+1321	1
+1324	1
+1325	1
+1330	1
+1333	1
+1334	1
+1338	2
+1340	1
+1341	1
+1342	1
+1343	1
+1345	1
+1355	1
+1357	1
+1360	2
+1375	1
+1376	1
+1380	1
+1383	1
+1387	1
+1389	1
+1393	1
+1394	1
+1396	1
+1398	1
+1410	1
+1414	1
+1419	1
+1425	1
+1434	1
+1435	1
+1438	1
+1439	1
+1447	1
+1455	2
+1460	1
+1461	1
+1463	1
+1466	1
+1470	1
+1473	1
+1478	1
+1480	1
+1483	1
+1484	1
+1485	2
+1492	2
+1499	1
+1509	1
+1512	1
+1513	1
+1523	1
+1524	1
+1525	2
+1529	1
+1539	1
+1544	1
+1568	1
+1584	1
+1591	1
+1598	1
+1600	1
+1604	1
+1614	1
+1617	1
+1621	1
+1622	1
+1626	1
+1638	1
+1648	1
+1658	1
+1661	1
+1679	1
+1682	1
+1693	1
+1700	1
+1705	1
+1707	1
+1722	1
+1728	1
+1758	1
+1762	1
+1763	1
+1775	1
+1776	1
+1801	1
+1810	1
+1812	1
+1827	1
+1834	1
+1846	1
+1847	1
+1848	1
+1851	1
+1862	1
+1866	1
+1877	2
+1884	1
+1888	1
+1903	1
+1912	1
+1925	1
+1938	1
+1955	1
+1998	1
+2054	1
+2058	1
+2065	1
+2069	1
+2076	1
+2089	1
+2104	1
+2111	1
+2133	1
+2138	1
+2156	1
+2204	1
+2212	1
+2237	1
+2246	2
+2298	1
+2304	1
+2360	1
+2400	1
+2481	1
+2544	1
+2586	1
+2622	1
+2666	1
+2682	1
+2725	1
+2920	1
+3997	1
+4019	1
+5211	1
+12	19
+14	1
+16	401
+18	2
+20	421
+22	557
+24	625
+26	50
+28	4481
+30	52
+32	550
+34	5840
+36	4644
+38	87
+40	5794
+41	33
+42	571
+44	11805
+46	4711
+47	7
+48	597
+49	12
+50	678
+51	2
+52	14715
+53	3
+54	7322
+55	3
+56	508
+57	39
+58	3486
+59	11
+60	8974
+61	45
+62	1276
+63	4
+64	15693
+65	15
+66	657
+67	13
+68	6409
+69	10
+70	3188
+71	25
+72	1889
+73	27
+74	10370
+75	9
+76	12432
+77	23
+78	520
+79	15
+80	1534
+81	29
+82	2944
+83	23
+84	12071
+85	36
+86	1502
+87	10
+88	10978
+89	11
+90	889
+91	16
+92	4571
+93	17
+94	7855
+95	21
+96	2271
+97	33
+98	1423
+99	15
+100	11096
+101	21
+102	4082
+103	13
+104	5442
+105	25
+106	2113
+107	26
+108	3779
+109	43
+110	1294
+111	29
+112	7860
+113	29
+114	4965
+115	22
+116	7898
+117	25
+118	1772
+119	28
+120	1149
+121	38
+122	1483
+123	32
+124	10572
+125	25
+126	1147
+127	31
+128	1699
+129	22
+130	5533
+131	22
+132	4669
+133	34
+134	3777
+135	10
+136	5412
+137	21
+138	855
+139	26
+140	2485
+141	46
+142	1970
+143	27
+144	6565
+145	40
+146	933
+147	15
+148	7923
+149	16
+150	735
+151	23
+152	1111
+153	33
+154	3714
+155	27
+156	2445
+157	30
+158	3367
+159	10
+160	4646
+161	27
+162	990
+163	23
+164	5679
+165	25
+166	2186
+167	17
+168	899
+169	32
+170	1034
+171	22
+172	6185
+173	32
+174	2685
+175	17
+176	1354
+177	38
+178	1460
+179	15
+180	3478
+181	20
+182	958
+183	20
+184	6055
+185	23
+186	2180
+187	15
+188	1416
+189	30
+190	1284
+191	22
+192	1341
+193	21
+194	2413
+195	18
+196	4984
+197	13
+198	830
+199	22
+200	1834
+201	19
+202	2238
+203	9
+204	3050
+205	22
+206	616
+207	17
+208	2892
+209	22
+210	711
+211	30
+212	2631
+213	19
+214	3341
+215	21
+216	987
+217	26
+218	823
+219	9
+220	3588
+221	20
+222	692
+223	7
+224	2925
+225	31
+226	1075
+227	16
+228	2909
+229	18
+230	673
+231	20
+232	2215
+233	14
+234	1584
+235	21
+236	1292
+237	29
+238	1647
+239	25
+240	1014
+241	30
+242	1648
+243	19
+244	4465
+245	10
+246	787
+247	11
+248	480
+249	25
+250	842
+251	15
+252	1219
+253	23
+254	1508
+255	8
+256	3525
+257	16
+258	490
+259	12
+260	1678
+261	14
+262	822
+263	16
+264	1729
+265	28
+266	604
+267	11
+268	2572
+269	7
+270	1242
+271	15
+272	725
+273	18
+274	1983
+275	13
+276	1662
+277	19
+278	491
+279	12
+280	1586
+281	14
+282	563
+283	10
+284	2363
+285	10
+286	656
+287	14
+288	725
+289	28
+290	871
+291	9
+292	2606
+293	12
+294	961
+295	9
+296	478
+297	13
+298	1252
+299	10
+300	736
+301	19
+302	466
+303	13
+304	2254
+305	12
+306	486
+307	14
+308	1145
+309	13
+310	955
+311	13
+312	1235
+313	13
+314	931
+315	14
+316	1768
+317	11
+318	330
+319	10
+320	539
+321	23
+322	570
+323	12
+324	1789
+325	13
+326	884
+327	5
+328	1422
+329	14
+330	317
+331	11
+332	509
+333	13
+334	1062
+335	12
+336	577
+337	27
+338	378
+339	10
+340	2313
+341	9
+342	391
+343	13
+344	894
+345	17
+346	664
+347	9
+348	453
+349	6
+350	363
+351	15
+352	1115
+353	13
+354	1054
+355	8
+356	1108
+357	12
+358	354
+359	7
+360	363
+361	16
+362	344
+363	11
+364	1734
+365	12
+366	265
+367	10
+368	969
+369	16
+370	316
+371	12
+372	757
+373	7
+374	563
+375	15
+376	857
+377	9
+378	469
+379	9
+380	385
+381	12
+382	921
+383	15
+384	764
+385	14
+386	246
+387	6
+388	1108
+389	14
+390	230
+391	8
+392	266
+393	11
+394	641
+395	8
+396	719
+397	9
+398	243
+399	4
+400	1108
+401	7
+402	229
+403	7
+404	903
+405	7
+406	257
+407	12
+408	244
+409	3
+410	541
+411	6
+412	744
+413	8
+414	419
+415	8
+416	388
+417	19
+418	470
+419	14
+420	612
+421	6
+422	342
+423	3
+424	1179
+425	3
+426	116
+427	14
+428	207
+429	6
+430	255
+431	4
+432	288
+433	12
+434	343
+435	6
+436	1015
+437	3
+438	538
+439	10
+440	194
+441	6
+442	188
+443	15
+444	524
+445	7
+446	214
+447	7
+448	574
+449	6
+450	214
+451	5
+452	635
+453	9
+454	464
+455	5
+456	205
+457	9
+458	163
+459	2
+460	558
+461	4
+462	171
+463	14
+464	444
+465	11
+466	543
+467	5
+468	388
+469	6
+470	141
+471	4
+472	647
+473	3
+474	210
+475	4
+476	193
+477	7
+478	195
+479	7
+480	443
+481	10
+482	198
+483	3
+484	816
+485	6
+486	128
+487	9
+488	215
+489	9
+490	328
+491	7
+492	158
+493	11
+494	335
+495	8
+496	435
+497	6
+498	174
+499	1
+500	373
+501	5
+502	140
+503	7
+504	330
+505	9
+506	149
+507	5
+508	642
+509	3
+510	179
+511	3
+512	159
+513	8
+514	204
+515	7
+516	306
+517	4
+518	110
+519	5
+520	326
+521	6
+522	305
+523	6
+524	294
+525	7
+526	268
+527	5
+528	149
+529	4
+530	133
+531	2
+532	513
+533	10
+534	116
+535	5
+536	258
+537	4
+538	113
+539	4
+540	138
+541	6
+542	116
+544	485
+545	4
+546	93
+547	9
+548	299
+549	3
+550	256
+551	6
+552	92
+553	3
+554	175
+555	6
+556	253
+557	7
+558	95
+559	2
+560	128
+561	4
+562	206
+563	2
+564	465
+565	3
+566	69
+567	3
+568	157
+569	7
+570	97
+571	8
+572	118
+573	5
+574	130
+575	4
+576	301
+577	6
+578	177
+579	2
+580	397
+581	3
+582	80
+583	1
+584	128
+585	5
+586	52
+587	2
+588	72
+589	1
+590	84
+591	6
+592	323
+593	11
+594	77
+595	5
+596	205
+597	1
+598	244
+599	4
+600	69
+601	3
+602	89
+603	5
+604	254
+605	6
+606	147
+607	3
+608	83
+609	3
+610	77
+611	3
+612	194
+613	1
+614	98
+615	3
+616	243
+617	3
+618	50
+619	8
+620	188
+621	4
+622	67
+623	4
+624	123
+625	2
+626	50
+627	1
+628	239
+629	2
+630	51
+631	4
+632	65
+633	5
+634	188
+636	81
+637	3
+638	46
+639	3
+640	103
+641	1
+642	136
+643	3
+644	188
+645	3
+646	58
+648	122
+649	4
+650	47
+651	2
+652	155
+653	4
+654	71
+655	1
+656	71
+657	3
+658	50
+659	2
+660	177
+661	5
+662	66
+663	2
+664	183
+665	3
+666	50
+667	2
+668	53
+669	2
+670	115
+672	66
+673	2
+674	47
+675	1
+676	197
+677	2
+678	46
+679	3
+680	95
+681	3
+682	46
+683	3
+684	107
+685	1
+686	86
+687	2
+688	158
+689	4
+690	51
+691	1
+692	80
+694	56
+695	4
+696	40
+698	43
+699	3
+700	95
+701	2
+702	51
+703	2
+704	133
+705	1
+706	100
+707	2
+708	121
+709	2
+710	15
+711	3
+712	35
+713	2
+714	20
+715	3
+716	37
+717	2
+718	78
+720	55
+721	1
+722	42
+723	2
+724	218
+725	3
+726	23
+727	2
+728	26
+729	1
+730	64
+731	2
+732	65
+734	24
+735	2
+736	53
+737	1
+738	32
+739	1
+740	60
+742	81
+743	1
+744	77
+745	1
+746	47
+747	1
+748	62
+749	1
+750	19
+751	1
+752	86
+753	3
+754	40
+756	55
+757	2
+758	38
+759	1
+760	101
+761	1
+762	22
+764	67
+765	2
+766	35
+767	1
+768	38
+769	1
+770	22
+771	1
+772	82
+773	1
+774	73
+776	29
+777	1
+778	55
+780	23
+781	1
+782	16
+784	84
+785	3
+786	28
+788	59
+789	1
+790	33
+791	3
+792	24
+794	13
+795	1
+796	110
+797	2
+798	15
+800	22
+801	3
+802	29
+803	1
+804	87
+806	21
+808	29
+810	48
+812	28
+813	1
+814	58
+815	1
+816	48
+817	1
+818	31
+819	1
+820	66
+822	17
+823	2
+824	58
+826	10
+827	2
+828	25
+829	1
+830	29
+831	1
+832	63
+833	1
+834	26
+835	3
+836	52
+837	1
+838	18
+840	27
+841	2
+842	12
+843	1
+844	83
+845	1
+846	7
+847	1
+848	10
+850	26
+852	25
+853	1
+854	15
+856	27
+858	32
+859	1
+860	15
+862	43
+864	32
+865	1
+866	6
+868	39
+870	11
+872	25
+873	1
+874	10
+875	1
+876	20
+877	2
+878	19
+879	1
+880	30
+882	11
+884	53
+886	25
+887	1
+888	28
+890	6
+892	36
+894	10
+896	13
+898	14
+900	31
+902	14
+903	2
+904	43
+906	25
+908	9
+910	11
+911	1
+912	16
+913	1
+914	24
+916	27
+918	6
+920	15
+922	27
+923	1
+924	23
+926	13
+928	42
+929	1
+930	3
+932	27
+934	17
+936	8
+937	1
+938	11
+940	33
+942	4
+943	1
+944	18
+946	15
+948	13
+950	18
+952	12
+954	11
+956	21
+958	10
+960	13
+962	5
+964	32
+966	13
+968	8
+970	8
+971	1
+972	23
+973	2
+974	12
+975	1
+976	22
+978	7
+979	1
+980	14
+982	8
+984	22
+985	1
+986	6
+988	17
+989	1
+990	6
+992	13
+994	19
+996	11
+998	4
+1000	9
+1002	2
+1004	14
+1006	5
+1008	3
+1010	9
+1012	29
+1014	6
+1016	22
+1017	1
+1018	8
+1019	1
+1020	7
+1022	6
+1023	1
+1024	10
+1026	2
+1028	8
+1030	11
+1031	2
+1032	8
+1034	9
+1036	13
+1038	12
+1040	12
+1042	3
+1044	12
+1046	3
+1048	11
+1050	2
+1051	1
+1052	2
+1054	11
+1056	6
+1058	8
+1059	1
+1060	23
+1062	6
+1063	1
+1064	8
+1066	3
+1068	6
+1070	8
+1071	1
+1072	5
+1074	3
+1076	5
+1078	3
+1080	11
+1081	1
+1082	7
+1084	18
+1086	4
+1087	1
+1088	3
+1090	3
+1092	7
+1094	3
+1096	12
+1098	6
+1099	1
+1100	2
+1102	6
+1104	14
+1106	3
+1108	6
+1110	5
+1112	2
+1114	8
+1116	3
+1118	3
+1120	7
+1122	10
+1124	6
+1126	8
+1128	1
+1130	4
+1132	3
+1134	2
+1136	5
+1138	5
+1140	8
+1142	3
+1144	7
+1146	3
+1148	11
+1150	1
+1152	5
+1154	1
+1156	5
+1158	1
+1160	5
+1162	3
+1164	6
+1165	1
+1166	1
+1168	4
+1169	1
+1170	3
+1171	1
+1172	2
+1174	5
+1176	3
+1177	1
+1180	8
+1182	2
+1184	4
+1186	2
+1188	3
+1190	2
+1192	5
+1194	6
+1196	1
+1198	2
+1200	2
+1204	10
+1206	2
+1208	9
+1210	1
+1214	6
+1216	3
+1218	4
+1220	9
+1221	2
+1222	1
+1224	5
+1226	4
+1228	8
+1230	1
+1232	1
+1234	3
+1236	5
+1240	3
+1242	1
+1244	3
+1245	1
+1246	4
+1248	6
+1250	2
+1252	7
+1256	3
+1258	2
+1260	2
+1262	3
+1264	4
+1265	1
+1266	1
+1270	1
+1271	1
+1272	2
+1274	3
+1276	3
+1278	1
+1280	3
+1284	1
+1286	1
+1290	1
+1292	3
+1294	1
+1296	7
+1300	2
+1302	4
+1304	3
+1306	2
+1308	2
+1312	1
+1314	1
+1316	3
+1318	2
+1320	1
+1324	8
+1326	1
+1330	1
+1331	1
+1336	2
+1338	1
+1340	3
+1341	1
+1344	1
+1346	2
+1347	1
+1348	3
+1352	1
+1354	2
+1356	1
+1358	1
+1360	3
+1362	1
+1364	4
+1366	1
+1370	1
+1372	3
+1380	2
+1384	2
+1388	2
+1390	2
+1392	2
+1394	1
+1396	1
+1398	1
+1400	2
+1402	1
+1404	1
+1406	1
+1410	1
+1412	5
+1418	1
+1420	1
+1424	1
+1432	2
+1434	2
+1442	3
+1444	5
+1448	1
+1454	1
+1456	1
+1460	3
+1462	4
+1468	1
+1474	1
+1476	1
+1478	2
+1480	1
+1486	2
+1488	1
+1492	1
+1496	1
+1500	3
+1503	1
+1506	1
+1512	2
+1516	1
+1522	1
+1524	2
+1534	4
+1536	1
+1538	1
+1540	2
+1544	2
+1548	1
+1556	1
+1560	1
+1562	1
+1564	2
+1566	1
+1568	1
+1570	1
+1572	1
+1576	1
+1590	1
+1594	1
+1604	1
+1608	1
+1614	1
+1622	1
+1624	2
+1628	1
+1629	1
+1636	1
+1642	1
+1654	2
+1660	1
+1664	1
+1670	1
+1684	4
+1698	1
+1732	3
+1742	1
+1752	1
+1760	1
+1764	1
+1772	2
+1798	1
+1808	1
+1820	1
+1852	1
+1856	1
+1874	1
+1902	1
+1908	1
+1952	1
+2004	1
+2018	1
+2020	1
+2028	1
+2174	1
+2233	1
+2244	1
+2280	1
+2290	1
+2352	1
+2604	1
+4190	1
--- a/ppocr/utils/gen_label.py
+++ b/ppocr/utils/gen_label.py
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
+# Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
+# you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
+# Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
+# distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
+# See the License for the specific language governing permissions and
-#limitations under the License.
+# limitations under the License.
 import os
 import argparse
 import json
@@ -31,7 +31,9 @@ def gen_det_label(root_path, input_dir, out_label):
        for label_file in os.listdir(input_dir):
            img_path = root_path + label_file[3:-4] + ".jpg"
            label = []
-            with open(os.path.join(input_dir, label_file), 'r') as f:
+            with open(
+                    os.path.join(input_dir, label_file), 'r',
+                    encoding='utf-8-sig') as f:
                for line in f.readlines():
                    tmp = line.strip("\n\r").replace("\xef\xbb\xbf",
                                                     "").split(',')

--- a/ppocr/utils/logging.py
+++ b/ppocr/utils/logging.py
@@ -22,7 +22,7 @@ logger_initialized = {}
 @functools.lru_cache()
-def get_logger(name='root', log_file=None, log_level=logging.INFO):
+def get_logger(name='root', log_file=None, log_level=logging.DEBUG):
    """Initialize and get a logger by name.
    If the logger has not been initialized, this method will initialize the
    logger by adding one or two handlers, otherwise the initialized logger will

--- a/ppocr/utils/network.py
+++ b/ppocr/utils/network.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import sys
+import tarfile
+import requests
+from tqdm import tqdm
+from ppocr.utils.logging import get_logger
+def download_with_progressbar(url, save_path):
+    logger = get_logger()
+    response = requests.get(url, stream=True)
+    total_size_in_bytes = int(response.headers.get('content-length', 0))
+    block_size = 1024  # 1 Kibibyte
+    progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
+    with open(save_path, 'wb') as file:
+        for data in response.iter_content(block_size):
+            progress_bar.update(len(data))
+            file.write(data)
+    progress_bar.close()
+    if total_size_in_bytes == 0 or progress_bar.n != total_size_in_bytes:
+        logger.error("Something went wrong while downloading models")
+        sys.exit(0)
+def maybe_download(model_storage_directory, url):
+    # using custom model
+    tar_file_name_list = [
+        'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel'
+    ]
+    if not os.path.exists(
+            os.path.join(model_storage_directory, 'inference.pdiparams')
+    ) or not os.path.exists(
+        os.path.join(model_storage_directory, 'inference.pdmodel')):
+        assert url.endswith('.tar'), 'Only supports tar compressed package'
+        tmp_path = os.path.join(model_storage_directory, url.split('/')[-1])
+        print('download {} to {}'.format(url, tmp_path))
+        os.makedirs(model_storage_directory, exist_ok=True)
+        download_with_progressbar(url, tmp_path)
+        with tarfile.open(tmp_path, 'r') as tarObj:
+            for member in tarObj.getmembers():
+                filename = None
+                for tar_file_name in tar_file_name_list:
+                    if tar_file_name in member.name:
+                        filename = tar_file_name
+                if filename is None:
+                    continue
+                file = tarObj.extractfile(member)
+                with open(
+                        os.path.join(model_storage_directory, filename),
+                        'wb') as f:
+                    f.write(file.read())
+        os.remove(tmp_path)
+def is_link(s):
+    return s is not None and s.startswith('http')
+def confirm_model_dir_url(model_dir, default_model_dir, default_url):
+    url = default_url
+    if model_dir is None or is_link(model_dir):
+        if is_link(model_dir):
+            url = model_dir
+        file_name = url.split('/')[-1][:-4]
+        model_dir = default_model_dir
+        model_dir = os.path.join(model_dir, file_name)
+    return model_dir, url
--- a/ppocr/utils/save_load.py
+++ b/ppocr/utils/save_load.py
@@ -25,7 +25,7 @@ import paddle
 from ppocr.utils.logging import get_logger
-__all__ = ['init_model', 'save_model', 'load_dygraph_pretrain']
+__all__ = ['init_model', 'save_model', 'load_dygraph_params']
 def _mkdir_if_not_exist(path, logger):
@@ -89,6 +89,55 @@ def init_model(config, model, optimizer=None, lr_scheduler=None):
    return best_model_dict
+def load_dygraph_params(config, model, logger, optimizer):
+    ckp = config['Global']['checkpoints']
+    if ckp and os.path.exists(ckp + ".pdparams"):
+        pre_best_model_dict = init_model(config, model, optimizer)
+        return pre_best_model_dict
+    else:
+        pm = config['Global']['pretrained_model']
+        if pm is None:
+            return {}
+        if not os.path.exists(pm) and not os.path.exists(pm + ".pdparams"):
+            logger.info(f"The pretrained_model {pm} does not exists!")
+            return {}
+        pm = pm if pm.endswith('.pdparams') else pm + '.pdparams'
+        params = paddle.load(pm)
+        state_dict = model.state_dict()
+        new_state_dict = {}
+        for k1, k2 in zip(state_dict.keys(), params.keys()):
+            if list(state_dict[k1].shape) == list(params[k2].shape):
+                new_state_dict[k1] = params[k2]
+        else:
+            logger.info(
+                f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !"
+            )
+        model.set_state_dict(new_state_dict)
+        logger.info(f"loaded pretrained_model successful from {pm}")
+        return {}
+def load_pretrained_params(model, path):
+    if path is None:
+        return False
+    if not os.path.exists(path) and not os.path.exists(path + ".pdparams"):
+        print(f"The pretrained_model {path} does not exists!")
+        return False
+    path = path if path.endswith('.pdparams') else path + '.pdparams'
+    params = paddle.load(path)
+    state_dict = model.state_dict()
+    new_state_dict = {}
+    for k1, k2 in zip(state_dict.keys(), params.keys()):
+        if list(state_dict[k1].shape) == list(params[k2].shape):
+            new_state_dict[k1] = params[k2]
+        else:
+            print(
+                f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !"
+            )
+    model.set_state_dict(new_state_dict)
+    print(f"load pretrain successful from {path}")
+    return model
 def save_model(model,
               optimizer,
               model_path,

--- a/ppstructure/README.md
+++ b/ppstructure/README.md
+English | [简体中文](README_ch.md)
+# PP-Structure
+PP-Structure is an OCR toolkit that can be used for complex documents analysis. The main features are as follows:
+- Support layout analysis of documents, dividing them into 5 types of areas: **text, title, table, image and list** (used in conjunction with Layout-Parser)
+- Support extracting text from the text, title, picture and list areas (used in conjunction with PP-OCR)
+- Support extracting Excel files from the table areas
+- Support python whl package and command line usage, easy to use
+- Support custom training for layout analysis and table structure tasks
+## 1. Visualization
+<img src="../doc/table/ppstructure.GIF" width="100%"/>
+## 2. Installation
+### 2.1 Install requirements
+- **（1) Install PaddlePaddle**
+```bash
+pip3 install --upgrade pip
+# GPU
+python3 -m pip install paddlepaddle-gpu==2.1.1 -i https://mirror.baidu.com/pypi/simple
+# CPU
+ python3 -m pip install paddlepaddle==2.1.1 -i https://mirror.baidu.com/pypi/simple
+# For more, refer to [Installation](https://www.paddlepaddle.org.cn/install/quick).
+```
+- **(2) Install Layout-Parser**
+```bash
+pip3 install -U premailer paddleocr https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
+```
+### 2.2 Install PaddleOCR（including PP-OCR and PP-Structure）
+- **（1) PIP install PaddleOCR whl package（inference only）**
+```bash
+pip install "paddleocr>=2.2"
+```
+- **（2) Clone PaddleOCR（Inference+training）**
+```bash
+git clone https://github.com/PaddlePaddle/PaddleOCR
+```
+## 3. Quick Start
+### 3.1 Use by command line
+```bash
+paddleocr --image_dir=../doc/table/1.png --type=structure
+```
+### 3.2 Use by python API
+```python
+import os
+import cv2
+from paddleocr import PPStructure,draw_structure_result,save_structure_res
+table_engine = PPStructure(show_log=True)
+save_folder = './output/table'
+img_path = '../doc/table/1.png'
+img = cv2.imread(img_path)
+result = table_engine(img)
+save_structure_res(result, save_folder,os.path.basename(img_path).split('.')[0])
+for line in result:
+    line.pop('img')
+    print(line)
+from PIL import Image
+font_path = '../doc/fonts/simfang.ttf'
+image = Image.open(img_path).convert('RGB')
+im_show = draw_structure_result(image, result,font_path=font_path)
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+### 3.3 Returned results format
+The result returned by PP-Structure is a list of dicts; an example is as follows:
+```shell
+[
+  {   'type': 'Text',
+      'bbox': [34, 432, 345, 462],
+      'res': ([[36.0, 437.0, 341.0, 437.0, 341.0, 446.0, 36.0, 447.0], [41.0, 454.0, 125.0, 453.0, 125.0, 459.0, 41.0, 460.0]],
+                [('Tigure-6. The performance of CNN and IPT models using difforen', 0.90060663), ('Tent  ', 0.465441)])
+  }
+]
+```
+The description of each field in dict is as follows
+| Parameter            | Description           |
+| --------------- | -------------|
+|type|Type of image area|
+|bbox|The coordinates of the image area in the original image, respectively [left upper x, left upper y, right bottom x, right bottom y]|
+|res|OCR or table recognition result of the image area. <br> Table: HTML string of the table; <br> OCR: A tuple containing the detection coordinates and recognition results of each single line of text|
+### 3.4 Parameter description：
+| Parameter            | Description                                     | Default value                                        |
+| --------------- | ---------------------------------------- | ------------------------------------------- |
+| output          | The path where excel and recognition results are saved                | ./output/table                              |
+| table_max_len   | The long side of the image is resized in table structure model  | 488                                         |
+| table_model_dir | inference model path of table structure model          | None                                        |
+| table_char_dict_path | dict path of table structure model                 | ../ppocr/utils/dict/table_structure_dict.txt |
+Most of the parameters are consistent with the paddleocr whl package, see [doc of whl](../doc/doc_en/whl_en.md)
+After running, each image will have a directory with the same name under the directory specified in the output field. Each table in the picture will be stored as an excel and figure area will be cropped and saved, the excel and image file name will be the coordinates of the table in the image.
+## 4. PP-Structure Pipeline
+The process is as follows:
+![pipeline](../doc/table/pipeline_en.jpg)
+In PP-Structure, the image is first analyzed by layoutparser. During layout analysis, the areas in the image are classified into 5 categories: **text, title, image, list and table**. For the first 4 types of areas, PP-OCR is used directly to complete text detection and recognition. The table area is converted to an excel file with the same table style via Table OCR.
+### 4.1 LayoutParser
+Layout analysis divides the document data into regions, including the use of Python scripts for layout analysis tools, extraction of special category detection boxes, performance indicators, and custom training layout analysis models. For details, please refer to [document](layout/README_en.md).
+### 4.2 Table Recognition
+Table Recognition converts table images into excel documents, which includes the detection and recognition of table text and the prediction of table structure and cell coordinates. For details, please refer to [document](table/README.md)
+## 5. Prediction by inference engine
+Use the following commands to complete the inference.
+```bash
+cd PaddleOCR/ppstructure
+# download model
+mkdir inference && cd inference
+# Download the detection model of the ultra-lightweight Chinese OCR model and uncompress it
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar
+# Download the recognition model of the ultra-lightweight Chinese OCR model and uncompress it
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
+# Download the ultra-lightweight English table structure model and uncompress it
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
+cd ..
+python3 predict_system.py --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=ch --output=../output/table --vis_font_path=../doc/fonts/simfang.ttf
+```
+After running, each image will have a directory with the same name under the directory specified in the output field. Each table in the picture will be stored as an excel and figure area will be cropped and saved, the excel and image file name will be the coordinates of the table in the image.
+**Model List**
+|model name|description|config|model size|download|
+| --- | --- | --- | --- | --- |
+|en_ppocr_mobile_v2.0_table_structure|Table structure prediction for English table scenarios|[table_mv3.yml](../configs/table/table_mv3.yml)|18.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) |
+**Model List**
+LayoutParser model
+|model name|description|download|
+| --- | --- | --- |
+| ppyolov2_r50vd_dcn_365e_publaynet | The layout analysis model trained on the PubLayNet data set can be divided into 5 types of areas **text, title, table, picture and list** | [PubLayNet](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) |
+| ppyolov2_r50vd_dcn_365e_tableBank_word | The layout analysis model trained on the TableBank Word dataset can only detect tables | [TableBank Word](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_word.tar) |
+| ppyolov2_r50vd_dcn_365e_tableBank_latex | The layout analysis model trained on the TableBank Latex dataset can only detect tables | [TableBank Latex](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_latex.tar) |
+OCR and table recognition model
+|model name|description|model size|download|
+| --- | --- | --- | --- |
+|ch_ppocr_mobile_slim_v2.0_det|Slim pruned lightweight model, supporting Chinese, English, multilingual text detection|2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar) |
+|ch_ppocr_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting Chinese, English and number recognition|6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) |
+|en_ppocr_mobile_v2.0_table_det|Text detection of English table scenes trained on PubLayNet dataset|4.7M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) |
+|en_ppocr_mobile_v2.0_table_rec|Text recognition of English table scene trained on PubLayNet dataset|6.9M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) |
+|en_ppocr_mobile_v2.0_table_structure|Table structure prediction of English table scene trained on PubLayNet dataset|18.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) |
+If you need to use other models, you can download the model in [model_list](../doc/doc_en/models_list_en.md) or use your own trained model to configure it to the three fields of `det_model_dir`, `rec_model_dir`, `table_model_dir` .
--- a/ppstructure/README_ch.md
+++ b/ppstructure/README_ch.md
+[English](README.md) | 简体中文
+# PP-Structure
+PP-Structure是一个可用于复杂文档结构分析和处理的OCR工具包，主要特性如下：
+- 支持对图片形式的文档进行版面分析，可以划分**文字、标题、表格、图片以及列表**5类区域（与Layout-Parser联合使用）
+- 支持文字、标题、图片以及列表区域提取为文字字段（与PP-OCR联合使用）
+- 支持表格区域进行结构化分析，最终结果输出Excel文件
+- 支持python whl包和命令行两种方式，简单易用
+- 支持版面分析和表格结构化两类任务自定义训练
+## 1. 效果展示
+<img src="../doc/table/ppstructure.GIF" width="100%"/>
+## 2. 安装
+### 2.1 安装依赖
+- **（1) 安装PaddlePaddle**
+```bash
+pip3 install --upgrade pip
+# GPU安装
+python3 -m pip install paddlepaddle-gpu==2.1.1 -i https://mirror.baidu.com/pypi/simple
+# CPU安装
+ python3 -m pip install paddlepaddle==2.1.1 -i https://mirror.baidu.com/pypi/simple
+# 更多需求，请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。
+```
+- **(2) 安装 Layout-Parser**
+```bash
+pip3 install -U premailer paddleocr https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
+```
+### 2.2 安装PaddleOCR（包含PP-OCR和PP-Structure）
+- **（1) PIP快速安装PaddleOCR whl包（仅预测）**
+```bash
+pip install "paddleocr>=2.2" # 推荐使用2.2+版本
+```
+- **（2) 完整克隆PaddleOCR源码（预测+训练）**
+```bash
+【推荐】git clone https://github.com/PaddlePaddle/PaddleOCR
+#如果因为网络问题无法pull成功，也可选择使用码云上的托管：
+git clone https://gitee.com/paddlepaddle/PaddleOCR
+#注：码云托管代码可能无法实时同步本github项目更新，存在3~5天延时，请优先使用推荐方式。
+```
+## 3. PP-Structure 快速开始
+### 3.1 命令行使用（默认参数，极简）
+```bash
+paddleocr --image_dir=../doc/table/1.png --type=structure
+```
+### 3.2 Python脚本使用（自定义参数，灵活）
+```python
+import os
+import cv2
+from paddleocr import PPStructure,draw_structure_result,save_structure_res
+table_engine = PPStructure(show_log=True)
+save_folder = './output/table'
+img_path = '../doc/table/1.png'
+img = cv2.imread(img_path)
+result = table_engine(img)
+save_structure_res(result, save_folder,os.path.basename(img_path).split('.')[0])
+for line in result:
+    line.pop('img')
+    print(line)
+from PIL import Image
+font_path = '../doc/fonts/simfang.ttf' # PaddleOCR下提供字体包
+image = Image.open(img_path).convert('RGB')
+im_show = draw_structure_result(image, result,font_path=font_path)
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+### 3.3 返回结果说明
+PP-Structure的返回结果为一个dict组成的list，示例如下
+```shell
+[
+  {   'type': 'Text',
+      'bbox': [34, 432, 345, 462],
+      'res': ([[36.0, 437.0, 341.0, 437.0, 341.0, 446.0, 36.0, 447.0], [41.0, 454.0, 125.0, 453.0, 125.0, 459.0, 41.0, 460.0]],
+                [('Tigure-6. The performance of CNN and IPT models using difforen', 0.90060663), ('Tent  ', 0.465441)])
+  }
+]
+```
+dict 里各个字段说明如下
+| 字段            | 说明           |
+| --------------- | -------------|
+|type|图片区域的类型|
+|bbox|图片区域的在原图的坐标，分别[左上角x，左上角y，右下角x，右下角y]|
+|res|图片区域的OCR或表格识别结果。<br> 表格: 表格的HTML字符串; <br> OCR: 一个包含各个单行文字的检测坐标和识别结果的元组|
+### 3.4 参数说明
+| 字段            | 说明                                     | 默认值                                      |
+| --------------- | ---------------------------------------- | ------------------------------------------- |
+| output          | excel和识别结果保存的地址                | ./output/table                              |
+| table_max_len   | 表格结构模型预测时，图像的长边resize尺度 | 488                                         |
+| table_model_dir | 表格结构模型 inference 模型地址          | None                                        |
+| table_char_dict_path | 表格结构模型所用字典地址                 | ../ppocr/utils/dict/table_structure_dict.txt |
+大部分参数和paddleocr whl包保持一致，见 [whl包文档](../doc/doc_ch/whl.md)
+运行完成后，每张图片会在`output`字段指定的目录下有一个同名目录，图片里的每个表格会存储为一个excel，图片区域会被裁剪之后保存下来，excel文件和图片名名为表格在图片里的坐标。
+## 4. PP-Structure Pipeline介绍
+![pipeline](../doc/table/pipeline.jpg)
+在PP-Structure中，图片会先经由Layout-Parser进行版面分析，在版面分析中，会对图片里的区域进行分类，包括**文字、标题、图片、列表和表格**5类。对于前4类区域，直接使用PP-OCR完成对应区域文字检测与识别。对于表格类区域，经过表格结构化处理后，表格图片转换为相同表格样式的Excel文件。
+### 4.1 版面分析
+版面分析对文档数据进行区域分类，其中包括版面分析工具的Python脚本使用、提取指定类别检测框、性能指标以及自定义训练版面分析模型，详细内容可以参考[文档](layout/README_ch.md)。
+### 4.2 表格识别
+表格识别将表格图片转换为excel文档，其中包含对于表格文本的检测和识别以及对于表格结构和单元格坐标的预测，详细说明参考[文档](table/README_ch.md)
+## 5. 预测引擎推理（与whl包效果相同）
+使用如下命令即可完成预测引擎的推理
+```bash
+cd ppstructure
+# 下载模型
+mkdir inference && cd inference
+# 下载超轻量级中文OCR模型的检测模型并解压
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar && tar xf ch_ppocr_mobile_v2.0_det_infer.tar
+# 下载超轻量级中文OCR模型的识别模型并解压
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar && tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
+# 下载超轻量级英文表格结构模型并解压
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
+cd ..
+python3 predict_system.py --det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer --rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=ch --output=../output/table --vis_font_path=../doc/fonts/simfang.ttf
+```
+运行完成后，每张图片会在`output`字段指定的目录下有一个同名目录，图片里的每个表格会存储为一个excel，图片区域会被裁剪之后保存下来，excel文件和图片名名为表格在图片里的坐标。
+**Model List**
+LayoutParser 模型
+|模型名称|模型简介|下载地址|
+| --- | --- | --- |
+| ppyolov2_r50vd_dcn_365e_publaynet | PubLayNet 数据集训练的版面分析模型，可以划分**文字、标题、表格、图片以及列表**5类区域 | [PubLayNet](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) |
+| ppyolov2_r50vd_dcn_365e_tableBank_word | TableBank Word 数据集训练的版面分析模型，只能检测表格 | [TableBank Word](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_word.tar) |
+| ppyolov2_r50vd_dcn_365e_tableBank_latex | TableBank Latex 数据集训练的版面分析模型，只能检测表格 | [TableBank Latex](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_latex.tar) |
+OCR和表格识别模型
+|模型名称|模型简介|推理模型大小|下载地址|
+| --- | --- | --- | --- |
+|ch_ppocr_mobile_slim_v2.0_det|slim裁剪版超轻量模型，支持中英文、多语种文本检测|2.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/slim/ch_ppocr_mobile_v2.0_det_prune_infer.tar) |
+|ch_ppocr_mobile_slim_v2.0_rec|slim裁剪量化版超轻量模型，支持中英文、数字识别|6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_slim_infer.tar) |
+|en_ppocr_mobile_v2.0_table_det|PubLayNet数据集训练的英文表格场景的文字检测|4.7M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar) |
+|en_ppocr_mobile_v2.0_table_rec|PubLayNet数据集训练的英文表格场景的文字识别|6.9M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar) |
+|en_ppocr_mobile_v2.0_table_structure|PubLayNet数据集训练的英文表格场景的表格结构预测|18.6M|[推理模型](https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar) |
+如需要使用其他模型，可以在 [model_list](../doc/doc_ch/models_list.md) 下载模型或者使用自己训练好的模型配置到`det_model_dir`,`rec_model_dir`,`table_model_dir`三个字段即可。
--- a/ppstructure/__init__.py
+++ b/ppstructure/__init__.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppstructure/layout/README.md
+++ b/ppstructure/layout/README.md
+English | [简体中文](README_ch.md)
+# Getting Started
+[1. Install whl package](#Install)
+[2. Quick Start](#QuickStart)
+[3. PostProcess](#PostProcess)
+[4. Results](#Results)
+[5. Training](#Training)
+<a name="Install"></a>
+## 1.  Install whl package
+```bash
+wget https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
+pip install -U layoutparser-0.0.0-py3-none-any.whl
+```
+<a name="QuickStart"></a>
+## 2. Quick Start
+Use LayoutParser to identify the layout of a document:
+```python
+import cv2
+import layoutparser as lp
+image = cv2.imread("doc/table/layout.jpg")
+image = image[..., ::-1]
+# load model
+model = lp.PaddleDetectionLayoutModel(config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
+                                threshold=0.5,
+                                label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},
+                                enforce_cpu=False,
+                                enable_mkldnn=True)
+# detect
+layout = model.detect(image)
+# show result
+show_img = lp.draw_box(image, layout, box_width=3, show_element_type=True)
+show_img.show()
+```
+The following figure shows the result, with different colored detection boxes representing different categories and displaying specific categories in the upper left corner of the box with `show_element_type`
+<div align="center">
+<img src="../../doc/table/result_all.jpg"  width = "600" />
+</div>
+`PaddleDetectionLayoutModel` parameters are described as follows:
+|   parameter    |                       description                        |   default   |                            remark                            |
+| :------------: | :------------------------------------------------------: | :---------: | :----------------------------------------------------------: |
+|  config_path   |                    model config path                     |    None     | Specifying config_path automatically downloads the model (only the first time; afterwards the model already exists and is not downloaded again) |
+|   model_path   |                        model path                        |    None     | Local model path. At least one of config_path and model_path must be set; they cannot both be None |
+|   threshold    |              threshold of prediction score               |     0.5     |                              \                               |
+|  input_shape   |                 picture size of reshape                  | [3,640,640] |                              \                               |
+|   batch_size   |                    testing batch size                    |      1      |                              \                               |
+|   label_map    |                  category mapping table                  |    None     | When config_path is set, it can be None; the label_map is then obtained automatically according to the dataset name |
+|  enforce_cpu   |                    whether to use CPU                    |    False    |      False to use GPU, and True to force the use of CPU      |
+| enable_mkldnn  | whether mkldnn acceleration is enabled in CPU prediction |    True     |                              \                               |
+|   thread_num   |                the number of CPU threads                 |     10      |                              \                               |
+The following model configurations and label maps are currently supported, which you can use by modifying '--config_path' and '--label_map' to detect different types of content:
+| dataset                                                      | config_path                                                  | label_map                                                 |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | --------------------------------------------------------- |
+| [TableBank](https://doc-analysis.github.io/tablebank-page/index.html) word | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_word/config | {0:"Table"}                                               |
+| TableBank latex                                              | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_latex/config | {0:"Table"}                                               |
+| [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet)        | lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config      | {0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"} |
+* TableBank word and TableBank latex are trained on datasets of word documents and latex documents respectively;
+* The downloaded TableBank dataset contains both word and latex.
+<a name="PostProcess"></a>
+## 3. PostProcess
+Layout parser contains multiple categories, if you only want to get the detection box for a specific category (such as the "Text" category), you can use the following code:
+```python
+# follow the above code
+# filter areas for a specific text type
+text_blocks = lp.Layout([b for b in layout if b.type=='Text'])
+figure_blocks = lp.Layout([b for b in layout if b.type=='Figure'])
+# text areas may be detected within the image area, delete these areas
+text_blocks = lp.Layout([b for b in text_blocks \
+                   if not any(b.is_in(b_fig) for b_fig in figure_blocks)])
+# sort text areas and assign ID
+h, w = image.shape[:2]
+left_interval = lp.Interval(0, w/2*1.05, axis='x').put_on_canvas(image)
+left_blocks = text_blocks.filter_by(left_interval, center=True)
+left_blocks.sort(key = lambda b:b.coordinates[1])
+right_blocks = [b for b in text_blocks if b not in left_blocks]
+right_blocks.sort(key = lambda b:b.coordinates[1])
+# the two lists are merged and the indexes are added in order
+text_blocks = lp.Layout([b.set(id = idx) for idx, b in enumerate(left_blocks + right_blocks)])
+# display result
+show_img = lp.draw_box(image, text_blocks,
+            box_width=3,
+            show_element_id=True)
+show_img.show()
+```
+Displays results with only the "Text" category：
+<div align="center">
+<img src="../../doc/table/result_text.jpg"  width = "600" />
+</div>
+<a name="Results"></a>
+## 4. Results
+| Dataset   | mAP  | CPU time cost | GPU time cost |
+| --------- | ---- | ------------- | ------------- |
+| PubLayNet | 93.6 | 1713.7ms      | 66.6ms        |
+| TableBank | 96.2 | 1968.4ms      | 65.1ms        |
+**Environment：**
+    **CPU：**  Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz，24core
+    **GPU：**  a single NVIDIA Tesla P40
+<a name="Training"></a>
+## 5. Training
+The above model is based on [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection). If you want to train your own layout parser model，please refer to：[train_layoutparser_model](train_layoutparser_model.md)
--- a/ppstructure/layout/README_ch.md
+++ b/ppstructure/layout/README_ch.md
+[English](README.md) | 简体中文
+# 版面分析使用说明
+[1. 安装whl包](#安装whl包)
+[2. 使用](#使用)
+[3. 后处理](#后处理)
+[4. 指标](#指标)
+[5. 训练版面分析模型](#训练版面分析模型)
+<a name="安装whl包"></a>
+## 1.  安装whl包
+```bash
+pip install -U https://paddleocr.bj.bcebos.com/whl/layoutparser-0.0.0-py3-none-any.whl
+```
+<a name="使用"></a>
+## 2. 使用
+使用layoutparser识别给定文档的布局：
+```python
+import cv2
+import layoutparser as lp
+image = cv2.imread("doc/table/layout.jpg")
+image = image[..., ::-1]
+# 加载模型
+model = lp.PaddleDetectionLayoutModel(config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
+                                threshold=0.5,
+                                label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},
+                                enforce_cpu=False,
+                                enable_mkldnn=True)
+# 检测
+layout = model.detect(image)
+# 显示结果
+show_img = lp.draw_box(image, layout, box_width=3, show_element_type=True)
+show_img.show()
+```
+下图展示了结果，不同颜色的检测框表示不同的类别，并通过`show_element_type`在框的左上角显示具体类别：
+<div align="center">
+<img src="../../doc/table/result_all.jpg"  width = "600" />
+</div>
+`PaddleDetectionLayoutModel`函数参数说明如下:
+|      参数      |            含义             |   默认值    |                             备注                             |
+| :------------: | :-------------------------: | :---------: | :----------------------------------------------------------: |
+|  config_path   |        模型配置路径         |    None     | 指定config_path会自动下载模型(仅第一次，之后模型存在，不会再下载) |
+|   model_path   |          模型路径           |    None     | 本地模型路径，config_path和model_path必须设置一个，不能同时为None |
+|   threshold    |       预测得分的阈值        |     0.5     |                              \                               |
+|  input_shape   |     reshape之后图片尺寸     | [3,640,640] |                              \                               |
+|   batch_size   |       测试batch size        |      1      |                              \                               |
+|   label_map    |         类别映射表          |    None     | 设置config_path时，可以为None，根据数据集名称自动获取label_map |
+|  enforce_cpu   |     代码是否使用CPU运行     |    False    |         设置为False表示使用GPU，True表示强制使用CPU          |
+| enable_mkldnn  | CPU预测中是否开启MKLDNN加速 |    True     |                              \                               |
+|   thread_num   |        设置CPU线程数        |     10      |                              \                               |
+目前支持以下几种模型配置和label map，您可以通过修改 `--config_path`和 `--label_map`使用这些模型，从而检测不同类型的内容：
+| dataset                                                      | config_path                                                  | label_map                                                 |
+| ------------------------------------------------------------ | ------------------------------------------------------------ | --------------------------------------------------------- |
+| [TableBank](https://doc-analysis.github.io/tablebank-page/index.html) word | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_word/config | {0:"Table"}                                               |
+| TableBank latex                                              | lp://TableBank/ppyolov2_r50vd_dcn_365e_tableBank_latex/config | {0:"Table"}                                               |
+| [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet)        | lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config      | {0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"} |
+* TableBank word和TableBank latex分别在word文档、latex文档数据集训练；
+* 下载的TableBank数据集里同时包含word和latex。
+<a name="后处理"></a>
+## 3. 后处理
+版面分析检测包含多个类别，如果只想获取指定类别(如"Text"类别)的检测框、可以使用下述代码：
+```python
+# 接上面代码
+# 首先过滤特定文本类型的区域
+text_blocks = lp.Layout([b for b in layout if b.type=='Text'])
+figure_blocks = lp.Layout([b for b in layout if b.type=='Figure'])
+# 因为在图像区域内可能检测到文本区域，所以只需要删除它们
+text_blocks = lp.Layout([b for b in text_blocks \
+                   if not any(b.is_in(b_fig) for b_fig in figure_blocks)])
+# 对文本区域排序并分配id
+h, w = image.shape[:2]
+left_interval = lp.Interval(0, w/2*1.05, axis='x').put_on_canvas(image)
+left_blocks = text_blocks.filter_by(left_interval, center=True)
+left_blocks.sort(key = lambda b:b.coordinates[1])
+right_blocks = [b for b in text_blocks if b not in left_blocks]
+right_blocks.sort(key = lambda b:b.coordinates[1])
+# 最终合并两个列表，并按顺序添加索引
+text_blocks = lp.Layout([b.set(id = idx) for idx, b in enumerate(left_blocks + right_blocks)])
+# 显示结果
+show_img = lp.draw_box(image, text_blocks,
+            box_width=3,
+            show_element_id=True)
+show_img.show()
+```
+显示只有"Text"类别的结果：
+<div align="center">
+<img src="../../doc/table/result_text.jpg"  width = "600" />
+</div>
+<a name="指标"></a>
+## 4. 指标
+| Dataset   | mAP  | CPU time cost | GPU time cost |
+| --------- | ---- | ------------- | ------------- |
+| PubLayNet | 93.6 | 1713.7ms      | 66.6ms        |
+| TableBank | 96.2 | 1968.4ms      | 65.1ms        |
+**Environment：**
+    **CPU：**  Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz，24core
+    **GPU：**  a single NVIDIA Tesla P40
+<a name="训练版面分析模型"></a>
+## 5. 训练版面分析模型
+上述模型基于[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection) 训练，如果您想训练自己的版面分析模型，请参考：[train_layoutparser_model](train_layoutparser_model_ch.md)
--- a/ppstructure/layout/train_layoutparser_model.md
+++ b/ppstructure/layout/train_layoutparser_model.md
+# Training layout-parse
+[1. Installation](#Installation)
+  [1.1 Requirements](#Requirements)
+  [1.2 Install PaddleDetection](#Install PaddleDetection)
+[2.  Data preparation](#Data preparation)
+[3. Configuration](#Configuration)
+[4. Training](#Training)
+[5. Prediction](#Prediction)
+[6. Deployment](#Deployment)
+  [6.1 Export model](#Export model)
+  [6.2 Inference](#Inference)  
+<a name="Installation"></a>
+## 1.  Installation
+<a name="Requirements"></a>
+### 1.1 Requirements
+- PaddlePaddle 2.1
+- OS 64 bit
+- Python 3(3.5.1+/3.6/3.7/3.8/3.9)，64 bit
+- pip/pip3(9.0.1+), 64 bit
+- CUDA >= 10.1
+- cuDNN >= 7.6
+<a name="Install PaddleDetection"></a>
+### 1.2 Install PaddleDetection
+```bash
+# Clone PaddleDetection repository
+cd <path/to/clone/PaddleDetection>
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection
+# Install other dependencies
+pip install -r requirements.txt
+```
+For more installation tutorials, please refer to： [Install doc](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/INSTALL_cn.md)
+<a name="Data preparation"></a>
+## 2. Data preparation
+Download the [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) dataset
+```bash
+cd PaddleDetection/dataset/
+mkdir publaynet
+# execute the command，download PubLayNet
+wget -O publaynet.tar.gz https://dax-cdn.cdn.appdomain.cloud/dax-publaynet/1.0.0/publaynet.tar.gz?_ga=2.104193024.1076900768.1622560733-649911202.1622560733
+# unpack
+tar -xvf publaynet.tar.gz
+```
+PubLayNet directory structure after decompressing ：
+| File or Folder | Description                                      | num     |
+| :------------- | :----------------------------------------------- | ------- |
+| `train/`       | Images in the training subset                    | 335,703 |
+| `val/`         | Images in the validation subset                  | 11,245  |
+| `test/`        | Images in the testing subset                     | 11,405  |
+| `train.json`   | Annotations for training images                  |  1       |
+| `val.json`     | Annotations for validation images                |  1       |
+| `LICENSE.txt`  | Plaintext version of the CDLA-Permissive license |   1      |
+| `README.txt`   | Text file with the file names and description    |   1      |
+For other datasets，please refer to [PrepareDataSet](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/PrepareDataSet.md)
+<a name="Configuration"></a>
+## 3. Configuration
+We use the  `configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml` configuration for training，the configuration file is as follows
+```bash
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  './_base_/ppyolov2_r50vd_dcn.yml',
+  './_base_/optimizer_365e.yml',
+  './_base_/ppyolov2_reader.yml',
+]
+snapshot_epoch: 8
+weights: output/ppyolov2_r50vd_dcn_365e_coco/model_final
+```
+The `ppyolov2_r50vd_dcn_365e_coco.yml` configuration depends on other configuration files, in this case:
+- coco_detection.yml：mainly explains the path of training data and verification data
+- runtime.yml：mainly describes the common parameters, such as whether to use the GPU and how many epoch to save model etc.
+- optimizer_365e.yml：mainly explains the learning rate and optimizer configuration
+- ppyolov2_r50vd_dcn.yml：mainly describes the model and the  network
+- ppyolov2_reader.yml：mainly describes the configuration of data readers, such as batch size and number of concurrent loading child processes, and also includes the preprocessing applied after reading, such as resize and data augmentation etc.
+Modify the preceding files, such as the dataset path and batch size etc.
+<a name="Training"></a>
+## 4. Training
+PaddleDetection provides single-card/multi-card training mode to meet various training needs of users:
+* GPU single card training
+```bash
+export CUDA_VISIBLE_DEVICES=0 #Don't need to run this command on Windows and Mac
+python tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml
+```
+* GPU multi-card training
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval
+```
+--eval: training while verifying
+* Model recovery training
+During the daily training, if training is interrupted due to some reasons, you can use the -r command to resume the training:
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval -r output/ppyolov2_r50vd_dcn_365e_coco/10000
+```
+Note: If you encounter "`Out of memory error`" , try reducing `batch_size` in the `ppyolov2_reader.yml`  file
+<a name="Prediction"></a>
+## 5. Prediction
+Set parameters and use PaddleDetection to predict：
+```bash
+export CUDA_VISIBLE_DEVICES=0
+python tools/infer.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --infer_img=images/paper-image.jpg --output_dir=infer_output/ --draw_threshold=0.5 -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final --use_vdl=True
+```
+`--draw_threshold` is an optional parameter. According to the calculation of [NMS](https://ieeexplore.ieee.org/document/1699659), different thresholds will produce different results. `keep_top_k` represents the maximum number of output targets; the default value is 100. You can set a different value according to your actual situation.
+<a name="Deployment"></a>
+## 6. Deployment
+Use your trained model in Layout Parser
+<a name="Export model"></a>
+### 6.1 Export model
+In the process of model training, the model file saved contains the process of forward prediction and back propagation. In the actual industrial deployment, there is no need for back propagation. Therefore, the model should be translated into the model format required by the deployment. The `tools/export_model.py` script is provided in PaddleDetection to export the model.
+The exported model name defaults to `model.*`, while Layout Parser's code expects the model name `inference.*`, so change [PaddleDetection/ppdet/engine/trainer.py](https://github.com/PaddlePaddle/PaddleDetection/blob/b87a1ea86fa18ce69e44a17ad1b49c1326f19ff9/ppdet/engine/trainer.py#L512) (click on the link to see the detailed line of code): change 'model' to 'inference'.
+Execute the script to export model:
+```bash
+python tools/export_model.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --output_dir=./inference -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final.pdparams
+```
+The prediction model is exported to `inference/ppyolov2_r50vd_dcn_365e_coco` ,including:`infer_cfg.yml`(prediction not required), `inference.pdiparams`, `inference.pdiparams.info`,`inference.pdmodel`
+More model export tutorials, please refer to：[EXPORT_MODEL](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md)
+<a name="Inference"></a>
+### 6.2 Inference
+`model_path` represents the trained model path, and layoutparser is used to predict:
+```bash
+import layoutparser as lp
+model = lp.PaddleDetectionLayoutModel(model_path="inference/ppyolov2_r50vd_dcn_365e_coco", threshold=0.5,label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},enforce_cpu=True,enable_mkldnn=True)
+```
+***
+For more PaddleDetection training tutorials，please refer to：[PaddleDetection Training](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/GETTING_STARTED_cn.md)
+***
--- a/ppstructure/layout/train_layoutparser_model_ch.md
+++ b/ppstructure/layout/train_layoutparser_model_ch.md
+# 训练版面分析
+[1. 安装](#安装)
+        [1.1 环境要求](#环境要求)
+        [1.2 安装PaddleDetection](#安装PaddleDetection)
+[2. 准备数据](#准备数据)
+[3. 配置文件改动和说明](#配置文件改动和说明)
+[4. PaddleDetection训练](#训练)
+[5. PaddleDetection预测](#预测)
+[6. 预测部署](#预测部署)
+        [6.1 模型导出](#模型导出)
+        [6.2 layout parser预测](#layout_parser预测)
+<a name="安装"></a>
+## 1. 安装
+<a name="环境要求"></a>
+### 1.1 环境要求
+- PaddlePaddle 2.1
+- OS 64 bit
+- Python 3(3.5.1+/3.6/3.7/3.8/3.9)，64 bit
+- pip/pip3(9.0.1+), 64 bit
+- CUDA >= 10.1
+- cuDNN >= 7.6
+<a name="安装PaddleDetection"></a>
+### 1.2 安装PaddleDetection
+```bash
+# 克隆PaddleDetection仓库
+cd <path/to/clone/PaddleDetection>
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection
+# 安装其他依赖
+pip install -r requirements.txt
+```
+更多安装教程，请参考: [Install doc](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/INSTALL_cn.md)
+<a name="准备数据"></a>
+## 2. 准备数据
+下载 [PubLayNet](https://github.com/ibm-aur-nlp/PubLayNet) 数据集：
+```bash
+cd PaddleDetection/dataset/
+mkdir publaynet
+# 执行命令，下载
+wget -O publaynet.tar.gz https://dax-cdn.cdn.appdomain.cloud/dax-publaynet/1.0.0/publaynet.tar.gz?_ga=2.104193024.1076900768.1622560733-649911202.1622560733
+# 解压
+tar -xvf publaynet.tar.gz
+```
+解压之后PubLayNet目录结构：
+| File or Folder | Description                                      | num     |
+| :------------- | :----------------------------------------------- | ------- |
+| `train/`       | Images in the training subset                    | 335,703 |
+| `val/`         | Images in the validation subset                  | 11,245  |
+| `test/`        | Images in the testing subset                     | 11,405  |
+| `train.json`   | Annotations for training images                  | 1       |
+| `val.json`     | Annotations for validation images                | 1       |
+| `LICENSE.txt`  | Plaintext version of the CDLA-Permissive license | 1       |
+| `README.txt`   | Text file with the file names and description    | 1       |
+如果使用其它数据集，请参考[准备训练数据](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/PrepareDataSet.md)
+<a name="配置文件改动和说明"></a>
+## 3. 配置文件改动和说明
+我们使用 `configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml`配置进行训练，配置文件摘要如下：
+```bash
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  './_base_/ppyolov2_r50vd_dcn.yml',
+  './_base_/optimizer_365e.yml',
+  './_base_/ppyolov2_reader.yml',
+]
+snapshot_epoch: 8
+weights: output/ppyolov2_r50vd_dcn_365e_coco/model_final
+```
+从中可以看到 `ppyolov2_r50vd_dcn_365e_coco.yml` 配置需要依赖其他的配置文件，在该例子中需要依赖:
+- coco_detection.yml：主要说明了训练数据和验证数据的路径
+- runtime.yml：主要说明了公共的运行参数，比如是否使用GPU、每多少个epoch存储checkpoint等
+- optimizer_365e.yml：主要说明了学习率和优化器的配置
+- ppyolov2_r50vd_dcn.yml：主要说明模型和主干网络的情况
+- ppyolov2_reader.yml：主要说明数据读取器配置，如batch size，并发加载子进程数等，同时包含读取后预处理操作，如resize、数据增强等等
+根据实际情况，修改上述文件，比如数据集路径、batch size等。
+<a name="训练"></a>
+## 4. PaddleDetection训练
+PaddleDetection提供了单卡/多卡训练模式，满足用户多种训练需求
+* GPU 单卡训练
+```bash
+export CUDA_VISIBLE_DEVICES=0 #windows和Mac下不需要执行该命令
+python tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml
+```
+* GPU多卡训练
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval
+```
+--eval：表示边训练边验证
+* 模型恢复训练
+在日常训练过程中，有的用户由于一些原因导致训练中断，用户可以使用-r的命令恢复训练:
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --eval -r output/ppyolov2_r50vd_dcn_365e_coco/10000
+```
+注意：如果遇到 "`Out of memory error`" 问题, 尝试在 `ppyolov2_reader.yml` 文件中调小`batch_size`
+<a name="预测"></a>
+## 5. PaddleDetection预测
+设置参数，使用PaddleDetection预测：
+```bash
+export CUDA_VISIBLE_DEVICES=0
+python tools/infer.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --infer_img=images/paper-image.jpg --output_dir=infer_output/ --draw_threshold=0.5 -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final --use_vdl=True
+```
+`--draw_threshold` 是个可选参数. 根据 [NMS](https://ieeexplore.ieee.org/document/1699659) 的计算，不同阈值会产生不同的结果 `keep_top_k`表示设置输出目标的最大数量，默认值为100，用户可以根据自己的实际情况进行设定。
+<a name="预测部署"></a>
+## 6. 预测部署
+在layout parser中使用自己训练好的模型。
+<a name="模型导出"></a>
+### 6.1 模型导出
+在模型训练过程中保存的模型文件是包含前向预测和反向传播的过程，在实际的工业部署则不需要反向传播，因此需要将模型进行导成部署需要的模型格式。 在PaddleDetection中提供了 `tools/export_model.py`脚本来导出模型。
+导出模型名称默认是`model.*`，layout parser代码模型名称是`inference.*`,  所以修改[PaddleDetection/ppdet/engine/trainer.py ](https://github.com/PaddlePaddle/PaddleDetection/blob/b87a1ea86fa18ce69e44a17ad1b49c1326f19ff9/ppdet/engine/trainer.py#L512) (点开链接查看详细代码行)，将`model`改为`inference`即可。
+执行导出模型脚本：
+```bash
+python tools/export_model.py -c configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml --output_dir=./inference -o weights=output/ppyolov2_r50vd_dcn_365e_coco/model_final.pdparams
+```
+预测模型会导出到`inference/ppyolov2_r50vd_dcn_365e_coco`目录下，分别为`infer_cfg.yml`(预测不需要), `inference.pdiparams`, `inference.pdiparams.info`,`inference.pdmodel` 。
+更多模型导出教程，请参考：[EXPORT_MODEL](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/deploy/EXPORT_MODEL.md)
+<a name="layout_parser预测"></a>
+### 6.2 layout_parser预测
+`model_path`指定训练好的模型路径，使用layout parser进行预测：
+```bash
+import layoutparser as lp
+model = lp.PaddleDetectionLayoutModel(model_path="inference/ppyolov2_r50vd_dcn_365e_coco", threshold=0.5,label_map={0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"},enforce_cpu=True,enable_mkldnn=True)
+```
+***
+更多PaddleDetection训练教程，请参考：[PaddleDetection训练](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.1/docs/tutorials/GETTING_STARTED_cn.md)
+***
--- a/ppstructure/predict_system.py
+++ b/ppstructure/predict_system.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import sys
+import subprocess
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+import cv2
+import numpy as np
+import time
+import logging
+from ppocr.utils.utility import get_image_file_list, check_and_read_gif
+from ppocr.utils.logging import get_logger
+from tools.infer.predict_system import TextSystem
+from ppstructure.table.predict_table import TableSystem, to_excel
+from ppstructure.utility import parse_args, draw_structure_result
+logger = get_logger()
+class OCRSystem(object):
+    def __init__(self, args):
+        import layoutparser as lp
+        # args.det_limit_type = 'resize_long'
+        args.drop_score = 0
+        if not args.show_log:
+            logger.setLevel(logging.INFO)
+        self.text_system = TextSystem(args)
+        self.table_system = TableSystem(args, self.text_system.text_detector, self.text_system.text_recognizer)
+        config_path = None
+        model_path = None
+        if os.path.isdir(args.layout_path_model):
+            model_path = args.layout_path_model
+        else:
+            config_path = args.layout_path_model
+        self.table_layout = lp.PaddleDetectionLayoutModel(config_path=config_path,
+                                                          model_path=model_path,
+                                                          threshold=0.5, enable_mkldnn=args.enable_mkldnn,
+                                                          enforce_cpu=not args.use_gpu, thread_num=args.cpu_threads)
+        self.use_angle_cls = args.use_angle_cls
+        self.drop_score = args.drop_score
+    def __call__(self, img):
+        ori_im = img.copy()
+        layout_res = self.table_layout.detect(img[..., ::-1])
+        res_list = []
+        for region in layout_res:
+            x1, y1, x2, y2 = region.coordinates
+            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
+            roi_img = ori_im[y1:y2, x1:x2, :]
+            if region.type == 'Table':
+                res = self.table_system(roi_img)
+            else:
+                filter_boxes, filter_rec_res = self.text_system(roi_img)
+                filter_boxes = [x + [x1, y1] for x in filter_boxes]
+                filter_boxes = [x.reshape(-1).tolist() for x in filter_boxes]
+                # remove style char
+                style_token = ['<strike>', '<strike>', '<sup>', '</sub>', '<b>', '</b>', '<sub>', '</sup>',
+                               '<overline>', '</overline>', '<underline>', '</underline>', '<i>', '</i>']
+                filter_rec_res_tmp = []
+                for rec_res in filter_rec_res:
+                    rec_str, rec_conf = rec_res
+                    for token in style_token:
+                        if token in rec_str:
+                            rec_str = rec_str.replace(token, '')
+                    filter_rec_res_tmp.append((rec_str, rec_conf))
+                res = (filter_boxes, filter_rec_res_tmp)
+            res_list.append({'type': region.type, 'bbox': [x1, y1, x2, y2], 'img': roi_img, 'res': res})
+        return res_list
+def save_structure_res(res, save_folder, img_name):
+    excel_save_folder = os.path.join(save_folder, img_name)
+    os.makedirs(excel_save_folder, exist_ok=True)
+    # save res
+    with open(os.path.join(excel_save_folder, 'res.txt'), 'w', encoding='utf8') as f:
+        for region in res:
+            if region['type'] == 'Table':
+                excel_path = os.path.join(excel_save_folder, '{}.xlsx'.format(region['bbox']))
+                to_excel(region['res'], excel_path)
+            if region['type'] == 'Figure':
+                roi_img = region['img']
+                img_path = os.path.join(excel_save_folder, '{}.jpg'.format(region['bbox']))
+                cv2.imwrite(img_path, roi_img)
+            else:
+                for box, rec_res in zip(region['res'][0], region['res'][1]):
+                    f.write('{}\t{}\n'.format(np.array(box).reshape(-1).tolist(), rec_res))
+def main(args):
+    image_file_list = get_image_file_list(args.image_dir)
+    image_file_list = image_file_list
+    image_file_list = image_file_list[args.process_id::args.total_process_num]
+    save_folder = args.output
+    os.makedirs(save_folder, exist_ok=True)
+    structure_sys = OCRSystem(args)
+    img_num = len(image_file_list)
+    for i, image_file in enumerate(image_file_list):
+        logger.info("[{}/{}] {}".format(i, img_num, image_file))
+        img, flag = check_and_read_gif(image_file)
+        img_name = os.path.basename(image_file).split('.')[0]
+        if not flag:
+            img = cv2.imread(image_file)
+        if img is None:
+            logger.error("error in loading image:{}".format(image_file))
+            continue
+        starttime = time.time()
+        res = structure_sys(img)
+        save_structure_res(res, save_folder, img_name)
+        draw_img = draw_structure_result(img, res, args.vis_font_path)
+        cv2.imwrite(os.path.join(save_folder, img_name, 'show.jpg'), draw_img)
+        logger.info('result save to {}'.format(os.path.join(save_folder, img_name)))
+        elapse = time.time() - starttime
+        logger.info("Predict time : {:.3f}s".format(elapse))
+if __name__ == "__main__":
+    args = parse_args()
+    if args.use_mp:
+        p_list = []
+        total_process_num = args.total_process_num
+        for process_id in range(total_process_num):
+            cmd = [sys.executable, "-u"] + sys.argv + [
+                "--process_id={}".format(process_id),
+                "--use_mp={}".format(False)
+            ]
+            p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout)
+            p_list.append(p)
+        for p in p_list:
+            p.wait()
+    else:
+        main(args)
--- a/ppstructure/table/README.md
+++ b/ppstructure/table/README.md
+# Table Recognition
+## 1. pipeline
+The table recognition mainly contains three models
+1. Single line text detection-DB
+2. Single line text recognition-CRNN
+3. Table structure and cell coordinate prediction-RARE
+The table recognition flow chart is as follows
+![tableocr_pipeline](../../doc/table/tableocr_pipeline_en.jpg)
+1. The coordinates of single-line text are detected by the DB model and then sent to the recognition model to get the recognition result.
+2. The table structure and cell coordinates are predicted by the RARE model.
+3. The recognition result of the cell is combined by the coordinates, recognition result of the single line and the coordinates of the cell.
+4. The cell recognition result and the table structure together construct the html string of the table.
+## 2. Performance
+We evaluated the algorithm on the PubTabNet<sup>[1]</sup> eval dataset, and the performance is as follows:
+|Method|[TEDS(Tree-Edit-Distance-based Similarity)](https://github.com/ibm-aur-nlp/PubTabNet/tree/master/src)|
+| --- | --- | 
+| EDD<sup>[2]</sup> | 88.3 | 
+| Ours | 93.32 | 
+## 3. How to use
+### 3.1 quick start
+```shell
+cd PaddleOCR/ppstructure
+# download model
+mkdir inference && cd inference
+# Download the detection model of the ultra-lightweight table English OCR model and unzip it
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar && tar xf en_ppocr_mobile_v2.0_table_det_infer.tar
+# Download the recognition model of the ultra-lightweight table English OCR model and unzip it
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar && tar xf en_ppocr_mobile_v2.0_table_rec_infer.tar
+# Download the ultra-lightweight English table structure model and unzip it
+wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar && tar xf en_ppocr_mobile_v2.0_table_structure_infer.tar
+cd ..
+# run
+python3 table/predict_table.py --det_model_dir=inference/en_ppocr_mobile_v2.0_table_det_infer --rec_model_dir=inference/en_ppocr_mobile_v2.0_table_rec_infer --table_model_dir=inference/en_ppocr_mobile_v2.0_table_structure_infer --image_dir=../doc/table/table.jpg --rec_char_dict_path=../ppocr/utils/ppocr_keys_v1.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=ch --det_limit_side_len=736 --det_limit_type=min --output ../output/table
+```
+Note: The above model is trained on the PubLayNet dataset and only supports English scanning scenarios. If you need to identify other scenarios, you need to train the model yourself and replace the three fields `det_model_dir`, `rec_model_dir`, `table_model_dir`.
+After running, the excel sheet of each picture will be saved in the directory specified by the output field
+### 3.2 Train
+In this chapter, we only introduce the training of the table structure model. For model training of [text detection](../../doc/doc_en/detection_en.md) and [text recognition](../../doc/doc_en/recognition_en.md), please refer to the corresponding documents.
+#### data preparation  
+The training data uses the public dataset [PubTabNet](https://arxiv.org/abs/1911.10683), which can be downloaded from the official [website](https://github.com/ibm-aur-nlp/PubTabNet). The PubTabNet dataset contains about 500,000 images, as well as annotations in HTML format.
+#### Start training  
+*If you are installing the cpu version of paddle, please modify the `use_gpu` field in the configuration file to false*
+```shell
+# single GPU training
+python3 tools/train.py -c configs/table/table_mv3.yml
+# multi-GPU training
+# Set the GPU ID used by the '--gpus' parameter.
+python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/table/table_mv3.yml
+```
+In the above commands, `-c` specifies the configuration file used for training, here `configs/table/table_mv3.yml`.
+For a detailed explanation of the configuration file, please refer to [config](../../doc/doc_en/config_en.md).
+#### load trained model and continue training
+If you expect to load trained model and continue the training again, you can specify the parameter `Global.checkpoints` as the model path to be loaded.
+```shell
+python3 tools/train.py -c configs/table/table_mv3.yml -o Global.checkpoints=./your/trained/model
+```
+**Note**: The priority of `Global.checkpoints` is higher than that of `Global.pretrain_weights`, that is, when two parameters are specified at the same time, the model specified by `Global.checkpoints` will be loaded first. If the model path specified by `Global.checkpoints` is wrong, the one specified by `Global.pretrain_weights` will be loaded.
+### 3.3 Eval
+The table uses [TEDS(Tree-Edit-Distance-based Similarity)](https://github.com/ibm-aur-nlp/PubTabNet/tree/master/src) as the evaluation metric of the model. Before the model evaluation, the three models in the pipeline need to be exported as inference models (we have provided them), and the gt for evaluation needs to be prepared. Examples of gt are as follows:
+```json
+{"PMC4289340_004_00.png": [
+  ["<html>", "<body>", "<table>", "<thead>", "<tr>", "<td>", "</td>", "<td>", "</td>", "<td>", "</td>", "</tr>", "</thead>", "<tbody>", "<tr>", "<td>", "</td>", "<td>", "</td>", "<td>", "</td>", "</tr>",  "</tbody>", "</table>", "</body>", "</html>"], 
+  [[1, 4, 29, 13], [137, 4, 161, 13], [215, 4, 236, 13], [1, 17, 30, 27], [137, 17, 147, 27], [215, 17, 225, 27]], 
+  [["<b>", "F", "e", "a", "t", "u", "r", "e", "</b>"], ["<b>", "G", "b", "3", " ", "+", "</b>"], ["<b>", "G", "b", "3", " ", "-", "</b>"], ["<b>", "P", "a", "t", "i", "e", "n", "t", "s", "</b>"], ["6", "2"], ["4", "5"]]
+]}
+```
+In the gt json, the key is the image name and the value is the corresponding gt. The gt is a list composed of three items:
+1. HTML string list of table structure
+2. The coordinates of each cell (not including the empty text in the cell)
+3. The text information in each cell (not including the empty text in the cell)
+Use the following command to evaluate. After the evaluation is completed, the teds indicator will be output.
+```shell
+cd PaddleOCR/ppstructure
+python3 table/eval_table.py --det_model_dir=path/to/det_model_dir --rec_model_dir=path/to/rec_model_dir --table_model_dir=path/to/table_model_dir --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --gt_path=path/to/gt.json
+```
+If the PubTabNet eval dataset is used, the output will be
+```bash
+teds: 93.32
+```
+### 3.4 Inference
+```shell
+cd PaddleOCR/ppstructure
+python3 table/predict_table.py --det_model_dir=path/to/det_model_dir --rec_model_dir=path/to/rec_model_dir --table_model_dir=path/to/table_model_dir --image_dir=../doc/table/1.png --rec_char_dict_path=../ppocr/utils/dict/table_dict.txt --table_char_dict_path=../ppocr/utils/dict/table_structure_dict.txt --rec_char_type=EN --det_limit_side_len=736 --det_limit_type=min --output ../output/table
+```
+After running, the excel sheet of each picture will be saved in the directory specified by the output field
+Reference
+1. https://github.com/ibm-aur-nlp/PubTabNet
+2. https://arxiv.org/pdf/1911.10683
\ No newline at end of file