Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into fx_pse

22d81ceb · WenmuZhou · d173bba3 · da5d7ee3 · 22d81ceb · 22d81ceb
Commit 22d81ceb authored Aug 03, 2021 by WenmuZhou
20 changed files
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,7 +2,8 @@ include LICENSE
 include README.md

 recursive-include ppocr/utils *.txt utility.py logging.py network.py
-recursive-include ppocr/data/ *.py
+recursive-include ppocr/data *.py
 recursive-include ppocr/postprocess *.py
 recursive-include tools/infer *.py
-recursive-include ppocr/utils/e2e_utils/ *.py
\ No newline at end of file
+recursive-include ppocr/utils/e2e_utils *.py
+recursive-include ppstructure *.py
\ No newline at end of file
--- a/__init__.py
+++ b/__init__.py
@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import paddleocr
+from .paddleocr import *

-__all__ = ['PaddleOCR', 'draw_ocr']
-from .paddleocr import PaddleOCR
-from .tools.infer.utility import draw_ocr
+__version__ = paddleocr.VERSION
+__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res','download_with_progressbar']
--- a/deploy/cpp_infer/docs/windows_vs2019_build.md
+++ b/deploy/cpp_infer/docs/windows_vs2019_build.md
@@ -14,7 +14,7 @@ PaddleOCR在Windows 平台下基于`Visual Studio 2019 Community` 进行了测

 ### Step1: 下载PaddlePaddle C++ 预测库 fluid_inference

-PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本，请根据实际情况下载:  [C++预测库下载列表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/guides/05_inference_deployment/inference/windows_cpp_inference.html)
+PaddlePaddle C++ 预测库针对不同的`CPU`和`CUDA`版本提供了不同的预编译版本，请根据实际情况下载:  [C++预测库下载列表](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#windows)

 解压后`D:\projects\fluid_inference`目录包含内容为：
 ```

--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -5,23 +5,29 @@
 ### 1.1 安装whl包

 pip安装
+
 ```bash
 pip install "paddleocr>=2.0.1" # 推荐使用2.0.1+版本
 ```

 本地构建并安装
+
 ```bash
 python3 setup.py bdist_wheel
 pip3 install dist/paddleocr-x.x.x-py3-none-any.whl # x.x.x是paddleocr的版本号
 ```

 ## 2 使用
+
 ### 2.1 代码使用
+
 paddleocr whl包会自动下载ppocr轻量级模型作为默认模型，可以根据第3节**自定义模型**进行自定义更换。

 * 检测+方向分类器+识别全流程
+
 ```python
 from paddleocr import PaddleOCR, draw_ocr
+
 # Paddleocr目前支持中英文、英文、法语、德语、韩语、日语，可以通过修改lang参数进行切换
 # 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。
 ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory
@@ -32,6 +38,7 @@ for line in result:

 # 显示结果
 from PIL import Image
+
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -40,31 +47,36 @@ im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc
 im_show = Image.fromarray(im_show)
 im_show.save('result.jpg')
 ```
+
 结果是一个list，每个item包含了文本框，文字和识别置信度
+
 ```bash
 [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
 [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
 [[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['（45元/每公斤，100公斤起订）', 0.9676722]]
 ......
 ```
+
 结果可视化

 <div align="center">
    <img src="../imgs_results/whl/11_det_rec.jpg" width="800">
 </div>

-
 * 检测+识别
+
 ```python
 from paddleocr import PaddleOCR, draw_ocr
+
 ocr = PaddleOCR()  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs/11.jpg'
-result = ocr.ocr(img_path,cls=False)
+result = ocr.ocr(img_path, cls=False)
 for line in result:
    print(line)

 # 显示结果
 from PIL import Image
+
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -73,37 +85,45 @@ im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc
 im_show = Image.fromarray(im_show)
 im_show.save('result.jpg')
 ```
+
 结果是一个list，每个item包含了文本框，文字和识别置信度
+
 ```bash
 [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
 [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
 [[[24.0, 109.0], [333.0, 109.0], [333.0, 136.0], [24.0, 136.0]], ['（45元/每公斤，100公斤起订）', 0.9676722]]
 ......
 ```
+
 结果可视化

 <div align="center">
    <img src="../imgs_results/whl/11_det_rec.jpg" width="800">
 </div>

-
 * 方向分类器+识别
+
 ```python
 from paddleocr import PaddleOCR
+
 ocr = PaddleOCR(use_angle_cls=True)  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
 result = ocr.ocr(img_path, det=False, cls=True)
 for line in result:
    print(line)
 ```
+
 结果是一个list，每个item只包含识别结果和识别置信度
+
 ```bash
 ['韩国小馆', 0.9907421]
 ```

 * 单独执行检测
+
 ```python
 from paddleocr import PaddleOCR, draw_ocr
+
 ocr = PaddleOCR()  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs/11.jpg'
 result = ocr.ocr(img_path, rec=False)
@@ -118,13 +138,16 @@ im_show = draw_ocr(image, result, txts=None, scores=None, font_path='/path/to/Pa
 im_show = Image.fromarray(im_show)
 im_show.save('result.jpg')
 ```
+
 结果是一个list，每个item只包含文本框
+
 ```bash
 [[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]]
 [[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]]
 [[128.0, 397.0], [273.0, 397.0], [273.0, 414.0], [128.0, 414.0]]
 ......
 ```
+
 结果可视化


@@ -133,29 +156,37 @@ im_show.save('result.jpg')
 </div>

 * 单独执行识别
+
 ```python
 from paddleocr import PaddleOCR
+
 ocr = PaddleOCR()  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
 result = ocr.ocr(img_path, det=False)
 for line in result:
    print(line)
 ```
+
 结果是一个list，每个item只包含识别结果和识别置信度
+
 ```bash
 ['韩国小馆', 0.9907421]
 ```

 * 单独执行方向分类器
+
 ```python
 from paddleocr import PaddleOCR
+
 ocr = PaddleOCR(use_angle_cls=True)  # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs_words/ch/word_1.jpg'
 result = ocr.ocr(img_path, det=False, rec=False, cls=True)
 for line in result:
    print(line)
 ```
+
 结果是一个list，每个item只包含分类结果和分类置信度
+
 ```bash
 ['0', 0.9999924]
 ```
@@ -163,15 +194,19 @@ for line in result:
 ### 2.2 通过命令行使用

 查看帮助信息
+
 ```bash
 paddleocr -h
 ```

 * 检测+方向分类器+识别全流程
+
 ```bash
 paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true
 ```
+
 结果是一个list，每个item包含了文本框，文字和识别置信度
+
 ```bash
 [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
 [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
@@ -180,10 +215,13 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true
 ```

 * 检测+识别
+
 ```bash
 paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg
 ```
+
 结果是一个list，每个item包含了文本框，文字和识别置信度
+
 ```bash
 [[[24.0, 36.0], [304.0, 34.0], [304.0, 72.0], [24.0, 74.0]], ['纯臻营养护发素', 0.964739]]
 [[[24.0, 80.0], [172.0, 80.0], [172.0, 104.0], [24.0, 104.0]], ['产品信息/参数', 0.98069626]]
@@ -192,20 +230,25 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg
 ```

 * 方向分类器+识别
+
 ```bash
 paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false
 ```

 结果是一个list，每个item只包含识别结果和识别置信度
+
 ```bash
 ['韩国小馆', 0.9907421]
 ```

 * 单独执行检测
+
 ```bash
 paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false
 ```
+
 结果是一个list，每个item只包含文本框
+
 ```bash
 [[26.0, 457.0], [137.0, 457.0], [137.0, 477.0], [26.0, 477.0]]
 [[25.0, 425.0], [372.0, 425.0], [372.0, 448.0], [25.0, 448.0]]
@@ -214,34 +257,42 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false
 ```

 * 单独执行识别
+
 ```bash
 paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false
 ```

 结果是一个list，每个item只包含识别结果和识别置信度
+
 ```bash
 ['韩国小馆', 0.9907421]
 ```

 * 单独执行方向分类器
+
 ```bash
 paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false --rec false
 ```

 结果是一个list，每个item只包含分类结果和分类置信度
+
 ```bash
 ['0', 0.9999924]
 ```

 ## 3 自定义模型
-当内置模型无法满足需求时，需要使用到自己训练的模型。
-首先，参照[inference.md](./inference.md) 第一节转换将检测、分类和识别模型转换为inference模型，然后按照如下方式使用
+
+当内置模型无法满足需求时，需要使用到自己训练的模型。 首先，参照[inference.md](./inference.md) 第一节将检测、分类和识别模型转换为inference模型，然后按照如下方式使用

 ### 3.1 代码使用
+
 ```python
 from paddleocr import PaddleOCR, draw_ocr
+
 # 模型路径下必须含有model和params文件
-ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}', rec_char_dict_path='{your_rec_char_dict_path}', cls_model_dir='{your_cls_model_dir}', use_angle_cls=True)
+ocr = PaddleOCR(det_model_dir='{your_det_model_dir}', rec_model_dir='{your_rec_model_dir}',
+                rec_char_dict_path='{your_rec_char_dict_path}', cls_model_dir='{your_cls_model_dir}',
+                use_angle_cls=True)
 img_path = 'PaddleOCR/doc/imgs/11.jpg'
 result = ocr.ocr(img_path, cls=True)
 for line in result:
@@ -249,6 +300,7 @@ for line in result:

 # 显示结果
 from PIL import Image
+
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -269,8 +321,10 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
 ### 4.1 网络图片

 - 代码使用
+
 ```python
-from paddleocr import PaddleOCR, draw_ocr
+from paddleocr import PaddleOCR, draw_ocr, download_with_progressbar
+
 # Paddleocr目前支持中英文、英文、法语、德语、韩语、日语，可以通过修改lang参数进行切换
 # 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。
 ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory
@@ -281,7 +335,9 @@ for line in result:

 # 显示结果
 from PIL import Image
-image = Image.open(img_path).convert('RGB')
+
+download_with_progressbar(img_path, 'tmp.jpg')
+image = Image.open('tmp.jpg').convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
 scores = [line[1][1] for line in result]
@@ -289,15 +345,21 @@ im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc
 im_show = Image.fromarray(im_show)
 im_show.save('result.jpg')
 ```
+
 - 命令行模式
+
 ```bash
 paddleocr --image_dir http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg --use_angle_cls=true
 ```

 ### 4.2 numpy数组
+
 仅通过代码使用时支持numpy数组作为输入
+
 ```python
+import cv2
 from paddleocr import PaddleOCR, draw_ocr
+
 # Paddleocr目前支持中英文、英文、法语、德语、韩语、日语，可以通过修改lang参数进行切换
 # 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。
 ocr = PaddleOCR(use_angle_cls=True, lang="ch")  # need to run only once to download and load model into memory
@@ -310,6 +372,7 @@ for line in result:

 # 显示结果
 from PIL import Image
+
 image = Image.open(img_path).convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
@@ -356,3 +419,4 @@ im_show.save('result.jpg')
 | rec                     | 前向时是否启动识别                                                                                                                                                                                                   | TRUE                    |
 | cls                     | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类)                                                                                                                                                                                                | FALSE                    |
 | show_log                     | 是否打印det和rec等信息                                                                                                                                                                                                | FALSE                    |
+| type                     | 执行ocr或者表格结构化, 值可选['ocr','structure']                                                                                                                                                                                             | ocr                    |
--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
@@ -305,7 +305,8 @@ paddleocr --image_dir http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-f
 Support numpy array as input only when used by code

 ```python
-from paddleocr import PaddleOCR, draw_ocr
+import cv2
+from paddleocr import PaddleOCR, draw_ocr, download_with_progressbar
 ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
 img_path = 'PaddleOCR/doc/imgs/11.jpg'
 img = cv2.imread(img_path)
@@ -316,7 +317,9 @@ for line in result:

 # show result
 from PIL import Image
-image = Image.open(img_path).convert('RGB')
+
+download_with_progressbar(img_path, 'tmp.jpg')
+image = Image.open('tmp.jpg').convert('RGB')
 boxes = [line[0] for line in result]
 txts = [line[1][0] for line in result]
 scores = [line[1][1] for line in result]
@@ -362,5 +365,5 @@ im_show.save('result.jpg')
 | det                     | Enable detection when `ppocr.ocr` func exec                                                                                                                                                                                                   | TRUE                    |
 | rec                     | Enable recognition when `ppocr.ocr` func exec                                                                                                                                                                                                   | TRUE                    |
 | cls                     | Enable classification when `ppocr.ocr` func exec((Use use_angle_cls in command line mode to control whether to start classification in the forward direction)                                                                                                                                                                                                   | FALSE                    |
-| show_log                     | Whether to print log in det and rec 
-                                                                                                                                                                                               | FALSE                    |
\ No newline at end of file
+| show_log                     | Whether to print log in det and rec | FALSE                    |
+| type                     | Perform ocr or table structuring, the value is selected in ['ocr','structure']                                                                                                                                                                                             | ocr                    |
\ No newline at end of file
--- a/doc/joinus.PNG
+++ b/doc/joinus.PNG
--- a/doc/table/PaddleDetection_config.png
+++ b/doc/table/PaddleDetection_config.png
--- a/doc/table/layout.jpg
+++ b/doc/table/layout.jpg
--- a/doc/table/pipeline.jpg
+++ b/doc/table/pipeline.jpg
--- a/doc/table/pipeline.png
+++ b/doc/table/pipeline.png
--- a/doc/table/pipeline_en.jpg
+++ b/doc/table/pipeline_en.jpg
--- a/doc/table/ppstructure.GIF
+++ b/doc/table/ppstructure.GIF
--- a/doc/table/result_all.jpg
+++ b/doc/table/result_all.jpg
--- a/doc/table/result_text.jpg
+++ b/doc/table/result_text.jpg
--- a/doc/table/table.jpg
+++ b/doc/table/table.jpg
--- a/doc/table/tableocr_pipeline.jpg
+++ b/doc/table/tableocr_pipeline.jpg
--- a/doc/table/tableocr_pipeline.png
+++ b/doc/table/tableocr_pipeline.png
--- a/doc/table/tableocr_pipeline_en.jpg
+++ b/doc/table/tableocr_pipeline_en.jpg
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -29,16 +29,19 @@ from ppocr.utils.logging import get_logger
 logger = get_logger()
 from ppocr.utils.utility import check_and_read_gif, get_image_file_list
 from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
-from tools.infer.utility import draw_ocr, init_args, str2bool
+from tools.infer.utility import draw_ocr, str2bool
+from ppstructure.utility import init_args, draw_structure_result
+from ppstructure.predict_system import OCRSystem, save_structure_res

-__all__ = ['PaddleOCR']
+__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res','download_with_progressbar']

 model_urls = {
    'det': {
        'ch':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
        'en':
-            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar'
+            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar',
+        'structure': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar'
    },
    'rec': {
        'ch': {
@@ -110,14 +113,21 @@ model_urls = {
            'url':
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/devanagari_dict.txt'
+        },
+        'structure': {
+            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
+            'dict_path': 'ppocr/utils/dict/table_dict.txt'
        }
    },
-    'cls':
-        'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
+    'cls': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
+    'table': {
+        'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar',
+        'dict_path': 'ppocr/utils/dict/table_structure_dict.txt'
+    }
 }

 SUPPORT_DET_MODEL = ['DB']
-VERSION = '2.1'
+VERSION = '2.2'
 SUPPORT_REC_MODEL = ['CRNN']
 BASE_DIR = os.path.expanduser("~/.paddleocr/")

@@ -129,9 +139,10 @@ def parse_args(mMain=True):
    parser.add_argument("--lang", type=str, default='ch')
    parser.add_argument("--det", type=str2bool, default=True)
    parser.add_argument("--rec", type=str2bool, default=True)
+    parser.add_argument("--type", type=str, default='ocr')

    for action in parser._actions:
-        if action.dest == 'rec_char_dict_path':
+        if action.dest in ['rec_char_dict_path', 'table_char_dict_path']:
            action.default = None
    if mMain:
        return parser.parse_args()
@@ -142,19 +153,7 @@ def parse_args(mMain=True):
        return argparse.Namespace(**inference_args_dict)


-class PaddleOCR(predict_system.TextSystem):
-    def __init__(self, **kwargs):
-        """
-        paddleocr package
-        args:
-            **kwargs: other params show in paddleocr --help
-        """
-        params = parse_args(mMain=False)
-        params.__dict__.update(**kwargs)
-        if not params.show_log:
-            logger.setLevel(logging.INFO)
-        self.use_angle_cls = params.use_angle_cls
-        lang = params.lang
+def parse_lang(lang):
    latin_lang = [
        'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga',
        'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms',
@@ -183,23 +182,36 @@ class PaddleOCR(predict_system.TextSystem):
        model_urls['rec'].keys(), lang)
    if lang == "ch":
        det_lang = "ch"
+    elif lang == 'structure':
+        det_lang = 'structure'
    else:
        det_lang = "en"
-        use_inner_dict = False
-        if params.rec_char_dict_path is None:
-            use_inner_dict = True
-            params.rec_char_dict_path = model_urls['rec'][lang][
-                'dict_path']
+    return lang, det_lang
+
+
+class PaddleOCR(predict_system.TextSystem):
+    def __init__(self, **kwargs):
+        """
+        paddleocr package
+        args:
+            **kwargs: other params show in paddleocr --help
+        """
+        params = parse_args(mMain=False)
+        params.__dict__.update(**kwargs)
+        if not params.show_log:
+            logger.setLevel(logging.INFO)
+        self.use_angle_cls = params.use_angle_cls
+        lang, det_lang = parse_lang(params.lang)

        # init model dir
        params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir,
-                                                              os.path.join(BASE_DIR, VERSION, 'det', det_lang),
+                                                              os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
                                                              model_urls['det'][det_lang])
        params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir,
-                                                              os.path.join(BASE_DIR, VERSION, 'rec', lang),
+                                                              os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
                                                              model_urls['rec'][lang]['url'])
        params.cls_model_dir, cls_url = confirm_model_dir_url(params.cls_model_dir,
-                                                              os.path.join(BASE_DIR, VERSION, 'cls'),
+                                                              os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'),
                                                              model_urls['cls'])
        # download model
        maybe_download(params.det_model_dir, det_url)
@@ -212,9 +224,9 @@ class PaddleOCR(predict_system.TextSystem):
        if params.rec_algorithm not in SUPPORT_REC_MODEL:
            logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
            sys.exit(0)
-        if use_inner_dict:
-            params.rec_char_dict_path = str(
-                Path(__file__).parent / params.rec_char_dict_path)
+
+        if params.rec_char_dict_path is None:
+            params.rec_char_dict_path = str(Path(__file__).parent / model_urls['rec'][lang]['dict_path'])

        print(params)
        # init det_model and rec_model
@@ -272,6 +284,59 @@ class PaddleOCR(predict_system.TextSystem):
            return rec_res


+class PPStructure(OCRSystem):
+    def __init__(self, **kwargs):
+        params = parse_args(mMain=False)
+        params.__dict__.update(**kwargs)
+        if not params.show_log:
+            logger.setLevel(logging.INFO)
+        lang, det_lang = parse_lang(params.lang)
+
+        # init model dir
+        params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir,
+                                                              os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
+                                                              model_urls['det'][det_lang])
+        params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir,
+                                                              os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
+                                                              model_urls['rec'][lang]['url'])
+        params.table_model_dir, table_url = confirm_model_dir_url(params.table_model_dir,
+                                                                  os.path.join(BASE_DIR, VERSION, 'ocr', 'table'),
+                                                                  model_urls['table']['url'])
+        # download model
+        maybe_download(params.det_model_dir, det_url)
+        maybe_download(params.rec_model_dir, rec_url)
+        maybe_download(params.table_model_dir, table_url)
+
+        if params.rec_char_dict_path is None:
+            params.rec_char_dict_path = str(Path(__file__).parent / model_urls['rec'][lang]['dict_path'])
+        if params.table_char_dict_path is None:
+            params.table_char_dict_path = str(Path(__file__).parent / model_urls['table']['dict_path'])
+
+        print(params)
+        super().__init__(params)
+
+    def __call__(self, img):
+        if isinstance(img, str):
+            # download net image
+            if img.startswith('http'):
+                download_with_progressbar(img, 'tmp.jpg')
+                img = 'tmp.jpg'
+            image_file = img
+            img, flag = check_and_read_gif(image_file)
+            if not flag:
+                with open(image_file, 'rb') as f:
+                    np_arr = np.frombuffer(f.read(), dtype=np.uint8)
+                    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
+            if img is None:
+                logger.error("error in loading image:{}".format(image_file))
+                return None
+        if isinstance(img, np.ndarray) and len(img.shape) == 2:
+            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+
+        res = super().__call__(img)
+        return res
+
+
 def main():
    # for cmd
    args = parse_args(mMain=True)
@@ -284,14 +349,29 @@ def main():
    if len(image_file_list) == 0:
        logger.error('no images find in {}'.format(args.image_dir))
        return
+    if args.type == 'ocr':
+        engine = PaddleOCR(**(args.__dict__))
+    elif args.type == 'structure':
+        engine = PPStructure(**(args.__dict__))
+    else:
+        raise NotImplementedError

-    ocr_engine = PaddleOCR(**(args.__dict__))
    for img_path in image_file_list:
+        img_name = os.path.basename(img_path).split('.')[0]
        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
-        result = ocr_engine.ocr(img_path,
+        if args.type == 'ocr':
+            result = engine.ocr(img_path,
                                det=args.det,
                                rec=args.rec,
                                cls=args.use_angle_cls)
            if result is not None:
                for line in result:
                    logger.info(line)
+        elif args.type == 'structure':
+            result = engine(img_path)
+            save_structure_res(result, args.output, img_name)
+
+            for item in result:
+                item.pop('img')
+                logger.info(item)
+                
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -19,6 +19,7 @@ from __future__ import unicode_literals

 import numpy as np
 import string
+import json


 class ClsLabelEncode(object):
@@ -39,7 +40,6 @@ class DetLabelEncode(object):
        pass

    def __call__(self, data):
-        import json
        label = data['label']
        label = json.loads(label)
        nBox = len(label)
@@ -53,6 +53,8 @@ class DetLabelEncode(object):
                txt_tags.append(True)
            else:
                txt_tags.append(False)
+        if len(boxes) == 0:
+            return None
        boxes = self.expand_points_num(boxes)
        boxes = np.array(boxes, dtype=np.float32)
        txt_tags = np.array(txt_tags, dtype=np.bool)
@@ -352,19 +354,22 @@ class SRNLabelEncode(BaseRecLabelEncode):
                          % beg_or_end
        return idx

+
 class TableLabelEncode(object):
    """ Convert between text-label and text-index """
+
    def __init__(self,
                 max_text_length,
                 max_elem_length,
                 max_cell_num,
                 character_dict_path,
-        span_weight = 1.0, 
+                 span_weight=1.0,
                 **kwargs):
        self.max_text_length = max_text_length
        self.max_elem_length = max_elem_length
        self.max_cell_num = max_cell_num
-        list_character, list_elem = self.load_char_elem_dict(character_dict_path)
+        list_character, list_elem = self.load_char_elem_dict(
+            character_dict_path)
        list_character = self.add_special_char(list_character)
        list_elem = self.add_special_char(list_elem)
        self.dict_character = {}
@@ -380,14 +385,15 @@ class TableLabelEncode(object):
        list_elem = []
        with open(character_dict_path, "rb") as fin:
            lines = fin.readlines()
-            substr = lines[0].decode('utf-8').strip("\n").split("\t")
+            substr = lines[0].decode('utf-8').strip("\r\n").split("\t")
            character_num = int(substr[0])
            elem_num = int(substr[1])
-            for cno in range(1, 1+character_num):
-                character = lines[cno].decode('utf-8').strip("\n")
+
+            for cno in range(1, 1 + character_num):
+                character = lines[cno].decode('utf-8').strip("\r\n")
                list_character.append(character)
-            for eno in range(1+character_num, 1+character_num+elem_num):
-                elem = lines[eno].decode('utf-8').strip("\n")
+            for eno in range(1 + character_num, 1 + character_num + elem_num):
+                elem = lines[eno].decode('utf-8').strip("\r\n")
                list_elem.append(elem)
        return list_character, list_elem

@@ -412,18 +418,22 @@ class TableLabelEncode(object):
            return None
        elem_num = len(structure)
        structure = [0] + structure + [len(self.dict_elem) - 1]
-        structure = structure + [0] * (self.max_elem_length + 2 - len(structure))
+        structure = structure + [0] * (self.max_elem_length + 2 - len(structure)
+                                       )
        structure = np.array(structure)
        data['structure'] = structure
        elem_char_idx1 = self.dict_elem['<td>']
        elem_char_idx2 = self.dict_elem['<td']
        span_idx_list = self.get_span_idx_list()
-        td_idx_list = np.logical_or(structure == elem_char_idx1, structure == elem_char_idx2)
+        td_idx_list = np.logical_or(structure == elem_char_idx1,
+                                    structure == elem_char_idx2)
        td_idx_list = np.where(td_idx_list)[0]

-        structure_mask = np.ones((self.max_elem_length + 2, 1), dtype=np.float32)
+        structure_mask = np.ones(
+            (self.max_elem_length + 2, 1), dtype=np.float32)
        bbox_list = np.zeros((self.max_elem_length + 2, 4), dtype=np.float32)
-        bbox_list_mask = np.zeros((self.max_elem_length + 2, 1), dtype=np.float32)
+        bbox_list_mask = np.zeros(
+            (self.max_elem_length + 2, 1), dtype=np.float32)
        img_height, img_width, img_ch = data['image'].shape
        if len(span_idx_list) > 0:
            span_weight = len(td_idx_list) * 1.0 / len(span_idx_list)
@@ -450,9 +460,11 @@ class TableLabelEncode(object):
        char_end_idx = self.get_beg_end_flag_idx('end', 'char')
        elem_beg_idx = self.get_beg_end_flag_idx('beg', 'elem')
        elem_end_idx = self.get_beg_end_flag_idx('end', 'elem')
-        data['sp_tokens'] = np.array([char_beg_idx, char_end_idx, elem_beg_idx, 
-            elem_end_idx, elem_char_idx1, elem_char_idx2, self.max_text_length, 
-            self.max_elem_length, self.max_cell_num, elem_num])
+        data['sp_tokens'] = np.array([
+            char_beg_idx, char_end_idx, elem_beg_idx, elem_end_idx,
+            elem_char_idx1, elem_char_idx2, self.max_text_length,
+            self.max_elem_length, self.max_cell_num, elem_num
+        ])
        return data

    def encode(self, text, char_or_elem):
@@ -509,4 +521,3 @@ class TableLabelEncode(object):
            assert False, "Unsupport type %s in char_or_elem" \
                              % char_or_elem
        return idx
-    
\ No newline at end of file