Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into dygraph

32fdd08b · LDOUBLEV · b77f9ec0 · b1d26ded · 32fdd08b · 32fdd08b
Commit 32fdd08b authored Dec 01, 2021 by LDOUBLEV
20 changed files
--- a/benchmark/run_benchmark_det.sh
+++ b/benchmark/run_benchmark_det.sh
@@ -17,7 +17,7 @@ function _set_params(){
    skip_steps=2                 # 解析日志，有些模型前几个step耗时长，需要跳过                                    (必填)
    keyword="ips:"               # 解析日志，筛选出数据所在行的关键字                                             (必填)
    index="1"
-    model_name=${model_item}_${run_mode}_bs${batch_size}_${fp_item}        # model_item 用于yml文件名匹配，model_name 用于数据入库前端展示
+    model_name=${model_item}_bs${batch_size}_${fp_item}        # model_item 用于yml文件名匹配，model_name 用于数据入库前端展示
 #   以下不用修改   
    device=${CUDA_VISIBLE_DEVICES//,/ }
    arr=(${device})

--- a/benchmark/run_det.sh
+++ b/benchmark/run_det.sh
@@ -2,6 +2,7 @@
 # 提供可稳定复现性能的脚本，默认在标准docker环境内py37执行： paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7  paddle=2.1.2  py=37
 # 执行目录: ./PaddleOCR
 # 1 安装该模型需要的依赖 (如需开启优化策略请注明)
+log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}
 python -m pip install -r requirements.txt
 # 2 拷贝该模型需要数据、预训练模型
 wget -P ./train_data/  https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data  && tar xf icdar2015.tar && cd ../
@@ -12,18 +13,22 @@ wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dyg
 model_mode_list=(det_res18_db_v2.0 det_r50_vd_east det_r50_vd_pse)
 fp_item_list=(fp32)
-bs_list=(8 16)
 for model_mode in ${model_mode_list[@]}; do
      for fp_item in ${fp_item_list[@]}; do
+          if [ ${model_mode} == "det_r50_vd_east" ]; then
+              bs_list=(16)
+          else
+              bs_list=(8 16)
+          fi
          for bs_item in ${bs_list[@]}; do
            echo "index is speed, 1gpus, begin, ${model_name}"
            run_mode=sp
-            log_name=ocr_${model_mode}_${run_mode}_bs${bs_item}_${fp_item}
+            log_name=ocr_${model_mode}_bs${bs_item}_${fp_item}_${run_mode}
            CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 1 ${model_mode} | tee ${log_path}/${log_name}_speed_1gpus 2>&1    #  (5min)
            sleep 60
            echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
            run_mode=mp
-            log_name=ocr_${model_mode}_${run_mode}_bs${bs_item}_${fp_item}
+            log_name=ocr_${model_mode}_bs${bs_item}_${fp_item}_${run_mode}
            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode} | tee ${log_path}/${log_name}_speed_8gpus8p 2>&1
            sleep 60
            done

--- a/deploy/slim/prune/export_prune_model.py
+++ b/deploy/slim/prune/export_prune_model.py
@@ -52,12 +52,17 @@ def main(config, device, logger, vdl_writer):
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])
-    flops = paddle.flops(model, [1, 3, 640, 640])
+    if config['Architecture']['model_type'] == 'det':
-    logger.info(f"FLOPs before pruning: {flops}")
+        input_shape = [1, 3, 640, 640]
+    elif config['Architecture']['model_type'] == 'rec':
+        input_shape = [1, 3, 32, 320]
+    flops = paddle.flops(model, input_shape)
+    logger.info("FLOPs before pruning: {}".format(flops))
    from paddleslim.dygraph import FPGMFilterPruner
    model.train()
-    pruner = FPGMFilterPruner(model, [1, 3, 640, 640])
+    pruner = FPGMFilterPruner(model, input_shape)
    # build metric
    eval_class = build_metric(config['Metric'])
@@ -65,8 +70,13 @@ def main(config, device, logger, vdl_writer):
    def eval_fn():
        metric = program.eval(model, valid_dataloader, post_process_class,
                              eval_class)
-        logger.info(f"metric['hmean']: {metric['hmean']}")
+        if config['Architecture']['model_type'] == 'det':
-        return metric['hmean']
+            main_indicator = 'hmean'
+        else:
+            main_indicator = 'acc'
+        logger.info("metric[{}]: {}".format(main_indicator, metric[
+            main_indicator]))
+        return metric[main_indicator]
    params_sensitive = pruner.sensitive(
        eval_func=eval_fn,
@@ -81,18 +91,22 @@ def main(config, device, logger, vdl_writer):
    # calculate pruned params's ratio
    params_sensitive = pruner._get_ratios_by_loss(params_sensitive, loss=0.02)
    for key in params_sensitive.keys():
-        logger.info(f"{key}, {params_sensitive[key]}")
+        logger.info("{}, {}".format(key, params_sensitive[key]))
    plan = pruner.prune_vars(params_sensitive, [0])
-    flops = paddle.flops(model, [1, 3, 640, 640])
+    flops = paddle.flops(model, input_shape)
-    logger.info(f"FLOPs after pruning: {flops}")
+    logger.info("FLOPs after pruning: {}".format(flops))
    # load pretrain model
    load_model(config, model)
    metric = program.eval(model, valid_dataloader, post_process_class,
                          eval_class)
-    logger.info(f"metric['hmean']: {metric['hmean']}")
+    if config['Architecture']['model_type'] == 'det':
+        main_indicator = 'hmean'
+    else:
+        main_indicator = 'acc'
+    logger.info("metric['']: {}".format(main_indicator, metric[main_indicator]))
    # start export model
    from paddle.jit import to_static

--- a/deploy/slim/prune/sensitivity_anal.py
+++ b/deploy/slim/prune/sensitivity_anal.py
@@ -73,13 +73,18 @@ def main(config, device, logger, vdl_writer):
        char_num = len(getattr(post_process_class, 'character'))
        config['Architecture']["Head"]['out_channels'] = char_num
    model = build_model(config['Architecture'])
+    if config['Architecture']['model_type'] == 'det':
+        input_shape = [1, 3, 640, 640]
+    elif config['Architecture']['model_type'] == 'rec':
+        input_shape = [1, 3, 32, 320]
+    flops = paddle.flops(model, input_shape)
-    flops = paddle.flops(model, [1, 3, 640, 640])
    logger.info("FLOPs before pruning: {}".format(flops))
    from paddleslim.dygraph import FPGMFilterPruner
    model.train()
-    pruner = FPGMFilterPruner(model, [1, 3, 640, 640])
+    pruner = FPGMFilterPruner(model, input_shape)
    # build loss
    loss_class = build_loss(config['Loss'])
@@ -107,8 +112,14 @@ def main(config, device, logger, vdl_writer):
    def eval_fn():
        metric = program.eval(model, valid_dataloader, post_process_class,
                              eval_class, False)
-        logger.info("metric['hmean']: {}".format(metric['hmean']))
+        if config['Architecture']['model_type'] == 'det':
-        return metric['hmean']
+            main_indicator = 'hmean'
+        else:
+            main_indicator = 'acc'
+        logger.info("metric[{}]: {}".format(main_indicator, metric[
+            main_indicator]))
+        return metric[main_indicator]
    run_sensitive_analysis = False
    """
@@ -149,7 +160,7 @@ def main(config, device, logger, vdl_writer):
    plan = pruner.prune_vars(params_sensitive, [0])
-    flops = paddle.flops(model, [1, 3, 640, 640])
+    flops = paddle.flops(model, input_shape)
    logger.info("FLOPs after pruning: {}".format(flops))
    # start train

--- a/doc/doc_ch/code_and_doc.md
+++ b/doc/doc_ch/code_and_doc.md
+# 附录
+本附录包含了Python、文档规范以及Pull Request流程，请各位开发者遵循相关内容
+- [附录1：Python代码规范](#附录1)
+- [附录2：文档规范](#附录2)
+- [附录3：Pull Request说明](#附录3)
+<a name="附录1"></a>
+## 附录1：Python代码规范
+PaddleOCR的Python代码遵循 [PEP8规范](https://www.python.org/dev/peps/pep-0008/)，其中一些关注的重点包括如下内容
+- 空格 
+  - 空格应该加在逗号、分号、冒号后，而非它们的前面
+    ```python
+    # 正确：
+    print(x, y)
+    # 错误：
+    print(x , y)
+    ```
+  - 在函数中指定关键字参数或默认参数值时, 不要在其两侧使用空格
+    ```python
+    # 正确：
+    def complex(real, imag=0.0)
+    # 错误：
+    def complex(real, imag = 0.0)
+    ```
+- 注释
+  - 行内注释：行内注释使用 `#` 号表示，在代码与 `#` 之间需要空两个空格， `#`  与注释之间应当空一个空格，例如
+    ```python
+    x = x + 1  # Compensate for border
+    ```
+  - 函数和方法：每个函数的定义后的描述应该包括以下内容：
+    - 函数描述：函数的作用，以及输入输出的说明
+    - Args：每个参数的名字以及对该参数的描述
+    - Returns：返回值的含义和类型
+    ```python
+    def fetch_bigtable_rows(big_table, keys, other_silly_variable=None):
+        """Fetches rows from a Bigtable.
+        Retrieves rows pertaining to the given keys from the Table instance
+        represented by big_table.  Silly things may happen if
+        other_silly_variable is not None.
+        Args:
+            big_table: An open Bigtable Table instance.
+            keys: A sequence of strings representing the key of each table row
+                to fetch.
+            other_silly_variable: Another optional variable, that has a much
+                longer name than the other args, and which does nothing.
+        Returns:
+            A dict mapping keys to the corresponding table row data
+            fetched. Each row is represented as a tuple of strings. For
+            example:
+            {'Serak': ('Rigel VII', 'Preparer'),
+             'Zim': ('Irk', 'Invader'),
+             'Lrrr': ('Omicron Persei 8', 'Emperor')}
+            If a key from the keys argument is missing from the dictionary,
+            then that row was not found in the table.
+        """
+        pass
+    ```
+<a name="附录2"></a>
+## 附录2：文档规范
+### 2.1 总体说明
+- 文档位置：如果您增加的新功能可以补充在原有的Markdown文件中，请**不要重新新建**一个文件。如果您对添加的位置不清楚，可以先PR代码，然后在commit中询问官方人员。
+- 新增Markdown文档名称：使用英文描述文档内容，一般由小写字母与下划线组合而成，例如  `add_new_algorithm.md`
+- 新增Markdown文档格式：目录 - 正文 - FAQ
+  > 目录生成方法可以使用 [此网站](https://ecotrust-canada.github.io/markdown-toc/) 将md内容复制之后自动提取目录，然后在md文件的每个标题前添加 `<a name="XXXX"></a>` 
+- 中英双语：任何对文档的改动或新增都需要分别在中文和英文文档上进行。
+### 2.2 格式规范
+- 标题格式：文档标题格式按照：阿拉伯数字小数点组合 - 空格 - 标题的格式（例如 `2.1 XXXX` ， `2. XXXX`）
+- 代码块：通过代码块格式展示需要运行的代码，在代码块前描述命令参数的含义。例如：
+  > 检测+方向分类器+识别全流程：设置方向分类器参数 `--use_angle_cls true` 后可对竖排文本进行识别。
+  >
+  > ```
+  > paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true
+  > ```
+- 变量引用：如果在行内引用到代码变量或命令参数，需要用行内代码表示，例如上方  `--use_angle_cls true` ，并在前后各空一格
+- 补充说明：通过引用格式 `>` 补充说明，或对注意事项进行说明
+- 图片：如果在说明文档中增加了图片，请规范图片的命名形式（描述图片内容），并将图片添加在 `doc/` 下
+<a name="附录3"></a>
+## 附录3：Pull Request说明
+### 3.1 PaddleOCR分支说明
+PaddleOCR未来将维护2种分支，分别为：
+- release/x.x系列分支：为稳定的发行版本分支，也是默认分支。PaddleOCR会根据功能更新情况发布新的release分支，同时适配Paddle的release版本。随着版本迭代，release/x.x系列分支会越来越多，默认维护最新版本的release分支。
+- dygraph分支：为开发分支，适配Paddle动态图的dygraph版本，主要用于开发新功能。如果有同学需要进行二次开发，请选择dygraph分支。为了保证dygraph分支能在需要的时候拉出release/x.x分支，dygraph分支的代码只能使用Paddle最新release分支中有效的api。也就是说，如果Paddle dygraph分支中开发了新的api，但尚未出现在release分支代码中，那么请不要在PaddleOCR中使用。除此之外，对于不涉及api的性能优化、参数调整、策略更新等，都可以正常进行开发。
+PaddleOCR的历史分支，未来将不再维护。考虑到一些同学可能仍在使用，这些分支还会继续保留：
+- develop分支：这个分支曾用于静态图的开发与测试，目前兼容>=1.7版本的Paddle。如果有特殊需求，要适配旧版本的Paddle，那还可以使用这个分支，但除了修复bug外不再更新代码。
+PaddleOCR欢迎大家向repo中积极贡献代码，下面给出一些贡献代码的基本流程。
+### 3.2 PaddleOCR代码提交流程与规范
+> 如果你熟悉Git使用，可以直接跳转到 [3.2.10 提交代码的一些约定](#提交代码的一些约定)
+#### 3.2.1 创建你的 `远程仓库`
+- 在PaddleOCR的 [GitHub首页](https://github.com/PaddlePaddle/PaddleOCR)，点击右上角 `Fork`  按钮，在你的个人目录下创建 `远程仓库`，比如`https://github.com/{your_name}/PaddleOCR`。
+![banner](../banner.png)
+- 将 `远程仓库` Clone到本地
+```
+# 拉取dygraph分支的代码
+git clone https://github.com/{your_name}/PaddleOCR.git -b dygraph
+cd PaddleOCR
+```
+> 多数情况下clone失败是由于网络原因，请稍后重试或配置代理
+#### 3.2.2 和 `远程仓库` 建立连接
+首先查看当前 `远程仓库` 的信息。
+```
+git remote -v
+# origin    https://github.com/{your_name}/PaddleOCR.git (fetch)
+# origin    https://github.com/{your_name}/PaddleOCR.git (push)
+```
+只有clone的 `远程仓库` 的信息，也就是自己用户名下的 PaddleOCR，接下来我们创建一个原始 PaddleOCR 仓库的远程主机，命名为 upstream。
+```
+git remote add upstream https://github.com/PaddlePaddle/PaddleOCR.git
+```
+使用 `git remote -v` 查看当前 `远程仓库` 的信息，输出如下，发现包括了origin和upstream 2个 `远程仓库` 。
+```
+origin    https://github.com/{your_name}/PaddleOCR.git (fetch)
+origin    https://github.com/{your_name}/PaddleOCR.git (push)
+upstream    https://github.com/PaddlePaddle/PaddleOCR.git (fetch)
+upstream    https://github.com/PaddlePaddle/PaddleOCR.git (push)
+```
+这主要是为了后续在提交pull request(PR)时，始终保持本地仓库最新。
+#### 3.2.3 创建本地分支
+可以基于当前分支创建新的本地分支，命令如下。
+```
+git checkout -b new_branch
+```
+也可以基于远程或者上游的分支创建新的分支，命令如下。
+```
+# 基于用户远程仓库(origin)的develop创建new_branch分支
+git checkout -b new_branch origin/develop
+# 基于上游远程仓库(upstream)的develop创建new_branch分支
+# 如果需要从upstream创建新的分支，需要首先使用git fetch upstream获取上游代码
+git checkout -b new_branch upstream/develop
+```
+最终会显示切换到新的分支，输出信息如下
+```
+Branch new_branch set up to track remote branch develop from upstream.
+Switched to a new branch 'new_branch'
+```
+#### 3.2.4 使用pre-commit钩子
+Paddle 开发人员使用 pre-commit 工具来管理 Git 预提交钩子。 它可以帮助我们格式化源代码（C++，Python），在提交（commit）前自动检查一些基本事宜（如每个文件只有一个 EOL，Git 中不要添加大文件等）。
+pre-commit测试是 Travis-CI 中单元测试的一部分，不满足钩子的 PR 不能被提交到 PaddleOCR，首先安装并在当前目录运行它：
+```
+pip install pre-commit
+pre-commit install
+```
+ >  1. Paddle 使用 clang-format 来调整 C/C++ 源代码格式，请确保 `clang-format` 版本在 3.8 以上。
+ >
+ >  2. 通过 `pip install pre-commit` 和 `conda install -c conda-forge pre-commit` 安装的yapf稍有不同，PaddleOCR 开发人员使用的是 `pip install pre-commit`。
+#### 3.2.5 修改与提交代码
+ 假设对PaddleOCR的 `README.md` 做了一些修改，可以通过 `git status` 查看改动的文件，然后使用 `git add` 添加改动文件。
+```
+git status # 查看改动文件
+git add README.md
+pre-commit
+```
+重复上述步骤，直到pre-commit格式检查不报错。如下所示。
+[![img](https://github.com/PaddlePaddle/PaddleClas/raw/release/2.3/docs/images/quick_start/community/003_precommit_pass.png)](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/images/quick_start/community/003_precommit_pass.png)
+使用下面的命令完成提交。
+```
+git commit -m "your commit info"
+```
+#### 3.2.6 保持本地仓库最新
+获取 upstream 的最新代码并更新当前分支。这里的upstream来自于3.2.2节的`和远程仓库建立连接`部分。
+```
+git fetch upstream
+# 如果是希望提交到其他分支，则需要从upstream的其他分支pull代码，这里是develop
+git pull upstream develop
+```
+#### 3.2.7 push到远程仓库
+```
+git push origin new_branch
+```
+#### 3.2.7 提交Pull Request
+点击new pull request，选择本地分支和目标分支，如下图所示。在PR的描述说明中，填写该PR所完成的功能。接下来等待review，如果有需要修改的地方，参照上述步骤更新 origin 中的对应分支即可。
+![banner](../pr.png)
+#### 3.2.8 签署CLA协议和通过单元测试
+- 签署CLA：在首次向PaddlePaddle提交Pull Request时，您需要签署一次CLA(Contributor License Agreement)协议，以保证您的代码可以被合入，具体签署方式如下：
+  1. 请您查看PR中的Check部分，找到license/cla，并点击右侧detail，进入CLA网站
+  2. 点击CLA网站中的“Sign in with GitHub to agree”,点击完成后将会跳转回您的Pull Request页面
+#### 3.2.9 删除分支
+- 删除远程分支
+  在 PR 被 merge 进主仓库后，我们可以在 PR 的页面删除远程仓库的分支。
+  也可以使用 `git push origin :分支名` 删除远程分支，如：
+  ```
+  git push origin :new_branch
+  ```
+- 删除本地分支
+  ```
+  # 切换到develop分支，否则无法删除当前分支
+  git checkout develop
+  # 删除new_branch分支
+  git branch -D new_branch
+  ```
+<a name="提交代码的一些约定"></a>
+#### 3.2.10 提交代码的一些约定
+为了使官方维护人员在评审代码时更好地专注于代码本身，请您每次提交代码时，遵守以下约定：
+1）请保证Travis-CI 中单元测试能顺利通过。如果没过，说明提交的代码存在问题，官方维护人员一般不做评审。
+2）提交Pull Request前：
+- 请注意commit的数量。
+  原因：如果仅仅修改一个文件但提交了十几个commit，每个commit只做了少量的修改，这会给评审人带来很大困扰。评审人需要逐一查看每个commit才能知道做了哪些修改，且不排除commit之间的修改存在相互覆盖的情况。
+  建议：每次提交时，保持尽量少的commit，可以通过git commit --amend补充上次的commit。对已经Push到远程仓库的多个commit，可以参考[squash commits after push](https://stackoverflow.com/questions/5667884/how-to-squash-commits-in-git-after-they-have-been-pushed)。
+- 请注意每个commit的名称：应能反映当前commit的内容，不能太随意。
+3）如果解决了某个Issue的问题，请在该Pull Request的第一个评论框中加上：fix #issue_number，这样当该Pull Request被合并后，会自动关闭对应的Issue。关键词包括：close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved，请选择合适的词汇。详细可参考[Closing issues via commit messages](https://help.github.com/articles/closing-issues-via-commit-messages)。
+此外，在回复评审人意见时，请您遵守以下约定：
+1）官方维护人员的每一个review意见都希望得到回复，这样会更好地提升开源社区的贡献。
+- 对评审意见同意且按其修改完的，给个简单的Done即可；
+- 对评审意见不同意的，请给出您自己的反驳理由。
+2）如果评审意见比较多:
+- 请给出总体的修改情况。
+- 请采用`start a review`进行回复，而非直接回复的方式。原因是每个回复都会发送一封邮件，会造成邮件灾难。
\ No newline at end of file
--- a/doc/doc_ch/detection.md
+++ b/doc/doc_ch/detection.md
@@ -247,3 +247,7 @@ Q1: 训练模型转inference 模型之后预测效果不一致？
 **A**：此类问题出现较多，问题多是trained model预测时候的预处理、后处理参数和inference model预测的时候的预处理、后处理参数不一致导致的。以det_mv3_db.yml配置文件训练的模型为例，训练模型、inference模型预测结果不一致问题解决方式如下：
 - 检查[trained model预处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/configs/det/det_mv3_db.yml#L116)，和[inference model的预测预处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/tools/infer/predict_det.py#L42)函数是否一致。算法在评估的时候，输入图像大小会影响精度，为了和论文保持一致，训练icdar15配置文件中将图像resize到[736, 1280]，但是在inference model预测的时候只有一套默认参数，会考虑到预测速度问题，默认限制图像最长边为960做resize的。训练模型预处理和inference模型的预处理函数位于[ppocr/data/imaug/operators.py](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/ppocr/data/imaug/operators.py#L147)
 - 检查[trained model后处理](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/configs/det/det_mv3_db.yml#L51)，和[inference 后处理参数](https://github.com/PaddlePaddle/PaddleOCR/blob/c1ed243fb68d5d466258243092e56cbae32e2c14/tools/infer/utility.py#L50)是否一致。
+Q2: 训练EAST模型提示找不到lanms库？
+**A**：执行pip3 install lanms-nova 即可。
--- a/doc/doc_ch/inference.md
+++ b/doc/doc_ch/inference.md
@@ -34,6 +34,8 @@ inference 模型（`paddle.jit.save`保存的模型）
    - [1. 超轻量中文OCR模型推理](#超轻量中文OCR模型推理)
    - [2. 其他模型推理](#其他模型推理)
+- [六、参数解释](#参数解释)
 <a name="训练模型转inference模型"></a>
 ## 一、训练模型转inference模型
@@ -394,3 +396,127 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --d
 执行命令后，识别结果图像如下：
 ![](../imgs_results/img_10_east_starnet.jpg)
+<a name="参数解释"></a>
+## 六、参数解释
+更多关于预测过程的参数解释如下所示。
+* 全局信息
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+|  image_dir | str | 无，必须显式指定 | 图像或者文件夹路径 |
+|  vis_font_path | str | "./doc/fonts/simfang.ttf" | 用于可视化的字体路径 |
+|  drop_score | float | 0.5 | 识别得分小于该值的结果会被丢弃，不会作为返回结果 |
+|  use_pdserving | bool | False | 是否使用Paddle Serving进行预测 |
+|  warmup | bool | False | 是否开启warmup，在统计预测耗时的时候，可以使用这种方法 |
+|  draw_img_save_dir | str | "./inference_results" | 系统串联预测OCR结果的保存文件夹 |
+|  save_crop_res | bool | False  | 是否保存OCR的识别文本图像 |
+|  crop_res_save_dir | str | "./output" | 保存OCR识别出来的文本图像路径 |
+|  use_mp | bool | False | 是否开启多进程预测  |
+|  total_process_num | int | 6 | 开启的进程数，`use_mp`为`True`时生效  |
+|  process_id | int | 0 | 当前进程的id号，无需自己修改  |
+|  benchmark | bool | False | 是否开启benchmark，对预测速度、显存占用等进行统计  |
+|  save_log_path | str | "./log_output/" | 开启`benchmark`时，日志结果的保存文件夹 |
+|  show_log | bool | True | 是否显示预测中的日志信息  |
+|  use_onnx | bool | False | 是否开启onnx预测 |
+* 预测引擎相关
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+|  use_gpu | bool | True | 是否使用GPU进行预测 |
+|  ir_optim | bool | True | 是否对计算图进行分析与优化，开启后可以加速预测过程 |
+|  use_tensorrt | bool | False | 是否开启tensorrt |
+|  min_subgraph_size | int | 15 | tensorrt中最小子图size，当子图的size大于该值时，才会尝试对该子图使用trt engine计算 |
+|  precision | str | fp32 | 预测的精度，支持`fp32`, `fp16`, `int8` 3种输入 |
+|  enable_mkldnn | bool | True | 是否开启mkldnn |
+|  cpu_threads | int | 10 | 开启mkldnn时，cpu预测的线程数 |
+* 文本检测模型相关
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+|  det_algorithm | str | "DB" | 文本检测算法名称，目前支持`DB`, `EAST`, `SAST`, `PSE`  |
+|  det_model_dir | str | 无，如果使用检测模型，该项是必填项 | 检测inference模型路径 |
+|  det_limit_side_len | int | 960 | 检测的图像边长限制 |
+|  det_limit_type | str | "max" | 检测的边长限制类型，目前支持`min`, `max`，`min`表示保证图像最短边不小于`det_limit_side_len`，`max`表示保证图像最长边不大于`det_limit_side_len` |
+其中，DB算法相关参数如下
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+|  det_db_thresh | float | 0.3 | DB输出的概率图中，得分大于该阈值的像素点才会被认为是文字像素点 |
+|  det_db_box_thresh | float | 0.6 | 检测结果边框内，所有像素点的平均得分大于该阈值时，该结果会被认为是文字区域 |
+|  det_db_unclip_ratio | float | 1.5 | `Vatti clipping`算法的扩张系数，使用该方法对文字区域进行扩张 |
+|  max_batch_size | int | 10 | 预测的batch size |
+|  use_dilation | bool | False | 是否对分割结果进行膨胀以获取更优检测效果 |
+|  det_db_score_mode | str | "fast" | DB的检测结果得分计算方法，支持`fast`和`slow`，`fast`是根据polygon的外接矩形边框内的所有像素计算平均得分，`slow`是根据原始polygon内的所有像素计算平均得分，计算速度相对较慢一些，但是更加准确一些。 |
+EAST算法相关参数如下
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+|  det_east_score_thresh | float | 0.8 | EAST后处理中score map的阈值 |
+|  det_east_cover_thresh | float | 0.1 | EAST后处理中文本框的平均得分阈值 |
+|  det_east_nms_thresh | float | 0.2 | EAST后处理中nms的阈值 |
+SAST算法相关参数如下
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+|  det_sast_score_thresh | float | 0.5 | SAST后处理中的得分阈值 |
+|  det_sast_nms_thresh | float | 0.5 | SAST后处理中nms的阈值 |
+|  det_sast_polygon | bool | False | 是否多边形检测，弯曲文本场景（如Total-Text）设置为True |
+PSE算法相关参数如下
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+|  det_pse_thresh | float | 0.0 | 对输出图做二值化的阈值 |
+|  det_pse_box_thresh | float | 0.85 | 对box进行过滤的阈值，低于此阈值的丢弃 |
+|  det_pse_min_area | float | 16 | box的最小面积，低于此阈值的丢弃 |
+|  det_pse_box_type | str | "box" | 返回框的类型，box:四点坐标，poly: 弯曲文本的所有点坐标 |
+|  det_pse_scale | int | 1 | 输入图像相对于进后处理的图的比例，如`640*640`的图像，网络输出为`160*160`，scale为2的情况下，进后处理的图片shape为`320*320`。这个值调大可以加快后处理速度，但是会带来精度的下降 |
+* 文本识别模型相关
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+|  rec_algorithm | str | "CRNN" | 文本识别算法名称，目前支持`CRNN`, `SRN`, `RARE`, `NRTR`, `SAR` |
+|  rec_model_dir | str | 无，如果使用识别模型，该项是必填项 | 识别inference模型路径 |
+|  rec_image_shape | list | [3, 32, 320] | 识别时的图像尺寸 |
+|  rec_batch_num | int | 6 | 识别的batch size |
+|  max_text_length | int | 25 | 识别结果最大长度，在`SRN`中有效 |
+|  rec_char_dict_path | str | "./ppocr/utils/ppocr_keys_v1.txt" | 识别的字符字典文件 |
+|  use_space_char | bool | True | 是否包含空格，如果为`True`，则会在最后字符字典中补充`空格`字符 |
+* 端到端文本检测与识别模型相关
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+|  e2e_algorithm | str | "PGNet" | 端到端算法名称，目前支持`PGNet` |
+|  e2e_model_dir | str | 无，如果使用端到端模型，该项是必填项 | 端到端模型inference模型路径 |
+|  e2e_limit_side_len | int | 768 | 端到端的输入图像边长限制 |
+|  e2e_limit_type | str | "max" | 端到端的边长限制类型，目前支持`min`, `max`，`min`表示保证图像最短边不小于`e2e_limit_side_len`，`max`表示保证图像最长边不大于`e2e_limit_side_len` |
+|  e2e_pgnet_score_thresh | float | xx | xx |
+|  e2e_char_dict_path | str | "./ppocr/utils/ic15_dict.txt" | 识别的字典文件路径 |
+|  e2e_pgnet_valid_set | str | "totaltext" | 验证集名称，目前支持`totaltext`, `partvgg`，不同数据集对应的后处理方式不同，与训练过程保持一致即可 |
+|  e2e_pgnet_mode | str | "fast" | PGNet的检测结果得分计算方法，支持`fast`和`slow`，`fast`是根据polygon的外接矩形边框内的所有像素计算平均得分，`slow`是根据原始polygon内的所有像素计算平均得分，计算速度相对较慢一些，但是更加准确一些。 |
+* 方向分类器模型相关
+| 参数名称 | 类型 | 默认值 | 含义 |
+| :--: | :--: | :--: | :--: |
+|  use_angle_cls | bool | False | 是否使用方向分类器 |
+|  cls_model_dir | str | 无，如果需要使用，则必须显式指定路径 | 方向分类器inference模型路径 |
+|  cls_image_shape | list | [3, 48, 192] | 预测尺度 |
+|  label_list | list | ['0', '180'] | class id对应的角度值 |
+|  cls_batch_num | int | 6 | 方向分类器预测的batch size |
+|  cls_thresh | float | 0.9 | 预测阈值，模型预测结果为180度，且得分大于该阈值时，认为最终预测结果为180度，需要翻转 |
--- a/doc/joinus.PNG
+++ b/doc/joinus.PNG
--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
@@ -16,7 +16,7 @@ __all__ = ["build_backbone"]
 def build_backbone(config, model_type):
-    if model_type == "det":
+    if model_type == "det" or model_type == "table":
        from .det_mobilenet_v3 import MobileNetV3
        from .det_resnet_vd import ResNet
        from .det_resnet_vd_sast import ResNet_SAST
@@ -36,10 +36,6 @@ def build_backbone(config, model_type):
    elif model_type == "e2e":
        from .e2e_resnet_vd_pg import ResNet
        support_dict = ["ResNet"]
-    elif model_type == "table":
-        from .table_resnet_vd import ResNet
-        from .table_mobilenet_v3 import MobileNetV3
-        support_dict = ["ResNet", "MobileNetV3"]
    else:
        raise NotImplementedError

--- a/ppocr/modeling/backbones/rec_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/rec_mobilenet_v3.py
@@ -26,8 +26,10 @@ class MobileNetV3(nn.Layer):
                 scale=0.5,
                 large_stride=None,
                 small_stride=None,
+                 disable_se=False,
                 **kwargs):
        super(MobileNetV3, self).__init__()
+        self.disable_se = disable_se
        if small_stride is None:
            small_stride = [2, 2, 2, 2]
        if large_stride is None:
@@ -101,6 +103,7 @@ class MobileNetV3(nn.Layer):
        block_list = []
        inplanes = make_divisible(inplanes * scale)
        for (k, exp, c, se, nl, s) in cfg:
+            se = se and not self.disable_se
            block_list.append(
                ResidualUnit(
                    in_channels=inplanes,

--- a/ppocr/modeling/backbones/table_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/table_mobilenet_v3.py
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import paddle
-from paddle import nn
-import paddle.nn.functional as F
-from paddle import ParamAttr
-__all__ = ['MobileNetV3']
-def make_divisible(v, divisor=8, min_value=None):
-    if min_value is None:
-        min_value = divisor
-    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
-    if new_v < 0.9 * v:
-        new_v += divisor
-    return new_v
-class MobileNetV3(nn.Layer):
-    def __init__(self,
-                 in_channels=3,
-                 model_name='large',
-                 scale=0.5,
-                 disable_se=False,
-                 **kwargs):
-        """
-        the MobilenetV3 backbone network for detection module.
-        Args:
-            params(dict): the super parameters for build network
-        """
-        super(MobileNetV3, self).__init__()
-        self.disable_se = disable_se
-        if model_name == "large":
-            cfg = [
-                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, False, 'relu', 1],
-                [3, 64, 24, False, 'relu', 2],
-                [3, 72, 24, False, 'relu', 1],
-                [5, 72, 40, True, 'relu', 2],
-                [5, 120, 40, True, 'relu', 1],
-                [5, 120, 40, True, 'relu', 1],
-                [3, 240, 80, False, 'hardswish', 2],
-                [3, 200, 80, False, 'hardswish', 1],
-                [3, 184, 80, False, 'hardswish', 1],
-                [3, 184, 80, False, 'hardswish', 1],
-                [3, 480, 112, True, 'hardswish', 1],
-                [3, 672, 112, True, 'hardswish', 1],
-                [5, 672, 160, True, 'hardswish', 2],
-                [5, 960, 160, True, 'hardswish', 1],
-                [5, 960, 160, True, 'hardswish', 1],
-            ]
-            cls_ch_squeeze = 960
-        elif model_name == "small":
-            cfg = [
-                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, True, 'relu', 2],
-                [3, 72, 24, False, 'relu', 2],
-                [3, 88, 24, False, 'relu', 1],
-                [5, 96, 40, True, 'hardswish', 2],
-                [5, 240, 40, True, 'hardswish', 1],
-                [5, 240, 40, True, 'hardswish', 1],
-                [5, 120, 48, True, 'hardswish', 1],
-                [5, 144, 48, True, 'hardswish', 1],
-                [5, 288, 96, True, 'hardswish', 2],
-                [5, 576, 96, True, 'hardswish', 1],
-                [5, 576, 96, True, 'hardswish', 1],
-            ]
-            cls_ch_squeeze = 576
-        else:
-            raise NotImplementedError("mode[" + model_name +
-                                      "_model] is not implemented!")
-        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
-        assert scale in supported_scale, \
-            "supported scale are {} but input scale is {}".format(supported_scale, scale)
-        inplanes = 16
-        # conv1
-        self.conv = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=make_divisible(inplanes * scale),
-            kernel_size=3,
-            stride=2,
-            padding=1,
-            groups=1,
-            if_act=True,
-            act='hardswish',
-            name='conv1')
-        self.stages = []
-        self.out_channels = []
-        block_list = []
-        i = 0
-        inplanes = make_divisible(inplanes * scale)
-        for (k, exp, c, se, nl, s) in cfg:
-            se = se and not self.disable_se
-            start_idx = 2 if model_name == 'large' else 0
-            if s == 2 and i > start_idx:
-                self.out_channels.append(inplanes)
-                self.stages.append(nn.Sequential(*block_list))
-                block_list = []
-            block_list.append(
-                ResidualUnit(
-                    in_channels=inplanes,
-                    mid_channels=make_divisible(scale * exp),
-                    out_channels=make_divisible(scale * c),
-                    kernel_size=k,
-                    stride=s,
-                    use_se=se,
-                    act=nl,
-                    name="conv" + str(i + 2)))
-            inplanes = make_divisible(scale * c)
-            i += 1
-        block_list.append(
-            ConvBNLayer(
-                in_channels=inplanes,
-                out_channels=make_divisible(scale * cls_ch_squeeze),
-                kernel_size=1,
-                stride=1,
-                padding=0,
-                groups=1,
-                if_act=True,
-                act='hardswish',
-                name='conv_last'))
-        self.stages.append(nn.Sequential(*block_list))
-        self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
-        for i, stage in enumerate(self.stages):
-            self.add_sublayer(sublayer=stage, name="stage{}".format(i))
-    def forward(self, x):
-        x = self.conv(x)
-        out_list = []
-        for stage in self.stages:
-            x = stage(x)
-            out_list.append(x)
-        return out_list
-class ConvBNLayer(nn.Layer):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 padding,
-                 groups=1,
-                 if_act=True,
-                 act=None,
-                 name=None):
-        super(ConvBNLayer, self).__init__()
-        self.if_act = if_act
-        self.act = act
-        self.conv = nn.Conv2D(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=padding,
-            groups=groups,
-            weight_attr=ParamAttr(name=name + '_weights'),
-            bias_attr=False)
-        self.bn = nn.BatchNorm(
-            num_channels=out_channels,
-            act=None,
-            param_attr=ParamAttr(name=name + "_bn_scale"),
-            bias_attr=ParamAttr(name=name + "_bn_offset"),
-            moving_mean_name=name + "_bn_mean",
-            moving_variance_name=name + "_bn_variance")
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        if self.if_act:
-            if self.act == "relu":
-                x = F.relu(x)
-            elif self.act == "hardswish":
-                x = F.hardswish(x)
-            else:
-                print("The activation function({}) is selected incorrectly.".
-                      format(self.act))
-                exit()
-        return x
-class ResidualUnit(nn.Layer):
-    def __init__(self,
-                 in_channels,
-                 mid_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 use_se,
-                 act=None,
-                 name=''):
-        super(ResidualUnit, self).__init__()
-        self.if_shortcut = stride == 1 and in_channels == out_channels
-        self.if_se = use_se
-        self.expand_conv = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=mid_channels,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            if_act=True,
-            act=act,
-            name=name + "_expand")
-        self.bottleneck_conv = ConvBNLayer(
-            in_channels=mid_channels,
-            out_channels=mid_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=int((kernel_size - 1) // 2),
-            groups=mid_channels,
-            if_act=True,
-            act=act,
-            name=name + "_depthwise")
-        if self.if_se:
-            self.mid_se = SEModule(mid_channels, name=name + "_se")
-        self.linear_conv = ConvBNLayer(
-            in_channels=mid_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            if_act=False,
-            act=None,
-            name=name + "_linear")
-    def forward(self, inputs):
-        x = self.expand_conv(inputs)
-        x = self.bottleneck_conv(x)
-        if self.if_se:
-            x = self.mid_se(x)
-        x = self.linear_conv(x)
-        if self.if_shortcut:
-            x = paddle.add(inputs, x)
-        return x
-class SEModule(nn.Layer):
-    def __init__(self, in_channels, reduction=4, name=""):
-        super(SEModule, self).__init__()
-        self.avg_pool = nn.AdaptiveAvgPool2D(1)
-        self.conv1 = nn.Conv2D(
-            in_channels=in_channels,
-            out_channels=in_channels // reduction,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            weight_attr=ParamAttr(name=name + "_1_weights"),
-            bias_attr=ParamAttr(name=name + "_1_offset"))
-        self.conv2 = nn.Conv2D(
-            in_channels=in_channels // reduction,
-            out_channels=in_channels,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            weight_attr=ParamAttr(name + "_2_weights"),
-            bias_attr=ParamAttr(name=name + "_2_offset"))
-    def forward(self, inputs):
-        outputs = self.avg_pool(inputs)
-        outputs = self.conv1(outputs)
-        outputs = F.relu(outputs)
-        outputs = self.conv2(outputs)
-        outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5)
-        return inputs * outputs
\ No newline at end of file
--- a/ppocr/modeling/backbones/table_resnet_vd.py
+++ b/ppocr/modeling/backbones/table_resnet_vd.py
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import paddle
-from paddle import ParamAttr
-import paddle.nn as nn
-import paddle.nn.functional as F
-__all__ = ["ResNet"]
-class ConvBNLayer(nn.Layer):
-    def __init__(
-            self,
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride=1,
-            groups=1,
-            is_vd_mode=False,
-            act=None,
-            name=None, ):
-        super(ConvBNLayer, self).__init__()
-        self.is_vd_mode = is_vd_mode
-        self._pool2d_avg = nn.AvgPool2D(
-            kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self._conv = nn.Conv2D(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=(kernel_size - 1) // 2,
-            groups=groups,
-            weight_attr=ParamAttr(name=name + "_weights"),
-            bias_attr=False)
-        if name == "conv1":
-            bn_name = "bn_" + name
-        else:
-            bn_name = "bn" + name[3:]
-        self._batch_norm = nn.BatchNorm(
-            out_channels,
-            act=act,
-            param_attr=ParamAttr(name=bn_name + '_scale'),
-            bias_attr=ParamAttr(bn_name + '_offset'),
-            moving_mean_name=bn_name + '_mean',
-            moving_variance_name=bn_name + '_variance')
-    def forward(self, inputs):
-        if self.is_vd_mode:
-            inputs = self._pool2d_avg(inputs)
-        y = self._conv(inputs)
-        y = self._batch_norm(y)
-        return y
-class BottleneckBlock(nn.Layer):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None):
-        super(BottleneckBlock, self).__init__()
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2b")
-        self.conv2 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels * 4,
-            kernel_size=1,
-            act=None,
-            name=name + "_branch2c")
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels * 4,
-                kernel_size=1,
-                stride=1,
-                is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
-        self.shortcut = shortcut
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-        conv2 = self.conv2(conv1)
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = paddle.add(x=short, y=conv2)
-        y = F.relu(y)
-        return y
-class BasicBlock(nn.Layer):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None):
-        super(BasicBlock, self).__init__()
-        self.stride = stride
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            act=None,
-            name=name + "_branch2b")
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-                stride=1,
-                is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
-        self.shortcut = shortcut
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = paddle.add(x=short, y=conv1)
-        y = F.relu(y)
-        return y
-class ResNet(nn.Layer):
-    def __init__(self, in_channels=3, layers=50, **kwargs):
-        super(ResNet, self).__init__()
-        self.layers = layers
-        supported_layers = [18, 34, 50, 101, 152, 200]
-        assert layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(
-                supported_layers, layers)
-        if layers == 18:
-            depth = [2, 2, 2, 2]
-        elif layers == 34 or layers == 50:
-            depth = [3, 4, 6, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]
-        elif layers == 200:
-            depth = [3, 12, 48, 3]
-        num_channels = [64, 256, 512,
-                        1024] if layers >= 50 else [64, 64, 128, 256]
-        num_filters = [64, 128, 256, 512]
-        self.conv1_1 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=32,
-            kernel_size=3,
-            stride=2,
-            act='relu',
-            name="conv1_1")
-        self.conv1_2 = ConvBNLayer(
-            in_channels=32,
-            out_channels=32,
-            kernel_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_2")
-        self.conv1_3 = ConvBNLayer(
-            in_channels=32,
-            out_channels=64,
-            kernel_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_3")
-        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
-        self.stages = []
-        self.out_channels = []
-        if layers >= 50:
-            for block in range(len(depth)):
-                block_list = []
-                shortcut = False
-                for i in range(depth[block]):
-                    if layers in [101, 152] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-                    bottleneck_block = self.add_sublayer(
-                        'bb_%d_%d' % (block, i),
-                        BottleneckBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block] * 4,
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            name=conv_name))
-                    shortcut = True
-                    block_list.append(bottleneck_block)
-                self.out_channels.append(num_filters[block] * 4)
-                self.stages.append(nn.Sequential(*block_list))
-        else:
-            for block in range(len(depth)):
-                block_list = []
-                shortcut = False
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-                    basic_block = self.add_sublayer(
-                        'bb_%d_%d' % (block, i),
-                        BasicBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block],
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            name=conv_name))
-                    shortcut = True
-                    block_list.append(basic_block)
-                self.out_channels.append(num_filters[block])
-                self.stages.append(nn.Sequential(*block_list))
-    def forward(self, inputs):
-        y = self.conv1_1(inputs)
-        y = self.conv1_2(y)
-        y = self.conv1_3(y)
-        y = self.pool2d_max(y)
-        out = []
-        for block in self.stages:
-            y = block(y)
-            out.append(y)
-        return out
--- a/ppocr/modeling/transforms/tps_spatial_transformer.py
+++ b/ppocr/modeling/transforms/tps_spatial_transformer.py
@@ -53,7 +53,7 @@ def compute_partial_repr(input_points, control_points):
                                                                         1]
    repr_matrix = 0.5 * pairwise_dist * paddle.log(pairwise_dist)
    # fix numerical error for 0 * log(0), substitute all nan with 0
-    mask = repr_matrix != repr_matrix
+    mask = np.array(repr_matrix != repr_matrix)
    repr_matrix[mask] = 0
    return repr_matrix

--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
@@ -20,7 +20,6 @@ import numpy as np
 from .locality_aware_nms import nms_locality
 import cv2
 import paddle
-import lanms
 import os
 import sys
@@ -61,6 +60,7 @@ class EASTPostProcess(object):
        """
        restore text boxes from score map and geo map
        """
        score_map = score_map[0]
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = np.swapaxes(geo_map, 1, 2)
@@ -76,8 +76,15 @@ class EASTPostProcess(object):
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
-        boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
-        # boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
+        try:
+            import lanms
+            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
+        except:
+            print(
+                'you should install lanms by pip3 install lanms-nova to speed up nms_locality'
+            )
+            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter some low score boxes by the average score map, 

--- a/ppocr/utils/save_load.py
+++ b/ppocr/utils/save_load.py
@@ -67,6 +67,7 @@ def load_model(config, model, optimizer=None):
            if key not in params:
                logger.warning("{} not in loaded params {} !".format(
                    key, params.keys()))
+                continue
            pre_value = params[key]
            if list(value.shape) == list(pre_value.shape):
                new_state_dict[key] = pre_value
@@ -76,9 +77,14 @@ def load_model(config, model, optimizer=None):
                    format(key, value.shape, pre_value.shape))
        model.set_state_dict(new_state_dict)
-        optim_dict = paddle.load(checkpoints + '.pdopt')
        if optimizer is not None:
-            optimizer.set_state_dict(optim_dict)
+            if os.path.exists(checkpoints + '.pdopt'):
+                optim_dict = paddle.load(checkpoints + '.pdopt')
+                optimizer.set_state_dict(optim_dict)
+            else:
+                logger.warning(
+                    "{}.pdopt is not exists, params of optimizer is not loaded".
+                    format(checkpoints))
        if os.path.exists(checkpoints + '.states'):
            with open(checkpoints + '.states', 'rb') as f:

--- a/requirements.txt
+++ b/requirements.txt
@@ -12,5 +12,4 @@ cython
 lxml
 premailer
 openpyxl
 fasttext==0.9.1
-lanms-nova
\ No newline at end of file
\ No newline at end of file
--- a/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt
+++ b/test_tipc/configs/ch_PP-OCRv2/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt
-===========================ch_ppocr_mobile_v2.0===========================
+===========================ch_PP-OCRv2===========================
 model_name:ch_PP-OCRv2
 python:python3.7
 infer_model:./inference/ch_PP-OCRv2_det_infer/
 infer_export:null
-infer_quant:True
+infer_quant:False
 inference:tools/infer/predict_system.py
--use_gpu:False
+--use_gpu:False|True
--enable_mkldnn:False
+--enable_mkldnn:False|True
 --cpu_threads:1|6
 --rec_batch_num:1
--use_tensorrt:False
+--use_tensorrt:False|True
--precision:int8
+--precision:fp32
 --det_model_dir:
 --image_dir:./inference/ch_det_data_50/all-sum-510/
 --rec_model_dir:./inference/ch_PP-OCRv2_rec_infer/

--- a/test_tipc/configs/ch_PP-OCRv2_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt
+++ b/test_tipc/configs/ch_PP-OCRv2_det_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt
 ===========================kl_quant_params===========================
 model_name:PPOCRv2_ocr_det_kl
 python:python3.7
+Global.pretrained_model:null
+Global.save_inference_dir:null
 infer_model:./inference/ch_PP-OCRv2_det_infer/
 infer_export:deploy/slim/quantization/quant_kl.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
 infer_quant:True
 inference:tools/infer/predict_det.py
--use_gpu:False
+--use_gpu:False|True
--enable_mkldnn:False
+--enable_mkldnn:True
 --cpu_threads:1|6
 --rec_batch_num:1
--use_tensorrt:False
+--use_tensorrt:False|True
 --precision:int8
 --det_model_dir:
 --image_dir:./inference/ch_det_data_50/all-sum-510/

--- a/test_tipc/configs/ch_PP-OCRv2_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt
+++ b/test_tipc/configs/ch_PP-OCRv2_rec_KL/model_linux_gpu_normal_normal_infer_python_linux_gpu_cpu.txt
 ===========================kl_quant_params===========================
 model_name:PPOCRv2_ocr_rec_kl
 python:python3.7
+Global.pretrained_model:null
+Global.save_inference_dir:null
 infer_model:./inference/ch_PP-OCRv2_rec_infer/
 infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
 infer_quant:True
 inference:tools/infer/predict_rec.py
--use_gpu:False
+--use_gpu:False|True
--enable_mkldnn:False
+--enable_mkldnn:False|True
 --cpu_threads:1|6
 --rec_batch_num:1|6
--use_tensorrt:False
+--use_tensorrt:True
 --precision:int8
 --rec_model_dir:
 --image_dir:./inference/rec_inference

--- a/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt
+++ b/test_tipc/configs/ch_ppocr_mobile_V2.0_det_FPGM/train_infer_python.txt
@@ -4,7 +4,7 @@ python:python3.7
 gpu_list:0|0,1
 Global.use_gpu:True|True
 Global.auto_cast:null
-Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300
+Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300
 Global.save_model_dir:./output/
 Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
 Global.pretrained_model:null
@@ -15,7 +15,7 @@ null:null
 trainer:fpgm_train
 norm_train:null
 pact_train:null
-fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
+fpgm_train:deploy/slim/prune/sensitivity_anal.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
 distill_train:null
 null:null
 null:null
@@ -29,7 +29,7 @@ Global.save_inference_dir:./output/
 Global.pretrained_model:
 norm_export:null
 quant_export:null
-fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o 
+fpgm_export:deploy/slim/prune/export_prune_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o 
 distill_export:null
 export1:null
 export2:null