Initial commit

ed43fc11 · wanglch · ed43fc11 · ed43fc11 · ed43fc11 · ed43fc11
Commit ed43fc11 authored May 16, 2025 by wanglch
20 changed files
--- a/.gitignore
+++ b/.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+.ipynb_checkpoints/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+inference/
+inference_results/
+output/
+train_data/
+log/
+*.DS_Store
+*.vs
+*.user
+*~
+*.vscode
+*.idea
+*.log
+.clang-format
+.clang_format.hook
+build/
+dist/
+paddleocr.egg-info/
+/deploy/android_demo/app/OpenCV/
+/deploy/android_demo/app/PaddleLite/
+/deploy/android_demo/app/.cxx/
+/deploy/android_demo/app/cache/
+test_tipc/web/models/
+test_tipc/web/node_modules/
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+    -   id: check-added-large-files
+        args: ['--maxkb=512']
+    -   id: check-case-conflict
+    -   id: check-merge-conflict
+    -   id: check-symlinks
+    -   id: detect-private-key
+    -   id: end-of-file-fixer
+    -   id: trailing-whitespace
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|py)$
+-   repo: https://github.com/Lucas-C/pre-commit-hooks
+    rev: v1.5.5
+    hooks:
+    -   id: remove-crlf
+    -   id: remove-tabs
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|py)$
+-   repo: local
+    hooks:
+    -   id: clang-format
+        name: clang-format
+        description: Format files with ClangFormat
+        entry: bash .clang_format.hook -i
+        language: system
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
+# For Python files
+-   repo: https://github.com/psf/black.git
+    rev: 24.10.0
+    hooks:
+    -   id: black
+        files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
+# Flake8
+-   repo: https://github.com/pycqa/flake8
+    rev: 7.1.1
+    hooks:
+    -   id: flake8
+        args:
+            - --count
+            - --select=E9,F63,F7,F82,E721
+            - --show-source
+            - --statistics
+        exclude: ^benchmark/|^test_tipc/
--- a/.style.yapf
+++ b/.style.yapf
+[style]
+based_on_style = pep8
+column_limit = 80
--- a/Dockerfile
+++ b/Dockerfile
+FROM image.sourcefind.cn:5000/dcu/admin/base/paddlepaddle:3.0.0-py3.10-dtk24.04.3-ubuntu20.04
\ No newline at end of file
--- a/LICENSE
+++ b/LICENSE
+Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/MANIFEST.in
+++ b/MANIFEST.in
+prune .github
+prune applications
+prune benchmark
+prune configs
+prune deploy
+prune doc
+prune docs
+prune overrides
+prune ppocr/ext_op
+prune ppocr/losses
+prune ppocr/metrics
+prune ppocr/modeling
+prune ppocr/optimizer
+prune ppstructure/docs
+prune test_tipc
+prune tests
+exclude .clang_format.hook
+exclude .gitignore
+exclude .pre-commit-config.yaml
+exclude .style.yapf
+exclude mkdocs.yml
+exclude train.sh
--- a/OCR.png
+++ b/OCR.png
--- a/README.md
+++ b/README.md
+# CRNN
+## 论文
+[An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition](https://arxiv.org/abs/1507.05717)
+## 模型结构
+CRNN模型，即将CNN与RNN网络结合，共同训练。主要用于在一定程度上实现端到端(end-to-end)地对不定长的文本序列进行识别，不用先对单个文字进行切割，而是将文本识别转化为时序依赖的序列学习问题，就是基于图像的序列识别。这里有一个很精彩的改动，一共有四个最大池化层，但是最后两个池化层的窗口尺寸由2x2改为1x2，也就是图片的高度减半了四次，而宽度则只减半了两次。这是因为文本图像多数都是高较小而宽较长，所以其feature map也是这种高小宽长的矩形形状，如果使用1x2的池化窗口可以尽量保证不丢失在宽度方向的信息，更适合英文字母识别。
+<div align=center>
+    <img src="./imgs/arch.png"/>
+</div>
+## 算法原理
+CRNN（卷积循环神经网络）是一种用于光学字符识别（OCR）的深度学习模型，它结合了卷积神经网络（CNN）强大的特征提取能力、循环神经网络（RNN）对序列信息的建模能力，以及连接时序分类（CTC）损失函数来解决不定长序列对齐问题。CNN从输入图像中提取特征图，RNN对特征序列进行建模并预测字符概率分布，CTC则将这些概率分布转化为最终的文本序列。CRNN能够实现端到端的文字识别，无需显式地进行字符分割，同时能够处理不定长的文本序列，广泛应用于自然场景文字识别、文档扫描与识别等领域，具有较高的识别准确率和鲁棒性。
+<div align=center>
+    <img src="./imgs/theory.png"/>
+</div>
+## 环境配置
+### Docker（方法一）
+推荐使用docker方式运行， 此处提供[光源](https://www.sourcefind.cn/#/service-details)拉取docker镜像的地址与使用步骤
+```
+docker pull image.sourcefind.cn:5000/dcu/admin/base/paddlepaddle:3.0.0-py3.10-dtk24.04.3-ubuntu20.04
+docker run -it --shm-size=1024G -v /path/your_code_data/:/path/your_code_data/ -v /opt/hyhal:/opt/hyhal --network=host --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name CRNN  <your IMAGE ID> bash # <your IMAGE ID>为以上拉取的docker的镜像ID替换
+git clone https://developer.sourcefind.cn/codes/modelzoo/crnn_paddle
+cd /path/your_code_data/
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+pip install paddleocr
+# 需要关联动态库包
+export LD_LIBRARY_PATH=/opt/dtk-24.04.3/lib:/opt/dtk-24.04.3/cuda/extras/CUPTI/lib64:/opt/dtk-24.04.3/cuda/targets/x86_64-linux/lib:$LD_LIBRARY_PATH
+```
+Tips:以上dtk驱动、python、paddlepaddle等DCU相关工具版本需要严格一一对应。
+### Dockerfile（方法二）
+此处提供dockerfile的使用方法
+```
+git clone https://developer.sourcefind.cn/codes/modelzoo/crnn_paddle
+docker build -t crnn:latest .
+docker run --shm-size 500g --network=host --name=crnn --privileged --device=/dev/kfd --device=/dev/dri --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -v 项目地址(绝对路径):/home/ -v /opt/hyhal:/opt/hyhal:ro -it <your IMAGE ID> bash
+cd /path/your_code_data/
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+pip install paddleocr
+# 需要关联动态库包
+export LD_LIBRARY_PATH=/opt/dtk-24.04.3/lib:/opt/dtk-24.04.3/cuda/extras/CUPTI/lib64:/opt/dtk-24.04.3/cuda/targets/x86_64-linux/lib:$LD_LIBRARY_PATH
+```
+### Anaconda（方法三）
+此处提供本地配置、编译的详细步骤，例如：
+关于本项目DCU显卡所需的特殊深度学习库可从[光合](https://developer.sourcefind.cn/tool/)开发者社区下载安装。
+```
+DTK驱动:dtk24.04.3
+python:3.10
+paddlepaddle:3.0.0
+```
+`Tips：以上dtk驱动、python、paddlepaddle等DCU相关工具版本需要严格一一对应`
+其它非深度学习库参照requirements.txt安装：
+```
+git clone https://developer.sourcefind.cn/codes/modelzoo/crnn_paddle
+cd /path/your_code_data/
+pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+pip install paddleocr
+export LD_LIBRARY_PATH=/opt/dtk-24.04.3/lib:/opt/dtk-24.04.3/cuda/extras/CUPTI/lib64:/opt/dtk-24.04.3/cuda/targets/x86_64-linux/lib:$LD_LIBRARY_PATH
+```
+## 数据集
+在本测试中可以使用[icdar2015](./train_data/icdar2015/rec)数据集。本项目已经提供并整理了可直接用于验证的数据集文件，可在train_data/icdar2015/rec目录下找到。
+训练集应有如下文件结构：
+```
+|-train_data
+  |-rec
+    |- rec_gt_train.txt
+    |- train
+        |- word_001.png
+        |- word_002.jpg
+        |- word_003.jpg
+        | ...
+```
+## 训练
+训练前需要完成两件事，首先是下载模型，然后是修改配置文件。
+### 模型下载
+下载地址 https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar 
+```
+cd PaddleOCR/
+# 下载CRNN的预训练模型
+wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar
+# 解压模型参数
+cd pretrain_models
+tar -xf rec_r34_vd_none_bilstm_ctc_v2.0_train.tar && rm -rf rec_r34_vd_none_bilstm_ctc_v2.0_train.tar
+```
+### 配置文件
+```
+# 本项目的配置文件已经修改，其余的模型及任务可以参考该配置文件中的配置方式
+rec/rec_r34_vd_none_bilstm_ctc.yml
+```
+## 推理
+### 单机单卡
+```
+# 预测使用的配置文件必须与训练一致
+python3 tools/infer_rec.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o Global.pretrained_model='./PaddleOCR-main/pretrain_models/rec_r34_vd_none_bilstm_ctc_v2.0_train/best_accuracy' Global.infer_img='./PaddleOCR-main/docs/images/ppocrv4.png'
+```
+## result
+<div align=left>
+    <img src="./imgs/result1.png"/>
+</div>
+### 精度
+无
+## 应用场景
+### 算法类别
+`文字识别`
+### 热点应用行业
+`科研,教育,政府,金融`
+## 预训练权重
+CRNN PaddlePaddle预训练权重下载地址为：[rec_r34_vd_none_bilstm_ctc_v2.0_train.tar](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_r34_vd_none_bilstm_ctc_v2.0_train.tar)
+## 源码仓库及问题反馈
+-  https://developer.sourcefind.cn/codes/modelzoo/crnn_paddle
+## 参考资料
+- https://github.com/PaddlePaddle/PaddleOCR/
--- a/applications/README.md
+++ b/applications/README.md
+移步[docs](https://paddlepaddle.github.io/PaddleOCR/latest/applications/overview.html)
--- a/benchmark/PaddleOCR_DBNet/.gitattributes
+++ b/benchmark/PaddleOCR_DBNet/.gitattributes
+*.html linguist-language=python
+*.ipynb linguist-language=python
--- a/benchmark/PaddleOCR_DBNet/.gitignore
+++ b/benchmark/PaddleOCR_DBNet/.gitignore
+.DS_Store
+*.pth
+*.pyc
+*.pyo
+*.log
+*.tmp
+*.pkl
+__pycache__/
+.idea/
+output/
+test/*.jpg
+datasets/
+index/
+train_log/
+log/
+profiling_log/
--- a/benchmark/PaddleOCR_DBNet/LICENSE.md
+++ b/benchmark/PaddleOCR_DBNet/LICENSE.md
+Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/benchmark/PaddleOCR_DBNet/README.MD
+++ b/benchmark/PaddleOCR_DBNet/README.MD
+# Real-time Scene Text Detection with Differentiable Binarization
+**note**: some code is inherited from [WenmuZhou/DBNet.pytorch](https://github.com/WenmuZhou/DBNet.pytorch)
+[中文解读](https://zhuanlan.zhihu.com/p/94677957)
+![network](imgs/paper/db.jpg)
+## update 
+2020-06-07: 添加灰度图训练，训练灰度图时需要在配置里移除`dataset.args.transforms.Normalize`
+## Install Using Conda
+```
+conda env create -f environment.yml
+git clone https://github.com/WenmuZhou/DBNet.paddle.git
+cd DBNet.paddle/
+```
+or
+## Install Manually 
+```bash
+conda create -n dbnet python=3.6
+conda activate dbnet
+conda install ipython pip
+# python dependencies
+pip install -r requirement.txt
+# clone repo
+git clone https://github.com/WenmuZhou/DBNet.paddle.git
+cd DBNet.paddle/
+```
+## Requirements
+* paddlepaddle 2.4+
+## Download
+TBD
+## Data Preparation
+Training data: prepare a text `train.txt` in the following format, use '\t' as a separator
+```
+./datasets/train/img/001.jpg	./datasets/train/gt/001.txt
+```
+Validation data: prepare a text `test.txt` in the following format, use '\t' as a separator
+```
+./datasets/test/img/001.jpg	./datasets/test/gt/001.txt
+```
+- Store images in the `img` folder
+- Store groundtruth in the `gt` folder
+The groundtruth can be `.txt` files, with the following format:
+```
+x1, y1, x2, y2, x3, y3, x4, y4, annotation
+```
+## Train
+1. config the `dataset['train']['dataset']['args']['data_path']`, `dataset['validate']['dataset']['args']['data_path']` in [config/icdar2015_resnet18_fpn_DBhead_polyLR.yaml](config/icdar2015_resnet18_fpn_DBhead_polyLR.yaml)
+* Single-GPU training
+```bash
+bash single_gpu_train.sh
+```
+* Multi-GPU training
+```bash
+bash multi_gpu_train.sh
+```
+## Test
+[eval.py](tools/eval.py) is used to test model on test dataset
+1. config `model_path` in [eval.sh](eval.sh)
+2. use following script to test
+```bash
+bash eval.sh
+```
+## Predict 
+[predict.py](tools/predict.py) can be used to run inference on all images in a folder
+1. config `model_path`,`input_folder`,`output_folder` in [predict.sh](predict.sh)
+2. use following script to predict
+```
+bash predict.sh
+```
+You can change the `model_path` in the `predict.sh` file to your model location. 
+tips: if result is not good, you can change `thre` in [predict.sh](predict.sh) 
+## Export Model
+[export_model.py](tools/export_model.py) can be used to export an inference model
+use the following script to export the inference model
+```
+python tools/export_model.py --config_file config/icdar2015_resnet50_FPN_DBhead_polyLR.yaml -o trainer.resume_checkpoint=model_best.pth trainer.output_dir=output/infer
+```
+## Paddle Inference infer
+[infer.py](tools/infer.py) can be used to run Paddle Inference on an image with the exported model
+use the following script to run inference
+```
+python tools/infer.py --model-dir=output/infer/ --img-path imgs/paper/db.jpg 
+```
+<h2 id="Performance">Performance</h2>
+### [ICDAR 2015](http://rrc.cvc.uab.es/?ch=4)
+only train on ICDAR2015 dataset
+| Method                   | image size (short size) |learning rate | Precision (%) | Recall (%) | F-measure (%) | FPS |
+|:--------------------------:|:-------:|:--------:|:--------:|:------------:|:---------------:|:-----:|
+| ImageNet-resnet50-FPN-DBHead（torch）  |736 |1e-3|90.19 | 78.14 | 83.88 | 27 |
+| ImageNet-resnet50-FPN-DBHead（paddle）  |736 |1e-3| 89.47 | 79.03 | 83.92 | 27 |
+| ImageNet-resnet50-FPN-DBHead（paddle_amp）  |736 |1e-3| 88.62 | 79.95 | 84.06 | 27 |
+### examples
+TBD
+### reference
+1. https://arxiv.org/pdf/1911.08947.pdf
+2. https://github.com/WenmuZhou/DBNet.pytorch
+**If this repository helps you, please star it. Thanks.**
--- a/benchmark/PaddleOCR_DBNet/base/__init__.py
+++ b/benchmark/PaddleOCR_DBNet/base/__init__.py
+from .base_trainer import BaseTrainer
+from .base_dataset import BaseDataSet
--- a/benchmark/PaddleOCR_DBNet/base/base_dataset.py
+++ b/benchmark/PaddleOCR_DBNet/base/base_dataset.py
+# -*- coding: utf-8 -*-
+# @Time    : 2019/12/4 13:12
+# @Author  : zhoujun
+import copy
+from paddle.io import Dataset
+from data_loader.modules import *
+class BaseDataSet(Dataset):
+    def __init__(
+        self,
+        data_path: str,
+        img_mode,
+        pre_processes,
+        filter_keys,
+        ignore_tags,
+        transform=None,
+        target_transform=None,
+    ):
+        assert img_mode in ["RGB", "BRG", "GRAY"]
+        self.ignore_tags = ignore_tags
+        self.data_list = self.load_data(data_path)
+        item_keys = ["img_path", "img_name", "text_polys", "texts", "ignore_tags"]
+        for item in item_keys:
+            assert (
+                item in self.data_list[0]
+            ), "data_list from load_data must contains {}".format(item_keys)
+        self.img_mode = img_mode
+        self.filter_keys = filter_keys
+        self.transform = transform
+        self.target_transform = target_transform
+        self._init_pre_processes(pre_processes)
+    def _init_pre_processes(self, pre_processes):
+        self.aug = []
+        if pre_processes is not None:
+            for aug in pre_processes:
+                if "args" not in aug:
+                    args = {}
+                else:
+                    args = aug["args"]
+                if isinstance(args, dict):
+                    cls = eval(aug["type"])(**args)
+                else:
+                    cls = eval(aug["type"])(args)
+                self.aug.append(cls)
+    def load_data(self, data_path: str) -> list:
+        """
+        把数据加载为一个list：
+        :params data_path: 存储数据的文件夹或者文件
+        return a dict ,包含了，'img_path','img_name','text_polys','texts','ignore_tags'
+        """
+        raise NotImplementedError
+    def apply_pre_processes(self, data):
+        for aug in self.aug:
+            data = aug(data)
+        return data
+    def __getitem__(self, index):
+        try:
+            data = copy.deepcopy(self.data_list[index])
+            im = cv2.imread(data["img_path"], 1 if self.img_mode != "GRAY" else 0)
+            if self.img_mode == "RGB":
+                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+            data["img"] = im
+            data["shape"] = [im.shape[0], im.shape[1]]
+            data = self.apply_pre_processes(data)
+            if self.transform:
+                data["img"] = self.transform(data["img"])
+            data["text_polys"] = data["text_polys"].tolist()
+            if len(self.filter_keys):
+                data_dict = {}
+                for k, v in data.items():
+                    if k not in self.filter_keys:
+                        data_dict[k] = v
+                return data_dict
+            else:
+                return data
+        except:
+            return self.__getitem__(np.random.randint(self.__len__()))
+    def __len__(self):
+        return len(self.data_list)
--- a/benchmark/PaddleOCR_DBNet/base/base_trainer.py
+++ b/benchmark/PaddleOCR_DBNet/base/base_trainer.py
+# -*- coding: utf-8 -*-
+# @Time    : 2019/8/23 21:50
+# @Author  : zhoujun
+import os
+import pathlib
+import shutil
+from pprint import pformat
+import anyconfig
+import paddle
+import numpy as np
+import random
+from paddle.jit import to_static
+from paddle.static import InputSpec
+from utils import setup_logger
+class BaseTrainer:
+    def __init__(
+        self,
+        config,
+        model,
+        criterion,
+        train_loader,
+        validate_loader,
+        metric_cls,
+        post_process=None,
+    ):
+        config["trainer"]["output_dir"] = os.path.join(
+            str(pathlib.Path(os.path.abspath(__name__)).parent),
+            config["trainer"]["output_dir"],
+        )
+        config["name"] = config["name"] + "_" + model.name
+        self.save_dir = config["trainer"]["output_dir"]
+        self.checkpoint_dir = os.path.join(self.save_dir, "checkpoint")
+        os.makedirs(self.checkpoint_dir, exist_ok=True)
+        self.global_step = 0
+        self.start_epoch = 0
+        self.config = config
+        self.criterion = criterion
+        # logger and tensorboard
+        self.visualdl_enable = self.config["trainer"].get("visual_dl", False)
+        self.epochs = self.config["trainer"]["epochs"]
+        self.log_iter = self.config["trainer"]["log_iter"]
+        if paddle.distributed.get_rank() == 0:
+            anyconfig.dump(config, os.path.join(self.save_dir, "config.yaml"))
+            self.logger = setup_logger(os.path.join(self.save_dir, "train.log"))
+            self.logger_info(pformat(self.config))
+        self.model = self.apply_to_static(model)
+        # device
+        if (
+            paddle.device.cuda.device_count() > 0
+            and paddle.device.is_compiled_with_cuda()
+        ):
+            self.with_cuda = True
+            random.seed(self.config["trainer"]["seed"])
+            np.random.seed(self.config["trainer"]["seed"])
+            paddle.seed(self.config["trainer"]["seed"])
+        else:
+            self.with_cuda = False
+        self.logger_info("train with and paddle {}".format(paddle.__version__))
+        # metrics
+        self.metrics = {
+            "recall": 0,
+            "precision": 0,
+            "hmean": 0,
+            "train_loss": float("inf"),
+            "best_model_epoch": 0,
+        }
+        self.train_loader = train_loader
+        if validate_loader is not None:
+            assert post_process is not None and metric_cls is not None
+        self.validate_loader = validate_loader
+        self.post_process = post_process
+        self.metric_cls = metric_cls
+        self.train_loader_len = len(train_loader)
+        if self.validate_loader is not None:
+            self.logger_info(
+                "train dataset has {} samples,{} in dataloader, validate dataset has {} samples,{} in dataloader".format(
+                    len(self.train_loader.dataset),
+                    self.train_loader_len,
+                    len(self.validate_loader.dataset),
+                    len(self.validate_loader),
+                )
+            )
+        else:
+            self.logger_info(
+                "train dataset has {} samples,{} in dataloader".format(
+                    len(self.train_loader.dataset), self.train_loader_len
+                )
+            )
+        self._initialize_scheduler()
+        self._initialize_optimizer()
+        # resume or finetune
+        if self.config["trainer"]["resume_checkpoint"] != "":
+            self._load_checkpoint(
+                self.config["trainer"]["resume_checkpoint"], resume=True
+            )
+        elif self.config["trainer"]["finetune_checkpoint"] != "":
+            self._load_checkpoint(
+                self.config["trainer"]["finetune_checkpoint"], resume=False
+            )
+        if self.visualdl_enable and paddle.distributed.get_rank() == 0:
+            from visualdl import LogWriter
+            self.writer = LogWriter(self.save_dir)
+        # 混合精度训练
+        self.amp = self.config.get("amp", None)
+        if self.amp == "None":
+            self.amp = None
+        if self.amp:
+            self.amp["scaler"] = paddle.amp.GradScaler(
+                init_loss_scaling=self.amp.get("scale_loss", 1024),
+                use_dynamic_loss_scaling=self.amp.get("use_dynamic_loss_scaling", True),
+            )
+            self.model, self.optimizer = paddle.amp.decorate(
+                models=self.model,
+                optimizers=self.optimizer,
+                level=self.amp.get("amp_level", "O2"),
+            )
+        # 分布式训练
+        if paddle.device.cuda.device_count() > 1:
+            self.model = paddle.DataParallel(self.model)
+        # make inverse Normalize
+        self.UN_Normalize = False
+        for t in self.config["dataset"]["train"]["dataset"]["args"]["transforms"]:
+            if t["type"] == "Normalize":
+                self.normalize_mean = t["args"]["mean"]
+                self.normalize_std = t["args"]["std"]
+                self.UN_Normalize = True
+    def apply_to_static(self, model):
+        support_to_static = self.config["trainer"].get("to_static", False)
+        if support_to_static:
+            specs = None
+            print("static")
+            specs = [InputSpec([None, 3, -1, -1])]
+            model = to_static(model, input_spec=specs)
+            self.logger_info(
+                "Successfully to apply @to_static with specs: {}".format(specs)
+            )
+        return model
+    def train(self):
+        """
+        Full training logic
+        """
+        for epoch in range(self.start_epoch + 1, self.epochs + 1):
+            self.epoch_result = self._train_epoch(epoch)
+            self._on_epoch_finish()
+        if paddle.distributed.get_rank() == 0 and self.visualdl_enable:
+            self.writer.close()
+        self._on_train_finish()
+    def _train_epoch(self, epoch):
+        """
+        Training logic for an epoch
+        :param epoch: Current epoch number
+        """
+        raise NotImplementedError
+    def _eval(self, epoch):
+        """
+        eval logic for an epoch
+        :param epoch: Current epoch number
+        """
+        raise NotImplementedError
+    def _on_epoch_finish(self):
+        raise NotImplementedError
+    def _on_train_finish(self):
+        raise NotImplementedError
+    def _save_checkpoint(self, epoch, file_name):
+        """
+        Saving checkpoints
+        :param epoch: current epoch number
+        :param log: logging information of the epoch
+        :param save_best: if True, rename the saved checkpoint to 'model_best.pth.tar'
+        """
+        state_dict = self.model.state_dict()
+        state = {
+            "epoch": epoch,
+            "global_step": self.global_step,
+            "state_dict": state_dict,
+            "optimizer": self.optimizer.state_dict(),
+            "config": self.config,
+            "metrics": self.metrics,
+        }
+        filename = os.path.join(self.checkpoint_dir, file_name)
+        paddle.save(state, filename)
+    def _load_checkpoint(self, checkpoint_path, resume):
+        """
+        Resume from saved checkpoints
+        :param checkpoint_path: Checkpoint path to be resumed
+        """
+        self.logger_info("Loading checkpoint: {} ...".format(checkpoint_path))
+        checkpoint = paddle.load(checkpoint_path)
+        self.model.set_state_dict(checkpoint["state_dict"])
+        if resume:
+            self.global_step = checkpoint["global_step"]
+            self.start_epoch = checkpoint["epoch"]
+            self.config["lr_scheduler"]["args"]["last_epoch"] = self.start_epoch
+            # self.scheduler.load_state_dict(checkpoint['scheduler'])
+            self.optimizer.set_state_dict(checkpoint["optimizer"])
+            if "metrics" in checkpoint:
+                self.metrics = checkpoint["metrics"]
+            self.logger_info(
+                "resume from checkpoint {} (epoch {})".format(
+                    checkpoint_path, self.start_epoch
+                )
+            )
+        else:
+            self.logger_info("finetune from checkpoint {}".format(checkpoint_path))
+    def _initialize(self, name, module, *args, **kwargs):
+        module_name = self.config[name]["type"]
+        module_args = self.config[name].get("args", {})
+        assert all(
+            [k not in module_args for k in kwargs]
+        ), "Overwriting kwargs given in config file is not allowed"
+        module_args.update(kwargs)
+        return getattr(module, module_name)(*args, **module_args)
+    def _initialize_scheduler(self):
+        self.lr_scheduler = self._initialize("lr_scheduler", paddle.optimizer.lr)
+    def _initialize_optimizer(self):
+        self.optimizer = self._initialize(
+            "optimizer",
+            paddle.optimizer,
+            parameters=self.model.parameters(),
+            learning_rate=self.lr_scheduler,
+        )
+    def inverse_normalize(self, batch_img):
+        if self.UN_Normalize:
+            batch_img[:, 0, :, :] = (
+                batch_img[:, 0, :, :] * self.normalize_std[0] + self.normalize_mean[0]
+            )
+            batch_img[:, 1, :, :] = (
+                batch_img[:, 1, :, :] * self.normalize_std[1] + self.normalize_mean[1]
+            )
+            batch_img[:, 2, :, :] = (
+                batch_img[:, 2, :, :] * self.normalize_std[2] + self.normalize_mean[2]
+            )
+    def logger_info(self, s):
+        if paddle.distributed.get_rank() == 0:
+            self.logger.info(s)
--- a/benchmark/PaddleOCR_DBNet/config/SynthText.yaml
+++ b/benchmark/PaddleOCR_DBNet/config/SynthText.yaml
+# Base dataset settings for training DBNet on SynthText.
+name: DBNet
+dataset:
+  train:
+    dataset:
+      type: SynthTextDataset  # dataset type
+      args:
+        data_path: ''  # root directory of the SynthText dataset
+        pre_processes:  # preprocessing pipeline: augmentation plus label generation
+          - type: IaaAugment  # augmentations applied with imgaug
+            args:
+              - type: Fliplr
+                args:
+                  p: 0.5
+              - type: Affine
+                args:
+                  rotate: [-10, 10]
+              - type: Resize
+                args:
+                  size: [0.5, 3]
+          - type: EastRandomCropData
+            args:
+              size: [640, 640]
+              max_tries: 50
+              keep_ratio: true
+          - type: MakeBorderMap
+            args:
+              shrink_ratio: 0.4
+          - type: MakeShrinkMap
+            args:
+              shrink_ratio: 0.4
+              min_text_size: 8
+        transforms:  # transforms applied to the image
+          - type: ToTensor
+            args: {}
+          - type: Normalize
+            args:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+        img_mode: RGB
+        # keys removed from the sample dict before it is returned
+        filter_keys: ['img_path', 'img_name', 'text_polys', 'texts', 'ignore_tags', 'shape']
+        ignore_tags: ['*', '###']
+    loader:
+      batch_size: 1
+      shuffle: true
+      num_workers: 0
+      collate_fn: ''
--- a/benchmark/PaddleOCR_DBNet/config/SynthText_resnet18_FPN_DBhead_polyLR.yaml
+++ b/benchmark/PaddleOCR_DBNet/config/SynthText_resnet18_FPN_DBhead_polyLR.yaml
+# DBNet (resnet18 + FPN + DBHead) trained on SynthText.
+name: DBNet
+# Base config file(s) listed for this config.
+base:
+  - config/SynthText.yaml
+# Network architecture.
+arch:
+  type: Model
+  backbone:
+    type: resnet18
+    pretrained: true
+  neck:
+    type: FPN
+    inner_channels: 256
+  head:
+    type: DBHead
+    out_channels: 2
+    k: 50
+post_processing:
+  type: SegDetectorRepresenter
+  args:
+    thresh: 0.3
+    box_thresh: 0.7
+    max_candidates: 1000
+    unclip_ratio: 1.5  # from paper
+metric:
+  type: QuadMetric
+  args:
+    is_output_polygon: false
+loss:
+  type: DBLoss
+  alpha: 1
+  beta: 10
+  ohem_ratio: 3
+# NOTE(review): BaseTrainer forwards `args` verbatim as keyword arguments to
+# paddle.optimizer.<type>; `lr` and `amsgrad` look like PyTorch-style names -
+# confirm the installed Paddle version accepts them.
+optimizer:
+  type: Adam
+  args:
+    lr: 0.001
+    weight_decay: 0
+    amsgrad: true
+# NOTE(review): BaseTrainer resolves `type` with getattr on
+# paddle.optimizer.lr - confirm WarmupPolyLR is available there.
+lr_scheduler:
+  type: WarmupPolyLR
+  args:
+    warmup_epoch: 3
+# Settings read by the trainer.
+trainer:
+  seed: 2
+  epochs: 1200
+  log_iter: 10
+  show_images_iter: 50
+  resume_checkpoint: ''
+  finetune_checkpoint: ''
+  output_dir: output
+  visual_dl: false
+# Mixed-precision settings.
+amp:
+  scale_loss: 1024
+  amp_level: O2
+  custom_white_list: []
+  custom_black_list: ['exp', 'sigmoid', 'concat']
+# Dataset values set by this config.
+dataset:
+  train:
+    dataset:
+      args:
+        data_path: ./datasets/SynthText
+        img_mode: RGB
+    loader:
+      batch_size: 2
+      shuffle: true
+      num_workers: 6
+      collate_fn: ''
--- a/benchmark/PaddleOCR_DBNet/config/icdar2015.yaml
+++ b/benchmark/PaddleOCR_DBNet/config/icdar2015.yaml
+# Base dataset settings for training and validating DBNet on ICDAR 2015.
+name: DBNet
+dataset:
+  train:
+    dataset:
+      type: ICDAR2015Dataset  # dataset type
+      args:
+        # file(s) listing `img_path \t gt_path` pairs
+        data_path:
+          - ''
+        # preprocessing pipeline: augmentation plus label generation
+        pre_processes:
+          - type: IaaAugment  # augmentations applied with imgaug
+            args:
+              - type: Fliplr
+                args:
+                  p: 0.5
+              - type: Affine
+                args:
+                  rotate: [-10, 10]
+              - type: Resize
+                args:
+                  size: [0.5, 3]
+          - type: EastRandomCropData
+            args:
+              size: [640, 640]
+              max_tries: 50
+              keep_ratio: true
+          - type: MakeBorderMap
+            args:
+              shrink_ratio: 0.4
+              thresh_min: 0.3
+              thresh_max: 0.7
+          - type: MakeShrinkMap
+            args:
+              shrink_ratio: 0.4
+              min_text_size: 8
+        # transforms applied to the image
+        transforms:
+          - type: ToTensor
+            args: {}
+          - type: Normalize
+            args:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+        img_mode: RGB
+        # keys removed from the sample dict before it is returned
+        filter_keys: [img_path, img_name, text_polys, texts, ignore_tags, shape]
+        ignore_tags: ['*', '###']
+    loader:
+      batch_size: 1
+      shuffle: true
+      num_workers: 0
+      collate_fn: ''
+  validate:
+    dataset:
+      type: ICDAR2015Dataset
+      args:
+        data_path:
+          - ''
+        pre_processes:
+          - type: ResizeShortSize
+            args:
+              short_size: 736
+              resize_text_polys: false
+        transforms:
+          - type: ToTensor
+            args: {}
+          - type: Normalize
+            args:
+              mean: [0.485, 0.456, 0.406]
+              std: [0.229, 0.224, 0.225]
+        img_mode: RGB
+        # empty list: no keys are removed from validation samples
+        filter_keys: []
+        ignore_tags: ['*', '###']
+    loader:
+      batch_size: 1
+      shuffle: true
+      num_workers: 0
+      collate_fn: ICDARCollectFN
--- a/benchmark/PaddleOCR_DBNet/config/icdar2015_dcn_resnet18_FPN_DBhead_polyLR.yaml
+++ b/benchmark/PaddleOCR_DBNet/config/icdar2015_dcn_resnet18_FPN_DBhead_polyLR.yaml
+# DBNet (deformable resnet18 + FPN + DBHead) on ICDAR 2015.
+name: DBNet
+# Base config file(s) listed for this config.
+base: ['config/icdar2015.yaml']
+# Network architecture.
+arch:
+  type: Model
+  backbone:
+    type: deformable_resnet18
+    pretrained: true
+  neck:
+    type: FPN
+    inner_channels: 256
+  head:
+    type: DBHead
+    out_channels: 2
+    k: 50
+post_processing:
+  type: SegDetectorRepresenter
+  args:
+    thresh: 0.3
+    box_thresh: 0.7
+    max_candidates: 1000
+    unclip_ratio: 1.5 # from paper
+metric:
+  type: QuadMetric
+  args:
+    is_output_polygon: false
+loss:
+  type: DBLoss
+  alpha: 1
+  beta: 10
+  ohem_ratio: 3
+# NOTE(review): BaseTrainer forwards `args` verbatim as keyword arguments to
+# paddle.optimizer.<type>; `lr` and `amsgrad` look like PyTorch-style names -
+# confirm the installed Paddle version accepts them.
+optimizer:
+  type: Adam
+  args:
+    lr: 0.001
+    weight_decay: 0
+    amsgrad: true
+# NOTE(review): BaseTrainer resolves `type` with getattr on
+# paddle.optimizer.lr - confirm WarmupPolyLR is available there.
+lr_scheduler:
+  type: WarmupPolyLR
+  args:
+    warmup_epoch: 3
+# Settings read by the trainer.
+trainer:
+  seed: 2
+  epochs: 1200
+  log_iter: 10
+  show_images_iter: 50
+  resume_checkpoint: ''
+  finetune_checkpoint: ''
+  output_dir: output
+  visual_dl: false
+# Mixed-precision settings.
+amp:
+    scale_loss: 1024
+    amp_level: O2
+    custom_white_list: []
+    custom_black_list: ['exp', 'sigmoid', 'concat']
+# Dataset values set by this config.
+dataset:
+  train:
+    dataset:
+      args:
+        data_path:
+          - ./datasets/train.txt
+        img_mode: RGB
+    loader:
+      batch_size: 1
+      shuffle: true
+      num_workers: 6
+      collate_fn: ''
+  validate:
+    dataset:
+      args:
+        data_path:
+          - ./datasets/test.txt
+        pre_processes:
+          - type: ResizeShortSize
+            args:
+              short_size: 736
+              resize_text_polys: false
+        img_mode: RGB
+    loader:
+      batch_size: 1
+      shuffle: true
+      num_workers: 6
+      collate_fn: ICDARCollectFN