Merge remote-tracking branch 'origin/dygraph' into dygraph

404a3b31 · Leif · a5530565 · d128c1df · 404a3b31 · 404a3b31
Commit 404a3b31 authored Oct 20, 2021 by Leif
20 changed files
--- a/configs/rec/multi_language/rec_korean_lite_train.yml
+++ b/configs/rec/multi_language/rec_korean_lite_train.yml
@@ -16,7 +16,6 @@ Global:
  infer_img:
  # for data or label process
  character_dict_path: ppocr/utils/dict/korean_dict.txt
-  character_type: korean
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/multi_language/rec_latin_lite_train.yml
+++ b/configs/rec/multi_language/rec_latin_lite_train.yml
@@ -15,7 +15,6 @@ Global:
  use_visualdl: false
  infer_img: null
  character_dict_path: ppocr/utils/dict/latin_dict.txt
-  character_type: latin
  max_text_length: 25
  infer_mode: false
  use_space_char: true

--- a/configs/rec/rec_icdar15_train.yml
+++ b/configs/rec/rec_icdar15_train.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path: ppocr/utils/en_dict.txt
-  character_type: EN
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_mtb_nrtr.yml
+++ b/configs/rec/rec_mtb_nrtr.yml
@@ -14,8 +14,7 @@ Global:
  use_visualdl: False
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
-  character_dict_path: 
+  character_dict_path: ppocr/utils/EN_symbol_dict.txt
-  character_type: EN_symbol
  max_text_length: 25
  infer_mode: False
  use_space_char: True

--- a/configs/rec/rec_mv3_none_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_mv3_none_none_ctc.yml
+++ b/configs/rec/rec_mv3_none_none_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_mv3_tps_bilstm_att.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_att.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words/ch/word_1.jpg
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r31_sar.yml
+++ b/configs/rec/rec_r31_sar.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: 
  # for data or label process
  character_dict_path: ppocr/utils/dict90.txt
-  character_type: EN_symbol
  max_text_length: 30
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_none_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r34_vd_tps_bilstm_att.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_att.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words/ch/word_1.jpg
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r50_fpn_srn.yml
+++ b/configs/rec/rec_r50_fpn_srn.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words/ch/word_1.jpg
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  num_heads: 8
  infer_mode: False

--- a/configs/rec/rec_resnet_stn_bilstm_att.yml
+++ b/configs/rec/rec_resnet_stn_bilstm_att.yml
@@ -14,8 +14,7 @@ Global:
  use_visualdl: False
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
-  character_dict_path: 
+  character_dict_path: ppocr/utils/EN_symbol_dict.txt
-  character_type: EN_symbol
  max_text_length: 100
  infer_mode: False
  use_space_char: False

--- a/doc/doc_ch/config.md
+++ b/doc/doc_ch/config.md
@@ -37,10 +37,9 @@
 |      checkpoints         |    加载模型参数路径            |       None        |    用于中断后加载参数继续训练 |
 |      use_visualdl  |    设置是否启用visualdl进行可视化log展示 |          False        |    [教程地址](https://www.paddlepaddle.org.cn/paddle/visualdl) |
 |      infer_img            |    设置预测图像路径或文件夹路径     |       ./infer_img |                \                 |
-|      character_dict_path |    设置字典路径            |  ./ppocr/utils/ppocr_keys_v1.txt  |    \                 |
+|      character_dict_path |    设置字典路径            |  ./ppocr/utils/ppocr_keys_v1.txt  |    如果为空，则默认使用小写字母+数字作为字典                 |
 |      max_text_length     |    设置文本最大长度        |       25          |                \                 |
-|      character_type      |    设置字符类型            |       ch          |    en/ch, en时将使用默认dict，ch时使用自定义dict|
+|      use_space_char     |    设置是否识别空格             |        True      |                           |
-|      use_space_char     |    设置是否识别空格             |        True      |          仅在 character_type=ch 时支持空格                 |
 |      label_list          |    设置方向分类器支持的角度       |    ['0','180']    |     仅在方向分类器中生效 |
 |      save_res_path          |    设置检测模型的结果保存地址       |    ./output/det_db/predicts_db.txt    |     仅在检测模型中生效 |
@@ -191,7 +190,6 @@ PaddleOCR目前已支持80种（除中文外）语种识别，`configs/rec/multi
      use_gpu: True
      epoch_num: 500
      ...
-      character_type: it  # 需要识别的语种
      character_dict_path:  {path/of/dict} # 字典文件所在路径
   Train:
@@ -212,17 +210,17 @@ PaddleOCR目前已支持80种（除中文外）语种识别，`configs/rec/multi
 目前PaddleOCR支持的多语言算法有：
-| 配置文件 |  算法名称 |   backbone |   trans   |   seq      |     pred     |  language | character_type |
+| 配置文件 |  算法名称 |   backbone |   trans   |   seq      |     pred     |  language |
-| :--------: |  :-------:   | :-------:  |   :-------:   |   :-----:   |  :-----:   | :-----:  | :-----:  |
+| :--------: |  :-------:   | :-------:  |   :-------:   |   :-----:   |  :-----:   | :-----:  |
-| rec_chinese_cht_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 中文繁体  | chinese_cht|
+| rec_chinese_cht_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 中文繁体  |
-| rec_en_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 英语（区分大小写）   | EN |
+| rec_en_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 英语（区分大小写）   |
-| rec_french_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 法语 |  french |
+| rec_french_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 法语 |  
-| rec_ger_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 德语   | german |
+| rec_ger_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 德语   |
-| rec_japan_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 日语  | japan |
+| rec_japan_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 日语  |
-| rec_korean_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 韩语  | korean |
+| rec_korean_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 韩语  |
-| rec_latin_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 拉丁字母  | latin |
+| rec_latin_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 拉丁字母  |
-| rec_arabic_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 阿拉伯字母 |  ar |
+| rec_arabic_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 阿拉伯字母 |  
-| rec_cyrillic_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 斯拉夫字母  | cyrillic |
+| rec_cyrillic_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 斯拉夫字母  |
-| rec_devanagari_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 梵文字母  | devanagari |
+| rec_devanagari_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 梵文字母  |
 更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)
--- a/doc/doc_ch/enhanced_ctc_loss.md
+++ b/doc/doc_ch/enhanced_ctc_loss.md
@@ -16,7 +16,7 @@ Focal Loss 出自论文《Focal Loss for Dense Object Detection》, 该loss最
 从上图可以看到, 当&gamma;> 0时，调整系数（1-y’）^&gamma; 赋予易分类样本损失一个更小的权重，使得网络更关注于困难的、错分的样本。 调整因子&gamma;用于调节简单样本权重降低的速率，当&gamma;为0时即为交叉熵损失函数，当&gamma;增加时，调整因子的影响也会随之增大。实验发现&gamma;为2是最优。平衡因子&alpha;用来平衡正负样本本身的比例不均，文中&alpha;取0.25。
-对于经典的CTC算法，假设某个特征序列（f<sub>1</sub>, f<sub>2</sub>, ......f<sub>t</sub>), 经过CTC解码之后结果等于label的概率为y’, 则CTC解码结果不为label的概率即为（1-y’)；不难发现 CTCLoss值和y’有如下关系：
+对于经典的CTC算法，假设某个特征序列（f<sub>1</sub>, f<sub>2</sub>, ......f<sub>t</sub>), 经过CTC解码之后结果等于label的概率为y’, 则CTC解码结果不为label的概率即为（1-y’)；不难发现, CTCLoss值和y’有如下关系：
 <div align="center"> 
 <img src="./equation_ctcloss.png" width = "250" /> 
 </div>
@@ -38,7 +38,7 @@ A-CTC Loss是CTC Loss + ACE Loss的简称。 其中ACE Loss出自论文< Aggrega
 <img src="./rec_algo_compare.png" width = "1000" /> 
 </div>
-虽然ACELoss确实如上图所说，可以处理2D预测，在内存占用及推理速度方面具备优势，但在实践过程中，我们发现单独使用ACE Loss,  识别效果并不如CTCLoss.  因此，我们尝试将CTCLoss和ACELoss进行组合，同时以CTCLoss为主，将ACELoss 定位为一个辅助监督loss。 这一尝试收到了效果，在我们内部的实验数据集上，相比单独使用CTCLoss，识别准确率可以提升1%左右。
+虽然ACELoss确实如上图所说，可以处理2D预测，在内存占用及推理速度方面具备优势，但在实践过程中，我们发现单独使用ACE Loss,  识别效果并不如CTCLoss.  因此，我们尝试将CTCLoss和ACELoss进行结合，同时以CTCLoss为主，将ACELoss 定位为一个辅助监督loss。 这一尝试收到了效果，在我们内部的实验数据集上，相比单独使用CTCLoss，识别准确率可以提升1%左右。
 A_CTC Loss定义如下:  
 <div align="center">
 <img src="./equation_a_ctc.png" width = "300" /> 
@@ -47,7 +47,7 @@ A_CTC Loss定义如下:
 实验中，λ = 0.1.  ACE loss实现代码见:  [ace_loss.py](../../ppocr/losses/ace_loss.py)
 ## 3. C-CTC Loss
-C-CTC Loss是CTC Loss + Center Loss的简称。 其中Center Loss出自论文 < A Discriminative Feature Learning Approach for Deep Face Recognition>.  最早用于人脸识别任务，用于增大累间距离，减小类内距离,  是Metric Learning领域一种较早的、也比较常用的一种算法。 
+C-CTC Loss是CTC Loss + Center Loss的简称。 其中Center Loss出自论文 < A Discriminative Feature Learning Approach for Deep Face Recognition>.  最早用于人脸识别任务，用于增大类间距离，减小类内距离,  是Metric Learning领域较早提出、也比较常用的一种算法。 
 在中文OCR识别任务中，通过对badcase分析， 我们发现中文识别的一大难点是相似字符多，容易误识。 由此我们想到是否可以借鉴Metric Learning的想法， 增大相似字符的类间距，从而提高识别准确率。然而，Metric Learning主要用于图像识别领域，训练数据的标签为一个固定的值；而对于OCR识别来说，其本质上是一个序列识别任务，特征和label之间并不具有显式的对齐关系，因此两者如何结合依然是一个值得探索的方向。
 通过尝试Arcmargin, Cosmargin等方法， 我们最终发现Centerloss 有助于进一步提升识别的准确率。C_CTC Loss定义如下：
 <div align="center">

--- a/doc/doc_ch/environment.md
+++ b/doc/doc_ch/environment.md
 # 运行环境准备
 Windows和Mac用户推荐使用Anaconda搭建Python环境，Linux用户建议使用docker搭建Python环境。
+推荐环境：
+- PaddlePaddle >= 2.0.0 (2.1.2)
+- python3.7
+- CUDA10.1 / CUDA10.2
+- CUDNN 7.6
 如果对于Python环境熟悉的用户可以直接跳到第2步安装PaddlePaddle。
 * [1. Python环境搭建](#1)
@@ -294,11 +301,12 @@ cd /home/Projects
 # 首次运行需创建一个docker容器，再次运行时不需要运行当前命令
 # 创建一个名字为ppocr的docker容器，并将当前目录映射到容器的/paddle目录下
-如果您希望在CPU环境下使用docker，使用docker而不是nvidia-docker创建docker
+#如果您希望在CPU环境下使用docker，使用docker而不是nvidia-docker创建docker
-sudo docker run --name ppocr -v $PWD:/paddle --network=host -it paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash
+sudo docker run --name ppocr -v $PWD:/paddle --network=host -it registry.baidubce.com/paddlepaddle/paddle:2.1.3-gpu-cuda10.2-cudnn7 /bin/bash
-如果使用CUDA10，请运行以下命令创建容器，设置docker容器共享内存shm-size为64G，建议设置32G以上
+#如果使用CUDA10，请运行以下命令创建容器，设置docker容器共享内存shm-size为64G，建议设置32G以上
-sudo nvidia-docker run --name ppocr -v $PWD:/paddle --shm-size=64G --network=host -it paddlepaddle/paddle:latest-dev-cuda10.1-cudnn7-gcc82 /bin/bash
+# 如果是CUDA11+CUDNN8，推荐使用镜像registry.baidubce.com/paddlepaddle/paddle:2.1.3-gpu-cuda11.2-cudnn8
+sudo nvidia-docker run --name ppocr -v $PWD:/paddle --shm-size=64G --network=host -it registry.baidubce.com/paddlepaddle/paddle:2.1.3-gpu-cuda10.2-cudnn7 /bin/bash
 # ctrl+P+Q可退出docker 容器，重新进入docker 容器使用如下命令
 sudo docker container exec -it ppocr /bin/bash
@@ -321,8 +329,3 @@ python3 -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
 ```
 更多的版本需求，请参照[飞桨官网安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。
--- a/doc/doc_ch/inference.md
+++ b/doc/doc_ch/inference.md
@@ -273,7 +273,7 @@ python3 tools/export_model.py -c configs/rec/rec_r34_vd_none_bilstm_ctc.yml -o G
 CRNN 文本识别模型推理，可以执行如下命令：
 ```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_type="en"
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rec_crnn/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
 ```
 ![](../imgs_words_en/word_336.png)
@@ -288,7 +288,7 @@ Predicts of ./doc/imgs_words_en/word_336.png:('super', 0.9999073)
- 训练时采用的图像分辨率不同，训练上述模型采用的图像分辨率是[3，32，100]，而中文模型训练时，为了保证长文本的识别效果，训练时采用的图像分辨率是[3, 32, 320]。预测推理程序默认的形状参数是训练中文采用的图像分辨率，即[3, 32, 320]。因此，这里推理上述英文模型时，需要通过参数rec_image_shape设置识别图像的形状。
- 字符列表，DTRB论文中实验只是针对26个小写英文本母和10个数字进行实验，总共36个字符。所有大小字符都转成了小写字符，不在上面列表的字符都忽略，认为是空格。因此这里没有输入字符字典，而是通过如下命令生成字典.因此在推理时需要设置参数rec_char_type，指定为英文"en"。
+- 字符列表，DTRB论文中实验只是针对26个小写英文字母和10个数字进行实验，总共36个字符。所有大小写字符都转成了小写字符，不在上面列表的字符都忽略，认为是空格。因此这里没有输入字符字典，而是通过如下命令生成字典。因此在推理时需要设置参数rec_char_dict_path，指定为英文字典"./ppocr/utils/ic15_dict.txt"。
 ```
 self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
@@ -303,15 +303,15 @@ dict_character = list(self.character_str)
 python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" \
                                   --rec_model_dir="./inference/srn/" \
                                   --rec_image_shape="1, 64, 256" \
-                                   --rec_char_type="en" \
+                                   --rec_char_dict_path="./ppocr/utils/ic15_dict.txt" \
                                   --rec_algorithm="SRN"
 ```
 ### 4. 自定义文本识别字典的推理
-如果训练时修改了文本的字典，在使用inference模型预测时，需要通过`--rec_char_dict_path`指定使用的字典路径，并且设置 `rec_char_type=ch`
+如果训练时修改了文本的字典，在使用inference模型预测时，需要通过`--rec_char_dict_path`指定使用的字典路径
 ```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100" --rec_char_type="ch" --rec_char_dict_path="your text dict path"
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./your inference model" --rec_image_shape="3, 32, 100"  --rec_char_dict_path="your text dict path"
 ```
 <a name="多语言模型的推理"></a>
@@ -320,7 +320,7 @@ python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png
 需要通过 `--vis_font_path` 指定可视化的字体路径，`doc/fonts/` 路径下有默认提供的小语种字体，例如韩文识别：
 ```
-python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model" --rec_char_type="korean" --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/korean/1.jpg" --rec_model_dir="./your inference model"  --rec_char_dict_path="ppocr/utils/dict/korean_dict.txt" --vis_font_path="doc/fonts/korean.ttf"
 ```
 ![](../imgs_words/korean/1.jpg)
@@ -388,7 +388,7 @@ python3 tools/infer/predict_system.py --image_dir="./doc/imgs/00018069.jpg" --de
 下面给出基于EAST文本检测和STAR-Net文本识别执行命令：
 ```
-python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en"
+python3 tools/infer/predict_system.py --image_dir="./doc/imgs_en/img_10.jpg" --det_model_dir="./inference/det_east/" --det_algorithm="EAST" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_dict_path="./ppocr/utils/ic15_dict.txt"
 ```
 执行命令后，识别结果图像如下：

--- a/doc/doc_ch/recognition.md
+++ b/doc/doc_ch/recognition.md
@@ -159,7 +159,6 @@ PaddleOCR内置了一部分字典，可以按需使用。
 - 自定义字典
 如需自定义dic文件，请在 `configs/rec/rec_icdar15_train.yml` 中添加 `character_dict_path` 字段, 指向您的字典路径。
-并将 `character_type` 设置为 `ch`。
 <a name="支持空格"></a>
 ### 1.4 添加空格类别
@@ -246,8 +245,6 @@ Global:
  ...
  # 添加自定义字典，如修改字典请将路径指向新字典
  character_dict_path: ppocr/utils/ppocr_keys_v1.txt
-  # 修改字符类型
-  character_type: ch
  ...
  # 识别空格
  use_space_char: True
@@ -311,18 +308,18 @@ PaddleOCR目前已支持80种（除中文外）语种识别，`configs/rec/multi
 按语系划分，目前PaddleOCR支持的语种有：
-| 配置文件 |  算法名称 |   backbone |   trans   |   seq      |     pred     |  language | character_type |
+| 配置文件 |  算法名称 |   backbone |   trans   |   seq      |     pred     |  language |
-| :--------: |  :-------:   | :-------:  |   :-------:   |   :-----:   |  :-----:   | :-----:  | :-----:  |
+| :--------: |  :-------:   | :-------:  |   :-------:   |   :-----:   |  :-----:   | :-----:  |
-| rec_chinese_cht_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 中文繁体  | chinese_cht|
+| rec_chinese_cht_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 中文繁体  |
-| rec_en_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 英语（区分大小写）   | EN |
+| rec_en_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 英语（区分大小写）   |
-| rec_french_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 法语 |  french |
+| rec_french_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 法语 |  
-| rec_ger_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 德语   | german |
+| rec_ger_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 德语   |
-| rec_japan_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 日语  | japan |
+| rec_japan_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 日语  |
-| rec_korean_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 韩语  | korean |
+| rec_korean_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 韩语  |
-| rec_latin_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 拉丁字母  | latin |
+| rec_latin_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 拉丁字母  |
-| rec_arabic_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 阿拉伯字母 |  ar |
+| rec_arabic_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 阿拉伯字母 |
-| rec_cyrillic_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 斯拉夫字母  | cyrillic |
+| rec_cyrillic_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 斯拉夫字母  |
-| rec_devanagari_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 梵文字母  | devanagari |
+| rec_devanagari_lite_train.yml |  CRNN |   Mobilenet_v3 small 0.5 |  None   |  BiLSTM |  ctc  | 梵文字母  |
 更多支持语种请参考: [多语言模型](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_ch/multi_languages.md#%E8%AF%AD%E7%A7%8D%E7%BC%A9%E5%86%99)