Merge pull request #132 from tink2123/add_rec_score

Add rec score

Merge pull request #132 from tink2123/add_rec_score
Add rec score
4cac91eb · dyning · GitHub · ddefd24d · 9393a1b3 · 4cac91eb
Unverified Commit 4cac91eb authored Jun 05, 2020 by dyning Committed by GitHub Jun 05, 2020
20 changed files
--- a/README.md
+++ b/README.md
@@ -36,6 +36,9 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
 #### 2.inference模型下载
+*Windows 环境下如果没有安装 wget，下载模型时可将链接复制到浏览器中下载，并解压放置在相应目录下*
 #### (1)超轻量级中文OCR模型下载
 ```
 mkdir inference && cd inference
@@ -63,6 +66,9 @@ cd ..
 # 设置PYTHONPATH环境变量
 export PYTHONPATH=.
+# windows下设置环境变量
+SET PYTHONPATH=.
 # 预测image_dir指定的单张图像
 python3 tools/infer/predict_system.py --image_dir="./doc/imgs/11.jpg" --det_model_dir="./inference/ch_det_mv3_db/"  --rec_model_dir="./inference/ch_rec_mv3_crnn/"

--- a/configs/rec/rec_benchmark_reader.yml
+++ b/configs/rec/rec_benchmark_reader.yml
@@ -10,4 +10,3 @@ EvalReader:
 TestReader:
  reader_function: ppocr.data.rec.dataset_traversal,LMDBReader
  lmdb_sets_dir: ./train_data/data_lmdb_release/evaluation/
-  infer_img: ./infer_img
--- a/configs/rec/rec_chinese_lite_train.yml
+++ b/configs/rec/rec_chinese_lite_train.yml
@@ -15,9 +15,11 @@ Global:
  character_dict_path: ./ppocr/utils/ppocr_keys_v1.txt
  loss_type: ctc
  reader_yml: ./configs/rec/rec_chinese_reader.yml
-  pretrain_weights: 
+  pretrain_weights:
  checkpoints:
  save_inference_dir:
+  infer_img:
 Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

--- a/configs/rec/rec_chinese_reader.yml
+++ b/configs/rec/rec_chinese_reader.yml
@@ -11,4 +11,3 @@ EvalReader:
 TestReader:
  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
-  infer_img: ./infer_img
--- a/configs/rec/rec_icdar15_reader.yml
+++ b/configs/rec/rec_icdar15_reader.yml
@@ -11,4 +11,3 @@ EvalReader:
 TestReader:
  reader_function: ppocr.data.rec.dataset_traversal,SimpleReader
-  infer_img: ./infer_img
--- a/configs/rec/rec_icdar15_train.yml
+++ b/configs/rec/rec_icdar15_train.yml
@@ -14,9 +14,11 @@ Global:
  character_type: en
  loss_type: ctc
  reader_yml: ./configs/rec/rec_icdar15_reader.yml
-  pretrain_weights: ./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy 
+  pretrain_weights: ./pretrain_models/rec_mv3_none_bilstm_ctc/best_accuracy
  checkpoints:
  save_inference_dir:
+  infer_img:
 Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

--- a/configs/rec/rec_mv3_none_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@@ -17,6 +17,7 @@ Global:
  pretrain_weights:
  checkpoints:
  save_inference_dir:
+  infer_img:
 Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

--- a/configs/rec/rec_mv3_none_none_ctc.yml
+++ b/configs/rec/rec_mv3_none_none_ctc.yml
@@ -17,6 +17,7 @@ Global:
  pretrain_weights: 
  checkpoints:
  save_inference_dir:
+  infer_img:
 Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

--- a/configs/rec/rec_mv3_tps_bilstm_attn.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_attn.yml
@@ -13,11 +13,14 @@ Global:
  max_text_length: 25
  character_type: en
  loss_type: attention
+  tps: true
  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights:
  checkpoints:
  save_inference_dir:
+  infer_img:
 Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
@@ -13,10 +13,12 @@ Global:
  max_text_length: 25
  character_type: en
  loss_type: ctc
+  tps: true
  reader_yml: ./configs/rec/rec_benchmark_reader.yml
  pretrain_weights:
  checkpoints:
  save_inference_dir:
+  infer_img:
 Architecture:

--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
@@ -17,7 +17,9 @@ Global:
  pretrain_weights:
  checkpoints:
  save_inference_dir:
+  infer_img:
 Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_none_ctc.yml
@@ -17,6 +17,7 @@ Global:
  pretrain_weights:
  checkpoints:
  save_inference_dir:
+  infer_img:
 Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

--- a/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_attn.yml
@@ -17,6 +17,8 @@ Global:
  pretrain_weights:
  checkpoints:
  save_inference_dir:
+  infer_img:
 Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
@@ -17,6 +17,8 @@ Global:
  pretrain_weights:
  checkpoints:
  save_inference_dir:
+  infer_img:
 Architecture:
  function: ppocr.modeling.architectures.rec_model,RecModel

--- a/doc/detection.md
+++ b/doc/detection.md
@@ -46,6 +46,9 @@ wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/Res
 ```
 **启动训练**
+*如果您安装的是CPU版本，请将配置文件中的 `use_gpu` 字段修改为 `false`。*
 ```
 python3 tools/train.py -c configs/det/det_mv3_db.yml
 ```

--- a/doc/inference.md
+++ b/doc/inference.md
@@ -165,6 +165,16 @@ STAR-Net文本识别模型推理，可以执行如下命令：
 ```
 python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/starnet/" --rec_image_shape="3, 32, 100" --rec_char_type="en"
 ```
+### 3.基于Attention损失的识别模型推理
+基于Attention损失的识别模型与CTC不同，需要额外设置识别算法参数 `--rec_algorithm="RARE"`。
+RARE 文本识别模型推理，可以执行如下命令：
+```
+python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_336.png" --rec_model_dir="./inference/rare/" --rec_image_shape="3, 32, 100" --rec_char_type="en" --rec_algorithm="RARE"
+```
 ![](imgs_words_en/word_336.png)
 执行命令后，上面图像的识别结果如下：

--- a/doc/installation.md
+++ b/doc/installation.md
@@ -8,6 +8,8 @@ PaddleOCR 工作环境
 建议使用我们提供的docker运行PaddleOCR，有关docker使用请参考[链接](https://docs.docker.com/get-started/)。
+*如您希望在 Mac 或 Windows 上直接运行预测代码，可以从第2步开始执行。*
 1. （建议）准备docker环境。第一次使用这个镜像，会自动下载该镜像，请耐心等待。
 ```
 # 切换到工作目录下
@@ -54,6 +56,10 @@ python3 -m pip install paddlepaddle-gpu==1.7.2.post97 -i https://pypi.tuna.tsing
 如果您的机器安装的是CUDA10，请运行以下命令安装
 python3 -m pip install paddlepaddle-gpu==1.7.2.post107 -i https://pypi.tuna.tsinghua.edu.cn/simple
+如果您的机器是CPU，请运行以下命令安装
+python3 -m pip install paddlepaddle==1.7.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
 更多的版本需求，请参照[安装文档](https://www.paddlepaddle.org.cn/install/quick)中的说明进行操作。
 ```

--- a/doc/recognition.md
+++ b/doc/recognition.md
@@ -41,6 +41,8 @@ PaddleOCR 提供了一份用于训练 icdar2015 数据集的标签文件，通
 wget -P ./train_data/ic15_data  https://paddleocr.bj.bcebos.com/dataset/rec_gt_train.txt
 # 测试集标签
 wget -P ./train_data/ic15_data  https://paddleocr.bj.bcebos.com/dataset/rec_gt_test.txt
 ```
 最终训练集应有如下文件结构：
@@ -111,6 +113,8 @@ tar -xf rec_mv3_none_bilstm_ctc.tar && rm -rf rec_mv3_none_bilstm_ctc.tar
 开始训练:
+*如果您安装的是CPU版本，请将配置文件中的 `use_gpu` 字段修改为 `false`。*
 ```
 # 设置PYTHONPATH路径
 export PYTHONPATH=$PYTHONPATH:.
@@ -168,10 +172,11 @@ Global:
 评估数据集可以通过 `configs/rec/rec_icdar15_reader.yml`  修改EvalReader中的 `label_file_path` 设置。
+*注意* 评估时必须确保配置文件中 infer_img 字段为空
 ```
 export CUDA_VISIBLE_DEVICES=0
 # GPU 评估， Global.checkpoints 为待测权重
-python3 tools/eval.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
+python3 tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
 ```
 ### 预测
@@ -184,7 +189,7 @@ python3 tools/eval.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkp
 ```
 # 预测英文结果
-python3 tools/infer_rec.py -c configs/rec/rec_chinese_lite_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/en/word_1.jpg
+python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png
 ```
 预测图片：

--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
@@ -61,8 +61,6 @@ class TrainReader(object):
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs
        return batch_iter_reader

--- a/ppocr/data/det/db_process.py
+++ b/ppocr/data/det/db_process.py
@@ -17,6 +17,8 @@ import cv2
 import numpy as np
 import json
 import sys
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()
 from .data_augment import AugmentData
 from .random_crop_data import RandomCropData
@@ -100,6 +102,7 @@ class DBProcessTrain(object):
        img_path, gt_label = self.convert_label_infor(label_infor)
        imgvalue = cv2.imread(img_path)
        if imgvalue is None:
+            logger.info("{} does not exist!".format(img_path))
            return None
        data = self.make_data_dict(imgvalue, gt_label)
        data = AugmentData(data)