Commit 315c07d5 authored by LDOUBLEV's avatar LDOUBLEV
Browse files

fix conflicts

parents 551377a0 0ec11a29
...@@ -324,7 +324,6 @@ Eval: ...@@ -324,7 +324,6 @@ Eval:
评估数据集可以通过 `configs/rec/rec_icdar15_train.yml` 修改Eval中的 `label_file_path` 设置。 评估数据集可以通过 `configs/rec/rec_icdar15_train.yml` 修改Eval中的 `label_file_path` 设置。
*注意* 评估时必须确保配置文件中 infer_img 字段为空
``` ```
# GPU 评估, Global.checkpoints 为待测权重 # GPU 评估, Global.checkpoints 为待测权重
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
...@@ -342,7 +341,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec ...@@ -342,7 +341,7 @@ python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec
``` ```
# 预测英文结果 # 预测英文结果
python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/en/word_1.png python3 tools/infer_rec.py -c configs/rec/rec_icdar15_train.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.png
``` ```
预测图片: 预测图片:
...@@ -361,7 +360,7 @@ infer_img: doc/imgs_words/en/word_1.png ...@@ -361,7 +360,7 @@ infer_img: doc/imgs_words/en/word_1.png
``` ```
# 预测中文结果 # 预测中文结果
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words/ch/word_1.jpg python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
``` ```
预测图片: 预测图片:
......
...@@ -11,11 +11,12 @@ ...@@ -11,11 +11,12 @@
} }
2. DB: 2. DB:
@article{liao2019real, @inproceedings{liao2020real,
title={Real-time Scene Text Detection with Differentiable Binarization}, title={Real-Time Scene Text Detection with Differentiable Binarization.},
author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang}, author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang},
journal={arXiv preprint arXiv:1911.08947}, booktitle={AAAI},
year={2019} pages={11474--11481},
year={2020}
} }
3. DTRB: 3. DTRB:
...@@ -37,10 +38,11 @@ ...@@ -37,10 +38,11 @@
} }
5. SRN: 5. SRN:
@article{yu2020towards, @inproceedings{yu2020towards,
title={Towards Accurate Scene Text Recognition with Semantic Reasoning Networks}, title={Towards accurate scene text recognition with semantic reasoning networks},
author={Yu, Deli and Li, Xuan and Zhang, Chengquan and Han, Junyu and Liu, Jingtuo and Ding, Errui}, author={Yu, Deli and Li, Xuan and Zhang, Chengquan and Liu, Tao and Han, Junyu and Liu, Jingtuo and Ding, Errui},
journal={arXiv preprint arXiv:2003.12294}, booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={12113--12122},
year={2020} year={2020}
} }
...@@ -52,4 +54,62 @@ ...@@ -52,4 +54,62 @@
pages={9086--9095}, pages={9086--9095},
year={2019} year={2019}
} }
7. CRNN:
@article{shi2016end,
title={An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition},
author={Shi, Baoguang and Bai, Xiang and Yao, Cong},
journal={IEEE transactions on pattern analysis and machine intelligence},
volume={39},
number={11},
pages={2298--2304},
year={2016},
publisher={IEEE}
}
8. FPGM:
@inproceedings{he2019filter,
title={Filter pruning via geometric median for deep convolutional neural networks acceleration},
author={He, Yang and Liu, Ping and Wang, Ziwei and Hu, Zhilan and Yang, Yi},
booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
pages={4340--4349},
year={2019}
}
9. PACT:
@article{choi2018pact,
title={Pact: Parameterized clipping activation for quantized neural networks},
author={Choi, Jungwook and Wang, Zhuo and Venkataramani, Swagath and Chuang, Pierce I-Jen and Srinivasan, Vijayalakshmi and Gopalakrishnan, Kailash},
journal={arXiv preprint arXiv:1805.06085},
year={2018}
}
10.Rosetta
@inproceedings{borisyuk2018rosetta,
title={Rosetta: Large scale system for text detection and recognition in images},
author={Borisyuk, Fedor and Gordo, Albert and Sivakumar, Viswanath},
booktitle={Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
pages={71--79},
year={2018}
}
11.STAR-Net
@inproceedings{liu2016star,
title={STAR-Net: A SpaTial Attention Residue Network for Scene Text Recognition.},
author={Liu, Wei and Chen, Chaofeng and Wong, Kwan-Yee K and Su, Zhizhong and Han, Junyu},
booktitle={BMVC},
volume={2},
pages={7},
year={2016}
}
12.RARE
@inproceedings{shi2016robust,
title={Robust scene text recognition with automatic rectification},
author={Shi, Baoguang and Wang, Xinggang and Lyu, Pengyuan and Yao, Cong and Bai, Xiang},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={4168--4176},
year={2016}
}
``` ```
...@@ -11,9 +11,9 @@ This tutorial lists the text detection algorithms and text recognition algorithm ...@@ -11,9 +11,9 @@ This tutorial lists the text detection algorithms and text recognition algorithm
### 1. Text Detection Algorithm ### 1. Text Detection Algorithm
PaddleOCR open source text detection algorithms list: PaddleOCR open source text detection algorithms list:
- [x] EAST([paper](https://arxiv.org/abs/1704.03155)) - [x] EAST([paper](https://arxiv.org/abs/1704.03155))[2]
- [x] DB([paper](https://arxiv.org/abs/1911.08947)) - [x] DB([paper](https://arxiv.org/abs/1911.08947))[1]
- [x] SAST([paper](https://arxiv.org/abs/1908.05498) )(Baidu Self-Research) - [x] SAST([paper](https://arxiv.org/abs/1908.05498))[4]
On the ICDAR2015 dataset, the text detection result is as follows: On the ICDAR2015 dataset, the text detection result is as follows:
...@@ -39,11 +39,11 @@ For the training guide and use of PaddleOCR text detection algorithms, please re ...@@ -39,11 +39,11 @@ For the training guide and use of PaddleOCR text detection algorithms, please re
### 2. Text Recognition Algorithm ### 2. Text Recognition Algorithm
PaddleOCR open-source text recognition algorithms list: PaddleOCR open-source text recognition algorithms list:
- [x] CRNN([paper](https://arxiv.org/abs/1507.05717)) - [x] CRNN([paper](https://arxiv.org/abs/1507.05717))[7]
- [x] Rosetta([paper](https://arxiv.org/abs/1910.05085)) - [x] Rosetta([paper](https://arxiv.org/abs/1910.05085))[10]
- [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html)) coming soon - [ ] STAR-Net([paper](http://www.bmva.org/bmvc/2016/papers/paper043/index.html))[11] coming soon
- [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1)) coming soon - [ ] RARE([paper](https://arxiv.org/abs/1603.03915v1))[12] coming soon
- [ ] SRN([paper](https://arxiv.org/abs/2003.12294) )(Baidu Self-Research) coming soon - [ ] SRN([paper](https://arxiv.org/abs/2003.12294))[5] coming soon
Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation result of these above text recognition (using MJSynth and SynthText for training, evaluate on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) is as follow: Refer to [DTRB](https://arxiv.org/abs/1904.01906), the training and evaluation result of these above text recognition (using MJSynth and SynthText for training, evaluate on IIIT, SVT, IC03, IC13, IC15, SVTP, CUTE) is as follow:
......
...@@ -119,7 +119,7 @@ Use `Global.infer_img` to specify the path of the predicted picture or folder, a ...@@ -119,7 +119,7 @@ Use `Global.infer_img` to specify the path of the predicted picture or folder, a
``` ```
# Predict English results # Predict English results
python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.checkpoints={path/to/weights}/best_accuracy Global.infer_img=doc/imgs_words_en/word_10.png python3 tools/infer_cls.py -c configs/cls/cls_mv3.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words_en/word_10.png
``` ```
Input image: Input image:
......
...@@ -113,16 +113,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat ...@@ -113,16 +113,16 @@ python3 tools/eval.py -c configs/det/det_mv3_db.yml -o Global.checkpoints="{pat
Test the detection result on a single image: Test the detection result on a single image:
```shell ```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
``` ```
When testing the DB model, adjust the post-processing threshold: When testing the DB model, adjust the post-processing threshold:
```shell ```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.checkpoints="./output/det_db/best_accuracy" PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5 python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/img_10.jpg" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false PostProcess.box_thresh=0.6 PostProcess.unclip_ratio=1.5
``` ```
Test the detection result on all images in the folder: Test the detection result on all images in the folder:
```shell ```shell
python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.checkpoints="./output/det_db/best_accuracy" python3 tools/infer_det.py -c configs/det/det_mv3_db.yml -o Global.infer_img="./doc/imgs_en/" Global.pretrained_model="./output/det_db/best_accuracy" Global.load_static_weights=false
``` ```
...@@ -255,15 +255,18 @@ The following will introduce the lightweight Chinese recognition model inference ...@@ -255,15 +255,18 @@ The following will introduce the lightweight Chinese recognition model inference
For lightweight Chinese recognition model inference, you can execute the following commands: For lightweight Chinese recognition model inference, you can execute the following commands:
``` ```
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words/ch/word_4.jpg" --rec_model_dir="./inference/rec_crnn/" # download CRNN text recognition inference model
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar
tar xf ch_ppocr_mobile_v2.0_rec_infer.tar
python3 tools/infer/predict_rec.py --image_dir="./doc/imgs_words_en/word_10.png" --rec_model_dir="ch_ppocr_mobile_v2.0_rec_infer"
``` ```
![](../imgs_words/ch/word_4.jpg) ![](../imgs_words_en/word_10.png)
After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen. After executing the command, the prediction results (recognized text and score) of the above image will be printed on the screen.
```bash ```bash
Predicts of ./doc/imgs_words/ch/word_4.jpg:('实力活力', 0.98458153) Predicts of ./doc/imgs_words_en/word_10.png:('PAIN', 0.9897658)
``` ```
<a name="CTC-BASED_RECOGNITION"></a> <a name="CTC-BASED_RECOGNITION"></a>
...@@ -339,7 +342,12 @@ For angle classification model inference, you can execute the following commands ...@@ -339,7 +342,12 @@ For angle classification model inference, you can execute the following commands
``` ```
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/" python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="./inference/cls/"
``` ```
```
# download text angle class inference model:
wget https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar
tar xf ch_ppocr_mobile_v2.0_cls_infer.tar
python3 tools/infer/predict_cls.py --image_dir="./doc/imgs_words_en/word_10.png" --cls_model_dir="ch_ppocr_mobile_v2.0_cls_infer"
```
![](../imgs_words_en/word_10.png) ![](../imgs_words_en/word_10.png)
After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen. After executing the command, the prediction results (classification angle and score) of the above image will be printed on the screen.
......
...@@ -317,11 +317,11 @@ Eval: ...@@ -317,11 +317,11 @@ Eval:
<a name="EVALUATION"></a> <a name="EVALUATION"></a>
### EVALUATION ### EVALUATION
The evaluation data set can be modified via `configs/rec/rec_icdar15_reader.yml` setting of `label_file_path` in EvalReader. The evaluation dataset can be set by modifying the `Eval.dataset.label_file_list` field in the `configs/rec/rec_icdar15_train.yml` file.
``` ```
# GPU evaluation, Global.checkpoints is the weight to be tested # GPU evaluation, Global.checkpoints is the weight to be tested
python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_reader.yml -o Global.checkpoints={path/to/weights}/best_accuracy python3 -m paddle.distributed.launch --gpus '0' tools/eval.py -c configs/rec/rec_icdar15_train.yml -o Global.checkpoints={path/to/weights}/best_accuracy
``` ```
<a name="PREDICTION"></a> <a name="PREDICTION"></a>
...@@ -336,7 +336,7 @@ The default prediction picture is stored in `infer_img`, and the weight is speci ...@@ -336,7 +336,7 @@ The default prediction picture is stored in `infer_img`, and the weight is speci
``` ```
# Predict English results # Predict English results
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/en/word_1.jpg python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/en/word_1.jpg
``` ```
Input image: Input image:
...@@ -354,7 +354,7 @@ The configuration file used for prediction must be consistent with the training. ...@@ -354,7 +354,7 @@ The configuration file used for prediction must be consistent with the training.
``` ```
# Predict Chinese results # Predict Chinese results
python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.checkpoints={path/to/weights}/best_accuracy TestReader.infer_img=doc/imgs_words/ch/word_1.jpg python3 tools/infer_rec.py -c configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml -o Global.pretrained_model={path/to/weights}/best_accuracy Global.load_static_weights=false Global.infer_img=doc/imgs_words/ch/word_1.jpg
``` ```
Input image: Input image:
......
...@@ -262,8 +262,8 @@ class PaddleOCR(predict_system.TextSystem): ...@@ -262,8 +262,8 @@ class PaddleOCR(predict_system.TextSystem):
logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL)) logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
sys.exit(0) sys.exit(0)
postprocess_params.rec_char_dict_path = Path( postprocess_params.rec_char_dict_path = str(
__file__).parent / postprocess_params.rec_char_dict_path Path(__file__).parent / postprocess_params.rec_char_dict_path)
# init det_model and rec_model # init det_model and rec_model
super().__init__(postprocess_params) super().__init__(postprocess_params)
......
...@@ -45,7 +45,6 @@ class BalanceLoss(nn.Layer): ...@@ -45,7 +45,6 @@ class BalanceLoss(nn.Layer):
self.balance_loss = balance_loss self.balance_loss = balance_loss
self.main_loss_type = main_loss_type self.main_loss_type = main_loss_type
self.negative_ratio = negative_ratio self.negative_ratio = negative_ratio
self.main_loss_type = main_loss_type
self.return_origin = return_origin self.return_origin = return_origin
self.eps = eps self.eps = eps
......
...@@ -19,7 +19,6 @@ from __future__ import print_function ...@@ -19,7 +19,6 @@ from __future__ import print_function
import paddle import paddle
from paddle import nn from paddle import nn
from .det_basic_loss import DiceLoss from .det_basic_loss import DiceLoss
import paddle.fluid as fluid
import numpy as np import numpy as np
...@@ -27,9 +26,7 @@ class SASTLoss(nn.Layer): ...@@ -27,9 +26,7 @@ class SASTLoss(nn.Layer):
""" """
""" """
def __init__(self, def __init__(self, eps=1e-6, **kwargs):
eps=1e-6,
**kwargs):
super(SASTLoss, self).__init__() super(SASTLoss, self).__init__()
self.dice_loss = DiceLoss(eps=eps) self.dice_loss = DiceLoss(eps=eps)
...@@ -53,10 +50,12 @@ class SASTLoss(nn.Layer): ...@@ -53,10 +50,12 @@ class SASTLoss(nn.Layer):
score_loss = 1.0 - 2 * intersection / (union + 1e-5) score_loss = 1.0 - 2 * intersection / (union + 1e-5)
#border loss #border loss
l_border_split, l_border_norm = paddle.split(l_border, num_or_sections=[4, 1], axis=1) l_border_split, l_border_norm = paddle.split(
l_border, num_or_sections=[4, 1], axis=1)
f_border_split = f_border f_border_split = f_border
border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1]) border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1])
l_border_norm_split = paddle.expand(x=l_border_norm, shape=border_ex_shape) l_border_norm_split = paddle.expand(
x=l_border_norm, shape=border_ex_shape)
l_border_score = paddle.expand(x=l_score, shape=border_ex_shape) l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)
l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape) l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)
...@@ -72,7 +71,8 @@ class SASTLoss(nn.Layer): ...@@ -72,7 +71,8 @@ class SASTLoss(nn.Layer):
(paddle.sum(l_border_score * l_border_mask) + 1e-5) (paddle.sum(l_border_score * l_border_mask) + 1e-5)
#tvo_loss #tvo_loss
l_tvo_split, l_tvo_norm = paddle.split(l_tvo, num_or_sections=[8, 1], axis=1) l_tvo_split, l_tvo_norm = paddle.split(
l_tvo, num_or_sections=[8, 1], axis=1)
f_tvo_split = f_tvo f_tvo_split = f_tvo
tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1]) tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1])
l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape) l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape)
...@@ -91,7 +91,8 @@ class SASTLoss(nn.Layer): ...@@ -91,7 +91,8 @@ class SASTLoss(nn.Layer):
(paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5) (paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5)
#tco_loss #tco_loss
l_tco_split, l_tco_norm = paddle.split(l_tco, num_or_sections=[2, 1], axis=1) l_tco_split, l_tco_norm = paddle.split(
l_tco, num_or_sections=[2, 1], axis=1)
f_tco_split = f_tco f_tco_split = f_tco
tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1]) tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1])
l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape) l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape)
...@@ -109,7 +110,6 @@ class SASTLoss(nn.Layer): ...@@ -109,7 +110,6 @@ class SASTLoss(nn.Layer):
tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \ tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \
(paddle.sum(l_tco_score * l_tco_mask) + 1e-5) (paddle.sum(l_tco_score * l_tco_mask) + 1e-5)
# total loss # total loss
tvo_lw, tco_lw = 1.5, 1.5 tvo_lw, tco_lw = 1.5, 1.5
score_lw, border_lw = 1.0, 1.0 score_lw, border_lw = 1.0, 1.0
......
...@@ -32,7 +32,7 @@ setup( ...@@ -32,7 +32,7 @@ setup(
package_dir={'paddleocr': ''}, package_dir={'paddleocr': ''},
include_package_data=True, include_package_data=True,
entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]}, entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
version='2.0.1', version='2.0.2',
install_requires=requirements, install_requires=requirements,
license='Apache License 2.0', license='Apache License 2.0',
description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices', description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
......
...@@ -24,7 +24,6 @@ import numpy as np ...@@ -24,7 +24,6 @@ import numpy as np
import math import math
import time import time
import traceback import traceback
import paddle.fluid as fluid
import tools.infer.utility as utility import tools.infer.utility as utility
from ppocr.postprocess import build_post_process from ppocr.postprocess import build_post_process
...@@ -39,7 +38,6 @@ class TextClassifier(object): ...@@ -39,7 +38,6 @@ class TextClassifier(object):
self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")] self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
self.cls_batch_num = args.cls_batch_num self.cls_batch_num = args.cls_batch_num
self.cls_thresh = args.cls_thresh self.cls_thresh = args.cls_thresh
self.use_zero_copy_run = args.use_zero_copy_run
postprocess_params = { postprocess_params = {
'name': 'ClsPostProcess', 'name': 'ClsPostProcess',
"label_list": args.label_list, "label_list": args.label_list,
...@@ -99,12 +97,8 @@ class TextClassifier(object): ...@@ -99,12 +97,8 @@ class TextClassifier(object):
norm_img_batch = norm_img_batch.copy() norm_img_batch = norm_img_batch.copy()
starttime = time.time() starttime = time.time()
if self.use_zero_copy_run:
self.input_tensor.copy_from_cpu(norm_img_batch) self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.zero_copy_run() self.predictor.run()
else:
norm_img_batch = fluid.core.PaddleTensor(norm_img_batch)
self.predictor.run([norm_img_batch])
prob_out = self.output_tensors[0].copy_to_cpu() prob_out = self.output_tensors[0].copy_to_cpu()
cls_result = self.postprocess_op(prob_out) cls_result = self.postprocess_op(prob_out)
elapse += time.time() - starttime elapse += time.time() - starttime
...@@ -143,10 +137,11 @@ def main(args): ...@@ -143,10 +137,11 @@ def main(args):
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ") "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
exit() exit()
for ino in range(len(img_list)): for ino in range(len(img_list)):
logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], cls_res[ logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
ino])) cls_res[ino]))
logger.info("Total predict time for {} images, cost: {:.3f}".format( logger.info("Total predict time for {} images, cost: {:.3f}".format(
len(img_list), predict_time)) len(img_list), predict_time))
if __name__ == "__main__": if __name__ == "__main__":
main(utility.parse_args()) main(utility.parse_args())
...@@ -22,7 +22,6 @@ import cv2 ...@@ -22,7 +22,6 @@ import cv2
import numpy as np import numpy as np
import time import time
import sys import sys
import paddle
import tools.infer.utility as utility import tools.infer.utility as utility
from ppocr.utils.logging import get_logger from ppocr.utils.logging import get_logger
...@@ -35,8 +34,8 @@ logger = get_logger() ...@@ -35,8 +34,8 @@ logger = get_logger()
class TextDetector(object): class TextDetector(object):
def __init__(self, args): def __init__(self, args):
self.args = args
self.det_algorithm = args.det_algorithm self.det_algorithm = args.det_algorithm
self.use_zero_copy_run = args.use_zero_copy_run
pre_process_list = [{ pre_process_list = [{
'DetResizeForTest': { 'DetResizeForTest': {
'limit_side_len': args.det_limit_side_len, 'limit_side_len': args.det_limit_side_len,
...@@ -70,6 +69,11 @@ class TextDetector(object): ...@@ -70,6 +69,11 @@ class TextDetector(object):
postprocess_params["cover_thresh"] = args.det_east_cover_thresh postprocess_params["cover_thresh"] = args.det_east_cover_thresh
postprocess_params["nms_thresh"] = args.det_east_nms_thresh postprocess_params["nms_thresh"] = args.det_east_nms_thresh
elif self.det_algorithm == "SAST": elif self.det_algorithm == "SAST":
pre_process_list[0] = {
'DetResizeForTest': {
'resize_long': args.det_limit_side_len
}
}
postprocess_params['name'] = 'SASTPostProcess' postprocess_params['name'] = 'SASTPostProcess'
postprocess_params["score_thresh"] = args.det_sast_score_thresh postprocess_params["score_thresh"] = args.det_sast_score_thresh
postprocess_params["nms_thresh"] = args.det_sast_nms_thresh postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
...@@ -157,12 +161,8 @@ class TextDetector(object): ...@@ -157,12 +161,8 @@ class TextDetector(object):
img = img.copy() img = img.copy()
starttime = time.time() starttime = time.time()
if self.use_zero_copy_run:
self.input_tensor.copy_from_cpu(img) self.input_tensor.copy_from_cpu(img)
self.predictor.zero_copy_run() self.predictor.run()
else:
im = paddle.fluid.core.PaddleTensor(img)
self.predictor.run([im])
outputs = [] outputs = []
for output_tensor in self.output_tensors: for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu() output = output_tensor.copy_to_cpu()
......
...@@ -23,7 +23,6 @@ import numpy as np ...@@ -23,7 +23,6 @@ import numpy as np
import math import math
import time import time
import traceback import traceback
import paddle.fluid as fluid
import tools.infer.utility as utility import tools.infer.utility as utility
from ppocr.postprocess import build_post_process from ppocr.postprocess import build_post_process
...@@ -39,7 +38,6 @@ class TextRecognizer(object): ...@@ -39,7 +38,6 @@ class TextRecognizer(object):
self.character_type = args.rec_char_type self.character_type = args.rec_char_type
self.rec_batch_num = args.rec_batch_num self.rec_batch_num = args.rec_batch_num
self.rec_algorithm = args.rec_algorithm self.rec_algorithm = args.rec_algorithm
self.use_zero_copy_run = args.use_zero_copy_run
postprocess_params = { postprocess_params = {
'name': 'CTCLabelDecode', 'name': 'CTCLabelDecode',
"character_type": args.rec_char_type, "character_type": args.rec_char_type,
...@@ -101,12 +99,8 @@ class TextRecognizer(object): ...@@ -101,12 +99,8 @@ class TextRecognizer(object):
norm_img_batch = np.concatenate(norm_img_batch) norm_img_batch = np.concatenate(norm_img_batch)
norm_img_batch = norm_img_batch.copy() norm_img_batch = norm_img_batch.copy()
starttime = time.time() starttime = time.time()
if self.use_zero_copy_run:
self.input_tensor.copy_from_cpu(norm_img_batch) self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.zero_copy_run() self.predictor.run()
else:
norm_img_batch = fluid.core.PaddleTensor(norm_img_batch)
self.predictor.run([norm_img_batch])
outputs = [] outputs = []
for output_tensor in self.output_tensors: for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu() output = output_tensor.copy_to_cpu()
...@@ -145,8 +139,8 @@ def main(args): ...@@ -145,8 +139,8 @@ def main(args):
"Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ") "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
exit() exit()
for ino in range(len(img_list)): for ino in range(len(img_list)):
logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], rec_res[ logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
ino])) rec_res[ino]))
logger.info("Total predict time for {} images, cost: {:.3f}".format( logger.info("Total predict time for {} images, cost: {:.3f}".format(
len(img_list), predict_time)) len(img_list), predict_time))
......
...@@ -20,8 +20,7 @@ import numpy as np ...@@ -20,8 +20,7 @@ import numpy as np
import json import json
from PIL import Image, ImageDraw, ImageFont from PIL import Image, ImageDraw, ImageFont
import math import math
from paddle.fluid.core import AnalysisConfig from paddle import inference
from paddle.fluid.core import create_paddle_predictor
def parse_args(): def parse_args():
...@@ -33,6 +32,7 @@ def parse_args(): ...@@ -33,6 +32,7 @@ def parse_args():
parser.add_argument("--use_gpu", type=str2bool, default=True) parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--ir_optim", type=str2bool, default=True) parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False) parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--use_fp16", type=str2bool, default=False)
parser.add_argument("--gpu_mem", type=int, default=8000) parser.add_argument("--gpu_mem", type=int, default=8000)
# params for text detector # params for text detector
...@@ -46,7 +46,7 @@ def parse_args(): ...@@ -46,7 +46,7 @@ def parse_args():
parser.add_argument("--det_db_thresh", type=float, default=0.3) parser.add_argument("--det_db_thresh", type=float, default=0.3)
parser.add_argument("--det_db_box_thresh", type=float, default=0.5) parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6) parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
parser.add_argument("--max_batch_size", type=int, default=10)
# EAST parmas # EAST parmas
parser.add_argument("--det_east_score_thresh", type=float, default=0.8) parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
...@@ -62,7 +62,7 @@ def parse_args(): ...@@ -62,7 +62,7 @@ def parse_args():
parser.add_argument("--rec_model_dir", type=str) parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320") parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch') parser.add_argument("--rec_char_type", type=str, default='ch')
parser.add_argument("--rec_batch_num", type=int, default=6) parser.add_argument("--rec_batch_num", type=int, default=1)
parser.add_argument("--max_text_length", type=int, default=25) parser.add_argument("--max_text_length", type=int, default=25)
parser.add_argument( parser.add_argument(
"--rec_char_dict_path", "--rec_char_dict_path",
...@@ -78,12 +78,10 @@ def parse_args(): ...@@ -78,12 +78,10 @@ def parse_args():
parser.add_argument("--cls_model_dir", type=str) parser.add_argument("--cls_model_dir", type=str)
parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192") parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
parser.add_argument("--label_list", type=list, default=['0', '180']) parser.add_argument("--label_list", type=list, default=['0', '180'])
parser.add_argument("--cls_batch_num", type=int, default=30) parser.add_argument("--cls_batch_num", type=int, default=6)
parser.add_argument("--cls_thresh", type=float, default=0.9) parser.add_argument("--cls_thresh", type=float, default=0.9)
parser.add_argument("--enable_mkldnn", type=str2bool, default=False) parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
parser.add_argument("--use_zero_copy_run", type=str2bool, default=False)
parser.add_argument("--use_pdserving", type=str2bool, default=False) parser.add_argument("--use_pdserving", type=str2bool, default=False)
return parser.parse_args() return parser.parse_args()
...@@ -109,10 +107,15 @@ def create_predictor(args, mode, logger): ...@@ -109,10 +107,15 @@ def create_predictor(args, mode, logger):
logger.info("not find params file path {}".format(params_file_path)) logger.info("not find params file path {}".format(params_file_path))
sys.exit(0) sys.exit(0)
config = AnalysisConfig(model_file_path, params_file_path) config = inference.Config(model_file_path, params_file_path)
if args.use_gpu: if args.use_gpu:
config.enable_use_gpu(args.gpu_mem, 0) config.enable_use_gpu(args.gpu_mem, 0)
if args.use_tensorrt:
config.enable_tensorrt_engine(
precision_mode=inference.PrecisionType.Half
if args.use_fp16 else inference.PrecisionType.Float32,
max_batch_size=args.max_batch_size)
else: else:
config.disable_gpu() config.disable_gpu()
config.set_cpu_math_library_num_threads(6) config.set_cpu_math_library_num_threads(6)
...@@ -124,20 +127,18 @@ def create_predictor(args, mode, logger): ...@@ -124,20 +127,18 @@ def create_predictor(args, mode, logger):
# config.enable_memory_optim() # config.enable_memory_optim()
config.disable_glog_info() config.disable_glog_info()
if args.use_zero_copy_run:
config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
config.switch_use_feed_fetch_ops(False) config.switch_use_feed_fetch_ops(False)
else:
config.switch_use_feed_fetch_ops(True)
predictor = create_paddle_predictor(config) # create predictor
predictor = inference.create_predictor(config)
input_names = predictor.get_input_names() input_names = predictor.get_input_names()
for name in input_names: for name in input_names:
input_tensor = predictor.get_input_tensor(name) input_tensor = predictor.get_input_handle(name)
output_names = predictor.get_output_names() output_names = predictor.get_output_names()
output_tensors = [] output_tensors = []
for output_name in output_names: for output_name in output_names:
output_tensor = predictor.get_output_tensor(output_name) output_tensor = predictor.get_output_handle(output_name)
output_tensors.append(output_tensor) output_tensors.append(output_tensor)
return predictor, input_tensor, output_tensors return predictor, input_tensor, output_tensors
......
...@@ -131,7 +131,7 @@ def check_gpu(use_gpu): ...@@ -131,7 +131,7 @@ def check_gpu(use_gpu):
"model on CPU" "model on CPU"
try: try:
if use_gpu and not paddle.fluid.is_compiled_with_cuda(): if use_gpu and not paddle.is_compiled_with_cuda():
print(err) print(err)
sys.exit(1) sys.exit(1)
except Exception as e: except Exception as e:
...@@ -332,7 +332,7 @@ def eval(model, valid_dataloader, post_process_class, eval_class): ...@@ -332,7 +332,7 @@ def eval(model, valid_dataloader, post_process_class, eval_class):
return metirc return metirc
def preprocess(): def preprocess(is_train=False):
FLAGS = ArgsParser().parse_args() FLAGS = ArgsParser().parse_args()
config = load_config(FLAGS.config) config = load_config(FLAGS.config)
merge_config(FLAGS.opt) merge_config(FLAGS.opt)
...@@ -350,15 +350,17 @@ def preprocess(): ...@@ -350,15 +350,17 @@ def preprocess():
device = paddle.set_device(device) device = paddle.set_device(device)
config['Global']['distributed'] = dist.get_world_size() != 1 config['Global']['distributed'] = dist.get_world_size() != 1
if is_train:
# save_config # save_config
save_model_dir = config['Global']['save_model_dir'] save_model_dir = config['Global']['save_model_dir']
os.makedirs(save_model_dir, exist_ok=True) os.makedirs(save_model_dir, exist_ok=True)
with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f: with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
yaml.dump(dict(config), f, default_flow_style=False, sort_keys=False) yaml.dump(
dict(config), f, default_flow_style=False, sort_keys=False)
logger = get_logger( log_file = '{}/train.log'.format(save_model_dir)
name='root', log_file='{}/train.log'.format(save_model_dir)) else:
log_file = None
logger = get_logger(name='root', log_file=log_file)
if config['Global']['use_visualdl']: if config['Global']['use_visualdl']:
from visualdl import LogWriter from visualdl import LogWriter
vdl_writer_path = '{}/vdl/'.format(save_model_dir) vdl_writer_path = '{}/vdl/'.format(save_model_dir)
......
...@@ -110,6 +110,6 @@ def test_reader(config, device, logger): ...@@ -110,6 +110,6 @@ def test_reader(config, device, logger):
if __name__ == '__main__': if __name__ == '__main__':
config, device, logger, vdl_writer = program.preprocess() config, device, logger, vdl_writer = program.preprocess(is_train=True)
main(config, device, logger, vdl_writer) main(config, device, logger, vdl_writer)
# test_reader(config, device, logger) # test_reader(config, device, logger)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment