"...git@developer.sourcefind.cn:chenpangpang/open-webui.git" did not exist on "2aecd7d0b9fef162d9f8ae93278fadbbbfa9296d"
Commit 00586b9c authored by Leif's avatar Leif
Browse files

Merge remote-tracking branch 'origin/dygraph' into dygraph

parents 5d24bdc2 b8e7c6ce
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <gflags/gflags.h>
// Forward declarations of the gflags command-line flags shared across the
// OCR tools. The matching DEFINE_* macros (with defaults and help text)
// live in the corresponding .cpp file.

// common args
DECLARE_bool(use_gpu);        // run inference on GPU instead of CPU
DECLARE_bool(use_tensorrt);   // enable the TensorRT backend
DECLARE_int32(gpu_id);        // device id of the GPU to use
DECLARE_int32(gpu_mem);       // GPU memory to reserve when running on GPU
DECLARE_int32(cpu_threads);   // number of CPU math-library threads
DECLARE_bool(enable_mkldnn);  // enable MKL-DNN acceleration on CPU
DECLARE_string(precision);    // inference precision: fp32 / fp16 / int8
DECLARE_bool(benchmark);      // collect and report benchmark logs
DECLARE_string(output);       // directory for visualization / benchmark output
DECLARE_string(image_dir);    // directory (or file) of input images
DECLARE_string(type);         // task type: 'ocr' or 'structure'
// detection related
DECLARE_string(det_model_dir);       // path of the detection inference model
DECLARE_int32(max_side_len);         // longest-side limit when resizing input
DECLARE_double(det_db_thresh);       // binarization threshold for the DB map
DECLARE_double(det_db_box_thresh);   // box score threshold in DB post-process
DECLARE_double(det_db_unclip_ratio); // unclip ratio controlling box tightness
DECLARE_bool(use_dilation);          // apply dilation to the output map
DECLARE_string(det_db_score_mode);   // 'slow' (polygon) or 'fast' (rectangle)
DECLARE_bool(visualize);             // save visualized detection results
// classification related
DECLARE_bool(use_angle_cls);   // run the text-direction classifier
DECLARE_string(cls_model_dir); // path of the classifier inference model
DECLARE_double(cls_thresh);    // score threshold of the direction classifier
DECLARE_int32(cls_batch_num);  // classifier batch size
// recognition related
DECLARE_string(rec_model_dir);      // path of the recognition inference model
DECLARE_int32(rec_batch_num);       // recognition batch size
DECLARE_string(rec_char_dict_path); // path of the character dictionary file
// forward related
DECLARE_bool(det); // run text detection in the forward pass
DECLARE_bool(rec); // run text recognition in the forward pass
DECLARE_bool(cls); // run direction classification in the forward pass
...@@ -42,7 +42,8 @@ public: ...@@ -42,7 +42,8 @@ public:
const int &gpu_id, const int &gpu_mem, const int &gpu_id, const int &gpu_mem,
const int &cpu_math_library_num_threads, const int &cpu_math_library_num_threads,
const bool &use_mkldnn, const double &cls_thresh, const bool &use_mkldnn, const double &cls_thresh,
const bool &use_tensorrt, const std::string &precision) { const bool &use_tensorrt, const std::string &precision,
const int &cls_batch_num) {
this->use_gpu_ = use_gpu; this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id; this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem; this->gpu_mem_ = gpu_mem;
...@@ -52,14 +53,17 @@ public: ...@@ -52,14 +53,17 @@ public:
this->cls_thresh = cls_thresh; this->cls_thresh = cls_thresh;
this->use_tensorrt_ = use_tensorrt; this->use_tensorrt_ = use_tensorrt;
this->precision_ = precision; this->precision_ = precision;
this->cls_batch_num_ = cls_batch_num;
LoadModel(model_dir); LoadModel(model_dir);
} }
double cls_thresh = 0.9;
// Load Paddle inference model // Load Paddle inference model
void LoadModel(const std::string &model_dir); void LoadModel(const std::string &model_dir);
cv::Mat Run(cv::Mat &img); void Run(std::vector<cv::Mat> img_list, std::vector<int> &cls_labels,
std::vector<float> &cls_scores, std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<Predictor> predictor_;
...@@ -69,17 +73,17 @@ private: ...@@ -69,17 +73,17 @@ private:
int gpu_mem_ = 4000; int gpu_mem_ = 4000;
int cpu_math_library_num_threads_ = 4; int cpu_math_library_num_threads_ = 4;
bool use_mkldnn_ = false; bool use_mkldnn_ = false;
double cls_thresh = 0.5;
std::vector<float> mean_ = {0.5f, 0.5f, 0.5f}; std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f}; std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
bool is_scale_ = true; bool is_scale_ = true;
bool use_tensorrt_ = false; bool use_tensorrt_ = false;
std::string precision_ = "fp32"; std::string precision_ = "fp32";
int cls_batch_num_ = 1;
// pre-process // pre-process
ClsResizeImg resize_op_; ClsResizeImg resize_op_;
Normalize normalize_op_; Normalize normalize_op_;
Permute permute_op_; PermuteBatch permute_op_;
}; // class Classifier }; // class Classifier
......
...@@ -45,8 +45,9 @@ public: ...@@ -45,8 +45,9 @@ public:
const double &det_db_thresh, const double &det_db_thresh,
const double &det_db_box_thresh, const double &det_db_box_thresh,
const double &det_db_unclip_ratio, const double &det_db_unclip_ratio,
const bool &use_polygon_score, const bool &use_dilation, const std::string &det_db_score_mode,
const bool &use_tensorrt, const std::string &precision) { const bool &use_dilation, const bool &use_tensorrt,
const std::string &precision) {
this->use_gpu_ = use_gpu; this->use_gpu_ = use_gpu;
this->gpu_id_ = gpu_id; this->gpu_id_ = gpu_id;
this->gpu_mem_ = gpu_mem; this->gpu_mem_ = gpu_mem;
...@@ -58,7 +59,7 @@ public: ...@@ -58,7 +59,7 @@ public:
this->det_db_thresh_ = det_db_thresh; this->det_db_thresh_ = det_db_thresh;
this->det_db_box_thresh_ = det_db_box_thresh; this->det_db_box_thresh_ = det_db_box_thresh;
this->det_db_unclip_ratio_ = det_db_unclip_ratio; this->det_db_unclip_ratio_ = det_db_unclip_ratio;
this->use_polygon_score_ = use_polygon_score; this->det_db_score_mode_ = det_db_score_mode;
this->use_dilation_ = use_dilation; this->use_dilation_ = use_dilation;
this->use_tensorrt_ = use_tensorrt; this->use_tensorrt_ = use_tensorrt;
...@@ -72,7 +73,7 @@ public: ...@@ -72,7 +73,7 @@ public:
// Run predictor // Run predictor
void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes, void Run(cv::Mat &img, std::vector<std::vector<std::vector<int>>> &boxes,
std::vector<double> *times); std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<Predictor> predictor_;
...@@ -88,7 +89,7 @@ private: ...@@ -88,7 +89,7 @@ private:
double det_db_thresh_ = 0.3; double det_db_thresh_ = 0.3;
double det_db_box_thresh_ = 0.5; double det_db_box_thresh_ = 0.5;
double det_db_unclip_ratio_ = 2.0; double det_db_unclip_ratio_ = 2.0;
bool use_polygon_score_ = false; std::string det_db_score_mode_ = "slow";
bool use_dilation_ = false; bool use_dilation_ = false;
bool visualize_ = true; bool visualize_ = true;
......
...@@ -30,7 +30,6 @@ ...@@ -30,7 +30,6 @@
#include <numeric> #include <numeric>
#include <include/ocr_cls.h> #include <include/ocr_cls.h>
#include <include/postprocess_op.h>
#include <include/preprocess_op.h> #include <include/preprocess_op.h>
#include <include/utility.h> #include <include/utility.h>
...@@ -68,7 +67,7 @@ public: ...@@ -68,7 +67,7 @@ public:
void LoadModel(const std::string &model_dir); void LoadModel(const std::string &model_dir);
void Run(std::vector<cv::Mat> img_list, std::vector<std::string> &rec_texts, void Run(std::vector<cv::Mat> img_list, std::vector<std::string> &rec_texts,
std::vector<float> &rec_text_scores, std::vector<double> *times); std::vector<float> &rec_text_scores, std::vector<double> &times);
private: private:
std::shared_ptr<Predictor> predictor_; std::shared_ptr<Predictor> predictor_;
...@@ -93,9 +92,6 @@ private: ...@@ -93,9 +92,6 @@ private:
Normalize normalize_op_; Normalize normalize_op_;
PermuteBatch permute_op_; PermuteBatch permute_op_;
// post-process
PostProcessor post_processor_;
}; // class CrnnRecognizer }; // class CrnnRecognizer
} // namespace PaddleOCR } // namespace PaddleOCR
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>
#include <cstring>
#include <fstream>
#include <numeric>
#include <include/ocr_cls.h>
#include <include/ocr_det.h>
#include <include/ocr_rec.h>
#include <include/preprocess_op.h>
#include <include/utility.h>
using namespace paddle_infer;
namespace PaddleOCR {

// End-to-end OCR pipeline that chains text detection, optional text-direction
// classification, and text recognition over a list of images.
// NOTE(review): the class shares its name with the enclosing namespace, so
// external callers must qualify it as PaddleOCR::PaddleOCR.
class PaddleOCR {
public:
explicit PaddleOCR();
~PaddleOCR();

// Runs the pipeline over every image in cv_all_img_names and returns one
// result vector per image. The det/rec/cls switches select which stages
// execute in the forward pass (mirroring the --det/--rec/--cls flags).
std::vector<std::vector<OCRPredictResult>>
ocr(std::vector<cv::String> cv_all_img_names, bool det = true,
    bool rec = true, bool cls = true);

private:
// Stage engines as raw pointers; presumably allocated/released by the
// ctor/dtor, whose definitions are not visible in this header — confirm.
DBDetector *detector_ = nullptr;
Classifier *classifier_ = nullptr;
CRNNRecognizer *recognizer_ = nullptr;

// Detection stage: populates ocr_results for img and accumulates timings.
void det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results,
         std::vector<double> &times);
// Recognition stage: populates ocr_results for each cropped image.
void rec(std::vector<cv::Mat> img_list,
         std::vector<OCRPredictResult> &ocr_results,
         std::vector<double> &times);
// Direction-classification stage: populates ocr_results per image.
void cls(std::vector<cv::Mat> img_list,
         std::vector<OCRPredictResult> &ocr_results,
         std::vector<double> &times);
// Reports the accumulated per-stage timings for img_num processed images.
void log(std::vector<double> &det_times, std::vector<double> &rec_times,
         std::vector<double> &cls_times, int img_num);
};

} // namespace PaddleOCR
...@@ -56,7 +56,7 @@ public: ...@@ -56,7 +56,7 @@ public:
std::vector<std::vector<std::vector<int>>> std::vector<std::vector<std::vector<int>>>
BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap, BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
const float &box_thresh, const float &det_db_unclip_ratio, const float &box_thresh, const float &det_db_unclip_ratio,
const bool &use_polygon_score); const std::string &det_db_score_mode);
std::vector<std::vector<std::vector<int>>> std::vector<std::vector<std::vector<int>>>
FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes, FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
......
...@@ -32,14 +32,21 @@ ...@@ -32,14 +32,21 @@
namespace PaddleOCR { namespace PaddleOCR {
struct OCRPredictResult {
std::vector<std::vector<int>> box;
std::string text;
float score = -1.0;
float cls_score;
int cls_label = -1;
};
class Utility { class Utility {
public: public:
static std::vector<std::string> ReadDict(const std::string &path); static std::vector<std::string> ReadDict(const std::string &path);
static void static void VisualizeBboxes(const cv::Mat &srcimg,
VisualizeBboxes(const cv::Mat &srcimg, const std::vector<OCRPredictResult> &ocr_result,
const std::vector<std::vector<std::vector<int>>> &boxes, const std::string &save_path);
const std::string &save_path);
template <class ForwardIterator> template <class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last) { inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
...@@ -55,6 +62,10 @@ public: ...@@ -55,6 +62,10 @@ public:
static std::vector<int> argsort(const std::vector<float> &array); static std::vector<int> argsort(const std::vector<float> &array);
static std::string basename(const std::string &filename); static std::string basename(const std::string &filename);
static bool PathExists(const std::string &path);
static void print_result(const std::vector<OCRPredictResult> &ocr_result);
}; };
} // namespace PaddleOCR } // namespace PaddleOCR
\ No newline at end of file
...@@ -9,9 +9,12 @@ ...@@ -9,9 +9,12 @@
- [2.1 将模型导出为inference model](#21-将模型导出为inference-model) - [2.1 将模型导出为inference model](#21-将模型导出为inference-model)
- [2.2 编译PaddleOCR C++预测demo](#22-编译paddleocr-c预测demo) - [2.2 编译PaddleOCR C++预测demo](#22-编译paddleocr-c预测demo)
- [2.3 运行demo](#23-运行demo) - [2.3 运行demo](#23-运行demo)
- [1. 只调用检测:](#1-只调用检测) - [1. 检测+分类+识别:](#1-检测分类识别)
- [2. 只调用识别:](#2-只调用识别) - [2. 检测+识别:](#2-检测识别)
- [3. 调用串联:](#3-调用串联) - [3. 检测:](#3-检测)
- [4. 分类+识别:](#4-分类识别)
- [5. 识别:](#5-识别)
- [6. 分类:](#6-分类)
- [3. FAQ](#3-faq) - [3. FAQ](#3-faq)
# 服务器端C++预测 # 服务器端C++预测
...@@ -181,6 +184,9 @@ inference/ ...@@ -181,6 +184,9 @@ inference/
|-- rec_rcnn |-- rec_rcnn
| |--inference.pdiparams | |--inference.pdiparams
| |--inference.pdmodel | |--inference.pdmodel
|-- cls
| |--inference.pdiparams
| |--inference.pdmodel
``` ```
<a name="22"></a> <a name="22"></a>
...@@ -213,36 +219,71 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ...@@ -213,36 +219,71 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
运行方式: 运行方式:
```shell ```shell
./build/ppocr <mode> [--param1] [--param2] [...] ./build/ppocr [--param1] [--param2] [...]
```
具体命令如下:
##### 1. 检测+分类+识别:
```shell
./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \
--cls_model_dir=inference/cls \
--image_dir=../../doc/imgs/12.jpg \
--use_angle_cls=true \
--det=true \
--rec=true \
--cls=true \
```
##### 2. 检测+识别:
```shell
./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \
--image_dir=../../doc/imgs/12.jpg \
--use_angle_cls=false \
--det=true \
--rec=true \
--cls=false \
```
##### 3. 检测:
```shell
./build/ppocr --det_model_dir=inference/det_db \
--image_dir=../../doc/imgs/12.jpg \
--det=true \
--rec=false
``` ```
其中,`mode`为必选参数,表示选择的功能,取值范围['det', 'rec', 'system'],分别表示调用检测、识别、检测识别串联(包括方向分类器)。具体命令如下:
##### 1. 只调用检测 ##### 4. 分类+识别
```shell ```shell
./build/ppocr det \ ./build/ppocr --rec_model_dir=inference/rec_rcnn \
--det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \ --cls_model_dir=inference/cls \
--image_dir=../../doc/imgs/12.jpg --image_dir=../../doc/imgs_words/ch/word_1.jpg \
--use_angle_cls=true \
--det=false \
--rec=true \
--cls=true \
``` ```
##### 2. 只调用识别:
##### 5. 识别:
```shell ```shell
./build/ppocr rec \ ./build/ppocr --rec_model_dir=inference/rec_rcnn \
--rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \ --image_dir=../../doc/imgs_words/ch/word_1.jpg \
--image_dir=../../doc/imgs_words/ch/ --use_angle_cls=false \
--det=false \
--rec=true \
--cls=false \
``` ```
##### 3. 调用串联:
##### 6. 分类:
```shell ```shell
# 不使用方向分类器 ./build/ppocr --cls_model_dir=inference/cls \
./build/ppocr system \ --cls_model_dir=inference/cls \
--det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \ --image_dir=../../doc/imgs_words/ch/word_1.jpg \
--rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
--image_dir=../../doc/imgs/12.jpg
# 使用方向分类器
./build/ppocr system \
--det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
--use_angle_cls=true \ --use_angle_cls=true \
--cls_model_dir=inference/ch_ppocr_mobile_v2.0_cls_infer \ --det=false \
--rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \ --rec=false \
--image_dir=../../doc/imgs/12.jpg --cls=true \
``` ```
更多支持的可调节参数解释如下: 更多支持的可调节参数解释如下:
...@@ -258,6 +299,15 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ...@@ -258,6 +299,15 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|enable_mkldnn|bool|true|是否使用mkldnn库| |enable_mkldnn|bool|true|是否使用mkldnn库|
|output|str|./output|可视化结果保存的路径| |output|str|./output|可视化结果保存的路径|
- 前向相关
|参数名称|类型|默认参数|意义|
| :---: | :---: | :---: | :---: |
|det|bool|true|前向是否执行文字检测|
|rec|bool|true|前向是否执行文字识别|
|cls|bool|false|前向是否执行文字方向分类|
- 检测模型相关 - 检测模型相关
|参数名称|类型|默认参数|意义| |参数名称|类型|默认参数|意义|
...@@ -267,7 +317,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ...@@ -267,7 +317,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|det_db_thresh|float|0.3|用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显| |det_db_thresh|float|0.3|用于过滤DB预测的二值化图像,设置为0.-0.3对结果影响不明显|
|det_db_box_thresh|float|0.5|DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小| |det_db_box_thresh|float|0.5|DB后处理过滤box的阈值,如果检测存在漏框情况,可酌情减小|
|det_db_unclip_ratio|float|1.6|表示文本框的紧致程度,越小则文本框更靠近文本| |det_db_unclip_ratio|float|1.6|表示文本框的紧致程度,越小则文本框更靠近文本|
|use_polygon_score|bool|false|是否使用多边形框计算bbox score,false表示使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。| |det_db_score_mode|string|slow|slow:使用多边形框计算bbox score,fast:使用矩形框计算。矩形框计算速度更快,多边形框对弯曲文本区域计算更准确。|
|visualize|bool|true|是否对结果进行可视化,为1时,预测结果会保存在`output`字段指定的文件夹下和输入图像同名的图像上。| |visualize|bool|true|是否对结果进行可视化,为1时,预测结果会保存在`output`字段指定的文件夹下和输入图像同名的图像上。|
- 方向分类器相关 - 方向分类器相关
...@@ -277,6 +327,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ...@@ -277,6 +327,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
|use_angle_cls|bool|false|是否使用方向分类器| |use_angle_cls|bool|false|是否使用方向分类器|
|cls_model_dir|string|-|方向分类器inference model地址| |cls_model_dir|string|-|方向分类器inference model地址|
|cls_thresh|float|0.9|方向分类器的得分阈值| |cls_thresh|float|0.9|方向分类器的得分阈值|
|cls_batch_num|int|1|方向分类器batchsize|
- 识别模型相关 - 识别模型相关
...@@ -284,15 +335,22 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir ...@@ -284,15 +335,22 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
| :---: | :---: | :---: | :---: | | :---: | :---: | :---: | :---: |
|rec_model_dir|string|-|识别模型inference model地址| |rec_model_dir|string|-|识别模型inference model地址|
|rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件| |rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件|
|rec_batch_num|int|6|识别模型batchsize|
* PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需将修改`rec_char_dict_path`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。 * PaddleOCR也支持多语言的预测,更多支持的语言和模型可以参考[识别文档](../../doc/doc_ch/recognition.md)中的多语言字典与模型部分,如果希望进行多语言预测,只需将修改`rec_char_dict_path`(字典文件路径)以及`rec_model_dir`(inference模型路径)字段即可。
最终屏幕上会输出检测结果如下。 最终屏幕上会输出检测结果如下。
<div align="center"> ```bash
<img src="./imgs/cpp_infer_pred_12.png" width="600"> predict img: ../../doc/imgs/12.jpg
</div> ../../doc/imgs/12.jpg
0 det boxes: [[79,553],[399,541],[400,573],[80,585]] rec text: 打浦路252935号 rec score: 0.933757
1 det boxes: [[31,509],[510,488],[511,529],[33,549]] rec text: 绿洲仕格维花园公寓 rec score: 0.951745
2 det boxes: [[181,456],[395,448],[396,480],[182,488]] rec text: 打浦路15号 rec score: 0.91956
3 det boxes: [[43,413],[480,391],[481,428],[45,450]] rec text: 上海斯格威铂尔多大酒店 rec score: 0.915914
The detection visualized image saved in ./output//12.jpg
```
## 3. FAQ ## 3. FAQ
......
...@@ -9,9 +9,12 @@ ...@@ -9,9 +9,12 @@
- [2.1 Export the inference model](#21-export-the-inference-model) - [2.1 Export the inference model](#21-export-the-inference-model)
- [2.2 Compile PaddleOCR C++ inference demo](#22-compile-paddleocr-c-inference-demo) - [2.2 Compile PaddleOCR C++ inference demo](#22-compile-paddleocr-c-inference-demo)
- [Run the demo](#run-the-demo) - [Run the demo](#run-the-demo)
- [1. run det demo:](#1-run-det-demo) - [1. det+cls+rec:](#1-detclsrec)
- [2. run rec demo:](#2-run-rec-demo) - [2. det+rec:](#2-detrec)
- [3. run system demo:](#3-run-system-demo) - [3. det](#3-det)
- [4. cls+rec:](#4-clsrec)
- [5. rec](#5-rec)
- [6. cls](#6-cls)
- [3. FAQ](#3-faq) - [3. FAQ](#3-faq)
# Server-side C++ Inference # Server-side C++ Inference
...@@ -166,6 +169,9 @@ inference/ ...@@ -166,6 +169,9 @@ inference/
|-- rec_rcnn |-- rec_rcnn
| |--inference.pdiparams | |--inference.pdiparams
| |--inference.pdmodel | |--inference.pdmodel
|-- cls
| |--inference.pdiparams
| |--inference.pdmodel
``` ```
...@@ -198,44 +204,72 @@ or the generated Paddle inference library path (`build/paddle_inference_install_ ...@@ -198,44 +204,72 @@ or the generated Paddle inference library path (`build/paddle_inference_install_
### Run the demo ### Run the demo
Execute the built executable file: Execute the built executable file:
```shell ```shell
./build/ppocr <mode> [--param1] [--param2] [...] ./build/ppocr [--param1] [--param2] [...]
``` ```
`mode` is a required parameter,and the valid values are
mode value | Model used
-----|------
det | Detection only
rec | Recognition only
system | End-to-end system
Specifically, Specifically,
##### 1. run det demo: ##### 1. det+cls+rec:
```shell
./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/rec_rcnn \
--cls_model_dir=inference/cls \
--image_dir=../../doc/imgs/12.jpg \
--use_angle_cls=true \
--det=true \
--rec=true \
--cls=true \
```
##### 2. det+rec:
```shell ```shell
./build/ppocr det \ ./build/ppocr --det_model_dir=inference/det_db \
--det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \ --rec_model_dir=inference/rec_rcnn \
--image_dir=../../doc/imgs/12.jpg --image_dir=../../doc/imgs/12.jpg \
--use_angle_cls=false \
--det=true \
--rec=true \
--cls=false \
``` ```
##### 2. run rec demo:
##### 3. det
```shell ```shell
./build/ppocr rec \ ./build/ppocr --det_model_dir=inference/det_db \
--rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \ --image_dir=../../doc/imgs/12.jpg \
--image_dir=../../doc/imgs_words/ch/ --det=true \
--rec=false
``` ```
##### 3. run system demo:
##### 4. cls+rec:
```shell ```shell
# without text direction classifier ./build/ppocr --rec_model_dir=inference/rec_rcnn \
./build/ppocr system \ --cls_model_dir=inference/cls \
--det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \ --image_dir=../../doc/imgs_words/ch/word_1.jpg \
--rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \
--image_dir=../../doc/imgs/12.jpg
# with text direction classifier
./build/ppocr system \
--det_model_dir=inference/ch_ppocr_mobile_v2.0_det_infer \
--use_angle_cls=true \ --use_angle_cls=true \
--cls_model_dir=inference/ch_ppocr_mobile_v2.0_cls_infer \ --det=false \
--rec_model_dir=inference/ch_ppocr_mobile_v2.0_rec_infer \ --rec=true \
--image_dir=../../doc/imgs/12.jpg --cls=true \
```
##### 5. rec
```shell
./build/ppocr --rec_model_dir=inference/rec_rcnn \
--image_dir=../../doc/imgs_words/ch/word_1.jpg \
--use_angle_cls=false \
--det=false \
--rec=true \
--cls=false \
```
##### 6. cls
```shell
./build/ppocr --cls_model_dir=inference/cls \
--image_dir=../../doc/imgs_words/ch/word_1.jpg \
--use_angle_cls=true \
--det=false \
--rec=false \
--cls=true \
``` ```
More parameters are as follows, More parameters are as follows,
...@@ -251,6 +285,16 @@ More parameters are as follows, ...@@ -251,6 +285,16 @@ More parameters are as follows,
|enable_mkldnn|bool|true|Whether to use mkdlnn library| |enable_mkldnn|bool|true|Whether to use mkdlnn library|
|output|str|./output|Path where visualization results are saved| |output|str|./output|Path where visualization results are saved|
- forward
|parameter|data type|default|meaning|
| :---: | :---: | :---: | :---: |
|det|bool|true|Whether to run text detection in the forward pass|
|rec|bool|true|Whether to run text recognition in the forward pass|
|cls|bool|false|Whether to run text direction classification in the forward pass|
- Detection related parameters - Detection related parameters
|parameter|data type|default|meaning| |parameter|data type|default|meaning|
...@@ -260,7 +304,7 @@ More parameters are as follows, ...@@ -260,7 +304,7 @@ More parameters are as follows,
|det_db_thresh|float|0.3|Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result| |det_db_thresh|float|0.3|Used to filter the binarized image of DB prediction, setting 0.-0.3 has no obvious effect on the result|
|det_db_box_thresh|float|0.5|DB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate| |det_db_box_thresh|float|0.5|DB post-processing filter box threshold, if there is a missing box detected, it can be reduced as appropriate|
|det_db_unclip_ratio|float|1.6|Indicates the compactness of the text box, the smaller the value, the closer the text box to the text| |det_db_unclip_ratio|float|1.6|Indicates the compactness of the text box, the smaller the value, the closer the text box to the text|
|use_polygon_score|bool|false|Whether to use polygon box to calculate bbox score, false means to use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area.| |det_db_score_mode|string|slow| slow: use polygon box to calculate bbox score, fast: use rectangle box to calculate. Use rectangular box to calculate faster, and polygonal box more accurate for curved text area.|
|visualize|bool|true|Whether to visualize the results,when it is set as true, the prediction results will be saved in the folder specified by the `output` field on an image with the same name as the input image.| |visualize|bool|true|Whether to visualize the results,when it is set as true, the prediction results will be saved in the folder specified by the `output` field on an image with the same name as the input image.|
- Classifier related parameters - Classifier related parameters
...@@ -270,6 +314,7 @@ More parameters are as follows, ...@@ -270,6 +314,7 @@ More parameters are as follows,
|use_angle_cls|bool|false|Whether to use the direction classifier| |use_angle_cls|bool|false|Whether to use the direction classifier|
|cls_model_dir|string|-|Address of direction classifier inference model| |cls_model_dir|string|-|Address of direction classifier inference model|
|cls_thresh|float|0.9|Score threshold of the direction classifier| |cls_thresh|float|0.9|Score threshold of the direction classifier|
|cls_batch_num|int|1|batch size of classifier|
- Recognition related parameters - Recognition related parameters
...@@ -277,15 +322,22 @@ More parameters are as follows, ...@@ -277,15 +322,22 @@ More parameters are as follows,
| --- | --- | --- | --- | | --- | --- | --- | --- |
|rec_model_dir|string|-|Address of recognition inference model| |rec_model_dir|string|-|Address of recognition inference model|
|rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|dictionary file| |rec_char_dict_path|string|../../ppocr/utils/ppocr_keys_v1.txt|dictionary file|
|rec_batch_num|int|6|batch size of recognition|
* Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `rec_char_dict_path` and `rec_model_dir`. * Multi-language inference is also supported in PaddleOCR, you can refer to [recognition tutorial](../../doc/doc_en/recognition_en.md) for more supported languages and models in PaddleOCR. Specifically, if you want to infer using multi-language models, you just need to modify values of `rec_char_dict_path` and `rec_model_dir`.
The detection results will be shown on the screen, which is as follows. The detection results will be shown on the screen, which is as follows.
<div align="center"> ```bash
<img src="./imgs/cpp_infer_pred_12.png" width="600"> predict img: ../../doc/imgs/12.jpg
</div> ../../doc/imgs/12.jpg
0 det boxes: [[79,553],[399,541],[400,573],[80,585]] rec text: 打浦路252935号 rec score: 0.933757
1 det boxes: [[31,509],[510,488],[511,529],[33,549]] rec text: 绿洲仕格维花园公寓 rec score: 0.951745
2 det boxes: [[181,456],[395,448],[396,480],[182,488]] rec text: 打浦路15号 rec score: 0.91956
3 det boxes: [[43,413],[480,391],[481,428],[45,450]] rec text: 上海斯格威铂尔多大酒店 rec score: 0.915914
The detection visualized image saved in ./output//12.jpg
```
## 3. FAQ ## 3. FAQ
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>

// Definitions of the command-line flags shared by the OCR tools; the
// matching DECLARE_* macros live in the corresponding args header.

// common args
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU.");
DEFINE_bool(use_tensorrt, false, "Whether use tensorrt.");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute.");
// NOTE(review): the previous help text described this flag as a "GPU id";
// it is the GPU memory budget used when running on GPU (default 4000).
DEFINE_int32(gpu_mem, 4000, "GPU memory to allocate when inferring with GPU.");
DEFINE_int32(cpu_threads, 10, "Num of threads with CPU.");
DEFINE_bool(enable_mkldnn, false, "Whether use mkldnn with CPU.");
DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8");
DEFINE_bool(benchmark, false, "Whether use benchmark.");
DEFINE_string(output, "./output/", "Save benchmark log path.");
DEFINE_string(image_dir, "", "Dir of input image.");
DEFINE_string(
    type, "ocr",
    "Perform ocr or structure, the value is selected in ['ocr','structure'].");
// detection related
DEFINE_string(det_model_dir, "", "Path of det inference model.");
DEFINE_int32(max_side_len, 960, "max_side_len of input image.");
DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh.");
DEFINE_double(det_db_box_thresh, 0.6, "Threshold of det_db_box_thresh.");
DEFINE_double(det_db_unclip_ratio, 1.5, "Threshold of det_db_unclip_ratio.");
DEFINE_bool(use_dilation, false, "Whether use the dilation on output map.");
// NOTE(review): this flag is a string mode, not a boolean; the old help
// text ("Whether use polygon score.") predated the slow/fast rename.
DEFINE_string(det_db_score_mode, "slow",
              "DB box score mode: 'slow' uses the polygon score, 'fast' uses "
              "the rectangle score.");
DEFINE_bool(visualize, true, "Whether show the detection results.");
// classification related
DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls.");
DEFINE_string(cls_model_dir, "", "Path of cls inference model.");
DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
DEFINE_int32(cls_batch_num, 1, "cls_batch_num.");
// recognition related
DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
DEFINE_int32(rec_batch_num, 6, "rec_batch_num.");
DEFINE_string(rec_char_dict_path, "../../ppocr/utils/ppocr_keys_v1.txt",
              "Path of dictionary.");
// ocr forward related
DEFINE_bool(det, true, "Whether use det in forward.");
DEFINE_bool(rec, true, "Whether use rec in forward.");
DEFINE_bool(cls, false, "Whether use cls in forward.");
\ No newline at end of file
...@@ -11,273 +11,19 @@ ...@@ -11,273 +11,19 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "omp.h"
#include "opencv2/core.hpp" #include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp" #include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp" #include "opencv2/imgproc.hpp"
#include <chrono>
#include <iomanip>
#include <iostream> #include <iostream>
#include <ostream>
#include <sys/stat.h>
#include <vector> #include <vector>
#include <cstring> #include <include/args.h>
#include <fstream> #include <include/paddleocr.h>
#include <numeric>
#include <include/ocr_cls.h>
#include <include/ocr_det.h>
#include <include/ocr_rec.h>
#include <include/utility.h>
#include <sys/stat.h>
#include "auto_log/autolog.h"
#include <gflags/gflags.h>
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU.");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute.");
DEFINE_int32(gpu_mem, 4000, "GPU id when infering with GPU.");
DEFINE_int32(cpu_threads, 10, "Num of threads with CPU.");
DEFINE_bool(enable_mkldnn, false, "Whether use mkldnn with CPU.");
DEFINE_bool(use_tensorrt, false, "Whether use tensorrt.");
DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8");
DEFINE_bool(benchmark, false, "Whether use benchmark.");
DEFINE_string(output, "./output/", "Save benchmark log path.");
// detection related
DEFINE_string(image_dir, "", "Dir of input image.");
DEFINE_string(det_model_dir, "", "Path of det inference model.");
DEFINE_int32(max_side_len, 960, "max_side_len of input image.");
DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh.");
DEFINE_double(det_db_box_thresh, 0.6, "Threshold of det_db_box_thresh.");
DEFINE_double(det_db_unclip_ratio, 1.5, "Threshold of det_db_unclip_ratio.");
DEFINE_bool(use_polygon_score, false, "Whether use polygon score.");
DEFINE_bool(use_dilation, false, "Whether use the dilation on output map.");
DEFINE_bool(visualize, true, "Whether show the detection results.");
// classification related
DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls.");
DEFINE_string(cls_model_dir, "", "Path of cls inference model.");
DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
// recognition related
DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
DEFINE_int32(rec_batch_num, 6, "rec_batch_num.");
DEFINE_string(rec_char_dict_path, "../../ppocr/utils/ppocr_keys_v1.txt",
"Path of dictionary.");
using namespace std;
using namespace cv;
using namespace PaddleOCR; using namespace PaddleOCR;
static bool PathExists(const std::string &path) { void check_params() {
#ifdef _WIN32 if (FLAGS_det) {
struct _stat buffer;
return (_stat(path.c_str(), &buffer) == 0);
#else
struct stat buffer;
return (stat(path.c_str(), &buffer) == 0);
#endif // !_WIN32
}
// Run text detection over every input image.
// Accumulates [preprocess, inference, postprocess] times, optionally draws the
// boxes into FLAGS_output, and reports an AutoLogger summary in benchmark mode.
// Exits the process if an image cannot be read.
int main_det(std::vector<cv::String> cv_all_img_names) {
  std::vector<double> time_info = {0, 0, 0};

  DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
                 FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
                 FLAGS_max_side_len, FLAGS_det_db_thresh,
                 FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
                 FLAGS_use_polygon_score, FLAGS_use_dilation,
                 FLAGS_use_tensorrt, FLAGS_precision);

  // Make sure the visualization/benchmark output directory exists.
  if (!PathExists(FLAGS_output)) {
    mkdir(FLAGS_output.c_str(), 0777);
  }

  for (const auto &img_path : cv_all_img_names) {
    if (!FLAGS_benchmark) {
      cout << "The predict img: " << img_path << endl;
    }

    cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
    if (!srcimg.data) {
      std::cerr << "[ERROR] image read failed! image path: " << img_path
                << endl;
      exit(1);
    }

    std::vector<std::vector<std::vector<int>>> boxes;
    std::vector<double> det_times;
    det.Run(srcimg, boxes, &det_times);

    // visualization
    if (FLAGS_visualize) {
      std::string file_name = Utility::basename(img_path);
      Utility::VisualizeBboxes(srcimg, boxes, FLAGS_output + "/" + file_name);
    }

    time_info[0] += det_times[0];
    time_info[1] += det_times[1];
    time_info[2] += det_times[2];

    // Benchmark mode dumps "path<TAB>x y x y ..." for all box corners.
    if (FLAGS_benchmark) {
      cout << img_path << '\t';
      for (const auto &box : boxes) {
        for (const auto &point : box) {
          cout << point[0] << ' ' << point[1] << ' ';
        }
      }
      cout << endl;
    }
  }

  if (FLAGS_benchmark) {
    AutoLogger autolog("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt,
                       FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic",
                       FLAGS_precision, time_info, cv_all_img_names.size());
    autolog.report();
  }
  return 0;
}
// Run text recognition on every input image and print "<path>\t<text>\t<score>".
// Reports an AutoLogger summary in benchmark mode; exits the process if any
// image fails to load.
int main_rec(std::vector<cv::String> cv_all_img_names) {
  std::vector<double> time_info = {0, 0, 0};

  // In benchmark mode the dictionary path is rewritten by dropping its first
  // six characters (the "../../" prefix) to match the benchmark working dir.
  std::string rec_char_dict_path = FLAGS_rec_char_dict_path;
  if (FLAGS_benchmark)
    rec_char_dict_path = FLAGS_rec_char_dict_path.substr(6);
  cout << "label file: " << rec_char_dict_path << endl;

  CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
                     FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
                     rec_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision,
                     FLAGS_rec_batch_num);

  // Load every image up front; abort on the first unreadable one.
  std::vector<cv::Mat> img_list;
  for (const auto &img_path : cv_all_img_names) {
    cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
    if (!srcimg.data) {
      std::cerr << "[ERROR] image read failed! image path: " << img_path
                << endl;
      exit(1);
    }
    img_list.push_back(srcimg);
  }

  std::vector<std::string> rec_texts(img_list.size(), "");
  std::vector<float> rec_text_scores(img_list.size(), 0);
  std::vector<double> rec_times;
  rec.Run(img_list, rec_texts, rec_text_scores, &rec_times);

  // output rec results
  for (size_t k = 0; k < rec_texts.size(); ++k) {
    cout << "The predict img: " << cv_all_img_names[k] << "\t" << rec_texts[k]
         << "\t" << rec_text_scores[k] << endl;
  }

  time_info[0] += rec_times[0];
  time_info[1] += rec_times[1];
  time_info[2] += rec_times[2];

  if (FLAGS_benchmark) {
    AutoLogger autolog("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt,
                       FLAGS_enable_mkldnn, FLAGS_cpu_threads,
                       FLAGS_rec_batch_num, "dynamic", FLAGS_precision,
                       time_info, cv_all_img_names.size());
    autolog.report();
  }
  return 0;
}
// Full pipeline: detection -> (optional) angle classification -> recognition.
// Fixes vs. previous revision:
//   * the Classifier allocated with `new` was never freed (memory leak) —
//     it is now deleted before returning;
//   * the inner rec-result loop reused `i`, shadowing the outer image index.
int main_system(std::vector<cv::String> cv_all_img_names) {
  std::vector<double> time_info_det = {0, 0, 0};
  std::vector<double> time_info_rec = {0, 0, 0};

  // Make sure the visualization/benchmark output directory exists.
  if (!PathExists(FLAGS_output)) {
    mkdir(FLAGS_output.c_str(), 0777);
  }

  DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
                 FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
                 FLAGS_max_side_len, FLAGS_det_db_thresh,
                 FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
                 FLAGS_use_polygon_score, FLAGS_use_dilation,
                 FLAGS_use_tensorrt, FLAGS_precision);

  // Angle classifier is optional; nullptr means "skip classification".
  Classifier *cls = nullptr;
  if (FLAGS_use_angle_cls) {
    cls = new Classifier(FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
                         FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
                         FLAGS_cls_thresh, FLAGS_use_tensorrt, FLAGS_precision);
  }

  // In benchmark mode the dictionary path is rewritten by dropping its first
  // six characters (the "../../" prefix) to match the benchmark working dir.
  std::string rec_char_dict_path = FLAGS_rec_char_dict_path;
  if (FLAGS_benchmark)
    rec_char_dict_path = FLAGS_rec_char_dict_path.substr(6);
  cout << "label file: " << rec_char_dict_path << endl;

  CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
                     FLAGS_gpu_mem, FLAGS_cpu_threads, FLAGS_enable_mkldnn,
                     rec_char_dict_path, FLAGS_use_tensorrt, FLAGS_precision,
                     FLAGS_rec_batch_num);

  for (int i = 0; i < cv_all_img_names.size(); ++i) {
    cout << "The predict img: " << cv_all_img_names[i] << endl;

    cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
    if (!srcimg.data) {
      std::cerr << "[ERROR] image read failed! image path: "
                << cv_all_img_names[i] << endl;
      exit(1);
    }

    // det
    std::vector<std::vector<std::vector<int>>> boxes;
    std::vector<double> det_times;
    std::vector<double> rec_times;

    det.Run(srcimg, boxes, &det_times);
    if (FLAGS_visualize) {
      std::string file_name = Utility::basename(cv_all_img_names[i]);
      Utility::VisualizeBboxes(srcimg, boxes, FLAGS_output + "/" + file_name);
    }
    time_info_det[0] += det_times[0];
    time_info_det[1] += det_times[1];
    time_info_det[2] += det_times[2];

    // rec: crop each detected box (optionally angle-corrected) and recognize.
    std::vector<cv::Mat> img_list;
    for (int j = 0; j < boxes.size(); j++) {
      cv::Mat crop_img;
      crop_img = Utility::GetRotateCropImage(srcimg, boxes[j]);
      if (cls != nullptr) {
        crop_img = cls->Run(crop_img);
      }
      img_list.push_back(crop_img);
    }

    std::vector<std::string> rec_texts(img_list.size(), "");
    std::vector<float> rec_text_scores(img_list.size(), 0);
    rec.Run(img_list, rec_texts, rec_text_scores, &rec_times);

    // output rec results (index renamed: previously shadowed the outer `i`)
    for (int k = 0; k < rec_texts.size(); k++) {
      std::cout << k << "\t" << rec_texts[k] << "\t" << rec_text_scores[k]
                << std::endl;
    }
    time_info_rec[0] += rec_times[0];
    time_info_rec[1] += rec_times[1];
    time_info_rec[2] += rec_times[2];
  }

  if (FLAGS_benchmark) {
    AutoLogger autolog_det("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic",
                           FLAGS_precision, time_info_det,
                           cv_all_img_names.size());
    AutoLogger autolog_rec("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads,
                           FLAGS_rec_batch_num, "dynamic", FLAGS_precision,
                           time_info_rec, cv_all_img_names.size());
    autolog_det.report();
    std::cout << endl;
    autolog_rec.report();
  }

  delete cls; // fix leak; `delete nullptr` is a no-op
  return 0;
}
void check_params(char *mode) {
if (strcmp(mode, "det") == 0) {
if (FLAGS_det_model_dir.empty() || FLAGS_image_dir.empty()) { if (FLAGS_det_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[det]: ./ppocr " std::cout << "Usage[det]: ./ppocr "
"--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ " "--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
...@@ -285,7 +31,7 @@ void check_params(char *mode) { ...@@ -285,7 +31,7 @@ void check_params(char *mode) {
exit(1); exit(1);
} }
} }
if (strcmp(mode, "rec") == 0) { if (FLAGS_rec) {
if (FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) { if (FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[rec]: ./ppocr " std::cout << "Usage[rec]: ./ppocr "
"--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ " "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
...@@ -293,19 +39,10 @@ void check_params(char *mode) { ...@@ -293,19 +39,10 @@ void check_params(char *mode) {
exit(1); exit(1);
} }
} }
if (strcmp(mode, "system") == 0) { if (FLAGS_cls && FLAGS_use_angle_cls) {
if ((FLAGS_det_model_dir.empty() || FLAGS_rec_model_dir.empty() || if (FLAGS_cls_model_dir.empty() || FLAGS_image_dir.empty()) {
FLAGS_image_dir.empty()) || std::cout << "Usage[cls]: ./ppocr "
(FLAGS_use_angle_cls && FLAGS_cls_model_dir.empty())) { << "--cls_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
std::cout << "Usage[system without angle cls]: ./ppocr "
"--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
std::cout << "Usage[system with angle cls]: ./ppocr "
"--det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--use_angle_cls=true "
<< "--cls_model_dir=/PATH/TO/CLS_INFERENCE_MODEL/ "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl; << "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1); exit(1);
} }
...@@ -318,19 +55,11 @@ void check_params(char *mode) { ...@@ -318,19 +55,11 @@ void check_params(char *mode) {
} }
int main(int argc, char **argv) { int main(int argc, char **argv) {
if (argc <= 1 ||
(strcmp(argv[1], "det") != 0 && strcmp(argv[1], "rec") != 0 &&
strcmp(argv[1], "system") != 0)) {
std::cout << "Please choose one mode of [det, rec, system] !" << std::endl;
return -1;
}
std::cout << "mode: " << argv[1] << endl;
// Parsing command-line // Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true); google::ParseCommandLineFlags(&argc, &argv, true);
check_params(argv[1]); check_params();
if (!PathExists(FLAGS_image_dir)) { if (!Utility::PathExists(FLAGS_image_dir)) {
std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir
<< endl; << endl;
exit(1); exit(1);
...@@ -340,13 +69,37 @@ int main(int argc, char **argv) { ...@@ -340,13 +69,37 @@ int main(int argc, char **argv) {
cv::glob(FLAGS_image_dir, cv_all_img_names); cv::glob(FLAGS_image_dir, cv_all_img_names);
std::cout << "total images num: " << cv_all_img_names.size() << endl; std::cout << "total images num: " << cv_all_img_names.size() << endl;
if (strcmp(argv[1], "det") == 0) { PaddleOCR::PaddleOCR ocr = PaddleOCR::PaddleOCR();
return main_det(cv_all_img_names);
} std::vector<std::vector<OCRPredictResult>> ocr_results =
if (strcmp(argv[1], "rec") == 0) { ocr.ocr(cv_all_img_names, FLAGS_det, FLAGS_rec, FLAGS_cls);
return main_rec(cv_all_img_names);
} for (int i = 0; i < cv_all_img_names.size(); ++i) {
if (strcmp(argv[1], "system") == 0) { if (FLAGS_benchmark) {
return main_system(cv_all_img_names); cout << cv_all_img_names[i] << '\t';
for (int n = 0; n < ocr_results[i].size(); n++) {
for (int m = 0; m < ocr_results[i][n].box.size(); m++) {
cout << ocr_results[i][n].box[m][0] << ' '
<< ocr_results[i][n].box[m][1] << ' ';
}
}
cout << endl;
} else {
cout << cv_all_img_names[i] << "\n";
Utility::print_result(ocr_results[i]);
if (FLAGS_visualize && FLAGS_det) {
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: "
<< cv_all_img_names[i] << endl;
exit(1);
}
std::string file_name = Utility::basename(cv_all_img_names[i]);
Utility::VisualizeBboxes(srcimg, ocr_results[i],
FLAGS_output + "/" + file_name);
}
cout << "***************************" << endl;
}
} }
} }
...@@ -16,57 +16,84 @@ ...@@ -16,57 +16,84 @@
namespace PaddleOCR { namespace PaddleOCR {
cv::Mat Classifier::Run(cv::Mat &img) { void Classifier::Run(std::vector<cv::Mat> img_list,
cv::Mat src_img; std::vector<int> &cls_labels,
img.copyTo(src_img); std::vector<float> &cls_scores,
cv::Mat resize_img; std::vector<double> &times) {
std::chrono::duration<float> preprocess_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
std::chrono::duration<float> inference_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
std::chrono::duration<float> postprocess_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
int img_num = img_list.size();
std::vector<int> cls_image_shape = {3, 48, 192}; std::vector<int> cls_image_shape = {3, 48, 192};
int index = 0; for (int beg_img_no = 0; beg_img_no < img_num;
float wh_ratio = float(img.cols) / float(img.rows); beg_img_no += this->cls_batch_num_) {
auto preprocess_start = std::chrono::steady_clock::now();
this->resize_op_.Run(img, resize_img, this->use_tensorrt_, cls_image_shape); int end_img_no = min(img_num, beg_img_no + this->cls_batch_num_);
int batch_num = end_img_no - beg_img_no;
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, // preprocess
this->is_scale_); std::vector<cv::Mat> norm_img_batch;
for (int ino = beg_img_no; ino < end_img_no; ino++) {
std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f); cv::Mat srcimg;
img_list[ino].copyTo(srcimg);
this->permute_op_.Run(&resize_img, input.data()); cv::Mat resize_img;
this->resize_op_.Run(srcimg, resize_img, this->use_tensorrt_,
// Inference. cls_image_shape);
auto input_names = this->predictor_->GetInputNames();
auto input_t = this->predictor_->GetInputHandle(input_names[0]); this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); this->is_scale_);
input_t->CopyFromCpu(input.data()); norm_img_batch.push_back(resize_img);
this->predictor_->Run();
std::vector<float> softmax_out;
std::vector<int64_t> label_out;
auto output_names = this->predictor_->GetOutputNames();
auto softmax_out_t = this->predictor_->GetOutputHandle(output_names[0]);
auto softmax_shape_out = softmax_out_t->shape();
int softmax_out_num =
std::accumulate(softmax_shape_out.begin(), softmax_shape_out.end(), 1,
std::multiplies<int>());
softmax_out.resize(softmax_out_num);
softmax_out_t->CopyToCpu(softmax_out.data());
float score = 0;
int label = 0;
for (int i = 0; i < softmax_out_num; i++) {
if (softmax_out[i] > score) {
score = softmax_out[i];
label = i;
} }
std::vector<float> input(batch_num * cls_image_shape[0] *
cls_image_shape[1] * cls_image_shape[2],
0.0f);
this->permute_op_.Run(norm_img_batch, input.data());
auto preprocess_end = std::chrono::steady_clock::now();
preprocess_diff += preprocess_end - preprocess_start;
// inference.
auto input_names = this->predictor_->GetInputNames();
auto input_t = this->predictor_->GetInputHandle(input_names[0]);
input_t->Reshape({batch_num, cls_image_shape[0], cls_image_shape[1],
cls_image_shape[2]});
auto inference_start = std::chrono::steady_clock::now();
input_t->CopyFromCpu(input.data());
this->predictor_->Run();
std::vector<float> predict_batch;
auto output_names = this->predictor_->GetOutputNames();
auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
auto predict_shape = output_t->shape();
int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1,
std::multiplies<int>());
predict_batch.resize(out_num);
output_t->CopyToCpu(predict_batch.data());
auto inference_end = std::chrono::steady_clock::now();
inference_diff += inference_end - inference_start;
// postprocess
auto postprocess_start = std::chrono::steady_clock::now();
for (int batch_idx = 0; batch_idx < predict_shape[0]; batch_idx++) {
int label = int(
Utility::argmax(&predict_batch[batch_idx * predict_shape[1]],
&predict_batch[(batch_idx + 1) * predict_shape[1]]));
float score = float(*std::max_element(
&predict_batch[batch_idx * predict_shape[1]],
&predict_batch[(batch_idx + 1) * predict_shape[1]]));
cls_labels[beg_img_no + batch_idx] = label;
cls_scores[beg_img_no + batch_idx] = score;
}
auto postprocess_end = std::chrono::steady_clock::now();
postprocess_diff += postprocess_end - postprocess_start;
} }
if (label % 2 == 1 && score > this->cls_thresh) { times.push_back(double(preprocess_diff.count() * 1000));
cv::rotate(src_img, src_img, 1); times.push_back(double(inference_diff.count() * 1000));
} times.push_back(double(postprocess_diff.count() * 1000));
return src_img;
} }
void Classifier::LoadModel(const std::string &model_dir) { void Classifier::LoadModel(const std::string &model_dir) {
...@@ -81,13 +108,10 @@ void Classifier::LoadModel(const std::string &model_dir) { ...@@ -81,13 +108,10 @@ void Classifier::LoadModel(const std::string &model_dir) {
if (this->precision_ == "fp16") { if (this->precision_ == "fp16") {
precision = paddle_infer::Config::Precision::kHalf; precision = paddle_infer::Config::Precision::kHalf;
} }
if (this->precision_ == "int8") { if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} }
config.EnableTensorRtEngine( config.EnableTensorRtEngine(1 << 20, 10, 3, precision, false, false);
1 << 20, 10, 3,
precision,
false, false);
} }
} else { } else {
config.DisableGpu(); config.DisableGpu();
......
...@@ -94,7 +94,7 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -94,7 +94,7 @@ void DBDetector::LoadModel(const std::string &model_dir) {
void DBDetector::Run(cv::Mat &img, void DBDetector::Run(cv::Mat &img,
std::vector<std::vector<std::vector<int>>> &boxes, std::vector<std::vector<std::vector<int>>> &boxes,
std::vector<double> *times) { std::vector<double> &times) {
float ratio_h{}; float ratio_h{};
float ratio_w{}; float ratio_w{};
...@@ -161,20 +161,19 @@ void DBDetector::Run(cv::Mat &img, ...@@ -161,20 +161,19 @@ void DBDetector::Run(cv::Mat &img,
boxes = post_processor_.BoxesFromBitmap( boxes = post_processor_.BoxesFromBitmap(
pred_map, bit_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_, pred_map, bit_map, this->det_db_box_thresh_, this->det_db_unclip_ratio_,
this->use_polygon_score_); this->det_db_score_mode_);
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
auto postprocess_end = std::chrono::steady_clock::now(); auto postprocess_end = std::chrono::steady_clock::now();
std::cout << "Detected boxes num: " << boxes.size() << endl;
std::chrono::duration<float> preprocess_diff = std::chrono::duration<float> preprocess_diff =
preprocess_end - preprocess_start; preprocess_end - preprocess_start;
times->push_back(double(preprocess_diff.count() * 1000)); times.push_back(double(preprocess_diff.count() * 1000));
std::chrono::duration<float> inference_diff = inference_end - inference_start; std::chrono::duration<float> inference_diff = inference_end - inference_start;
times->push_back(double(inference_diff.count() * 1000)); times.push_back(double(inference_diff.count() * 1000));
std::chrono::duration<float> postprocess_diff = std::chrono::duration<float> postprocess_diff =
postprocess_end - postprocess_start; postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000)); times.push_back(double(postprocess_diff.count() * 1000));
} }
} // namespace PaddleOCR } // namespace PaddleOCR
...@@ -19,7 +19,7 @@ namespace PaddleOCR { ...@@ -19,7 +19,7 @@ namespace PaddleOCR {
void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
std::vector<std::string> &rec_texts, std::vector<std::string> &rec_texts,
std::vector<float> &rec_text_scores, std::vector<float> &rec_text_scores,
std::vector<double> *times) { std::vector<double> &times) {
std::chrono::duration<float> preprocess_diff = std::chrono::duration<float> preprocess_diff =
std::chrono::steady_clock::now() - std::chrono::steady_clock::now(); std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
std::chrono::duration<float> inference_diff = std::chrono::duration<float> inference_diff =
...@@ -38,6 +38,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -38,6 +38,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
beg_img_no += this->rec_batch_num_) { beg_img_no += this->rec_batch_num_) {
auto preprocess_start = std::chrono::steady_clock::now(); auto preprocess_start = std::chrono::steady_clock::now();
int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_); int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_);
int batch_num = end_img_no - beg_img_no;
float max_wh_ratio = 0; float max_wh_ratio = 0;
for (int ino = beg_img_no; ino < end_img_no; ino++) { for (int ino = beg_img_no; ino < end_img_no; ino++) {
int h = img_list[indices[ino]].rows; int h = img_list[indices[ino]].rows;
...@@ -45,6 +46,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -45,6 +46,7 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
float wh_ratio = w * 1.0 / h; float wh_ratio = w * 1.0 / h;
max_wh_ratio = max(max_wh_ratio, wh_ratio); max_wh_ratio = max(max_wh_ratio, wh_ratio);
} }
int batch_width = 0; int batch_width = 0;
std::vector<cv::Mat> norm_img_batch; std::vector<cv::Mat> norm_img_batch;
for (int ino = beg_img_no; ino < end_img_no; ino++) { for (int ino = beg_img_no; ino < end_img_no; ino++) {
...@@ -59,15 +61,14 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -59,15 +61,14 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
batch_width = max(resize_img.cols, batch_width); batch_width = max(resize_img.cols, batch_width);
} }
std::vector<float> input(this->rec_batch_num_ * 3 * 32 * batch_width, 0.0f); std::vector<float> input(batch_num * 3 * 32 * batch_width, 0.0f);
this->permute_op_.Run(norm_img_batch, input.data()); this->permute_op_.Run(norm_img_batch, input.data());
auto preprocess_end = std::chrono::steady_clock::now(); auto preprocess_end = std::chrono::steady_clock::now();
preprocess_diff += preprocess_end - preprocess_start; preprocess_diff += preprocess_end - preprocess_start;
// Inference. // Inference.
auto input_names = this->predictor_->GetInputNames(); auto input_names = this->predictor_->GetInputNames();
auto input_t = this->predictor_->GetInputHandle(input_names[0]); auto input_t = this->predictor_->GetInputHandle(input_names[0]);
input_t->Reshape({this->rec_batch_num_, 3, 32, batch_width}); input_t->Reshape({batch_num, 3, 32, batch_width});
auto inference_start = std::chrono::steady_clock::now(); auto inference_start = std::chrono::steady_clock::now();
input_t->CopyFromCpu(input.data()); input_t->CopyFromCpu(input.data());
this->predictor_->Run(); this->predictor_->Run();
...@@ -84,7 +85,6 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -84,7 +85,6 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
output_t->CopyToCpu(predict_batch.data()); output_t->CopyToCpu(predict_batch.data());
auto inference_end = std::chrono::steady_clock::now(); auto inference_end = std::chrono::steady_clock::now();
inference_diff += inference_end - inference_start; inference_diff += inference_end - inference_start;
// ctc decode // ctc decode
auto postprocess_start = std::chrono::steady_clock::now(); auto postprocess_start = std::chrono::steady_clock::now();
for (int m = 0; m < predict_shape[0]; m++) { for (int m = 0; m < predict_shape[0]; m++) {
...@@ -120,9 +120,9 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, ...@@ -120,9 +120,9 @@ void CRNNRecognizer::Run(std::vector<cv::Mat> img_list,
auto postprocess_end = std::chrono::steady_clock::now(); auto postprocess_end = std::chrono::steady_clock::now();
postprocess_diff += postprocess_end - postprocess_start; postprocess_diff += postprocess_end - postprocess_start;
} }
times->push_back(double(preprocess_diff.count() * 1000)); times.push_back(double(preprocess_diff.count() * 1000));
times->push_back(double(inference_diff.count() * 1000)); times.push_back(double(inference_diff.count() * 1000));
times->push_back(double(postprocess_diff.count() * 1000)); times.push_back(double(postprocess_diff.count() * 1000));
} }
void CRNNRecognizer::LoadModel(const std::string &model_dir) { void CRNNRecognizer::LoadModel(const std::string &model_dir) {
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <include/args.h>
#include <include/paddleocr.h>
#include "auto_log/autolog.h"
#include <numeric>
#include <sys/stat.h>
namespace PaddleOCR {
// Construct the OCR pipeline. Each stage (detector / angle classifier /
// recognizer) is heap-allocated only when its gflag enables it; stages left
// unbuilt are presumably nullptr-initialized in the class definition — TODO
// confirm in the header (the destructor relies on that).
PaddleOCR::PaddleOCR() {
  if (FLAGS_det) {
    this->detector_ = new DBDetector(
        FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
        FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_max_side_len,
        FLAGS_det_db_thresh, FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
        FLAGS_det_db_score_mode, FLAGS_use_dilation, FLAGS_use_tensorrt,
        FLAGS_precision);
  }

  // Classifier is only useful together with use_angle_cls; both must be set.
  if (FLAGS_cls && FLAGS_use_angle_cls) {
    this->classifier_ = new Classifier(
        FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
        FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_cls_thresh,
        FLAGS_use_tensorrt, FLAGS_precision, FLAGS_cls_batch_num);
  }
  if (FLAGS_rec) {
    this->recognizer_ = new CRNNRecognizer(
        FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id, FLAGS_gpu_mem,
        FLAGS_cpu_threads, FLAGS_enable_mkldnn, FLAGS_rec_char_dict_path,
        FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num);
  }
};
// Detect text regions in `img`: append one OCRPredictResult (box only) per
// detected region to `ocr_results`, and accumulate the detector's
// [preprocess, inference, postprocess] timings into `times`.
void PaddleOCR::det(cv::Mat img, std::vector<OCRPredictResult> &ocr_results,
                    std::vector<double> &times) {
  std::vector<std::vector<std::vector<int>>> boxes;
  std::vector<double> det_times;
  this->detector_->Run(img, boxes, det_times);

  for (const auto &box : boxes) {
    OCRPredictResult result;
    result.box = box;
    ocr_results.push_back(result);
  }

  for (int k = 0; k < 3; ++k) {
    times[k] += det_times[k];
  }
}
// Recognize text in each cropped image and write text/score into the
// corresponding slot of `ocr_results`; accumulate recognizer timings.
void PaddleOCR::rec(std::vector<cv::Mat> img_list,
                    std::vector<OCRPredictResult> &ocr_results,
                    std::vector<double> &times) {
  const size_t count = img_list.size();
  std::vector<std::string> rec_texts(count, "");
  std::vector<float> rec_text_scores(count, 0);
  std::vector<double> rec_times;
  this->recognizer_->Run(img_list, rec_texts, rec_text_scores, rec_times);

  // output rec results
  for (size_t k = 0; k < rec_texts.size(); ++k) {
    ocr_results[k].text = rec_texts[k];
    ocr_results[k].score = rec_text_scores[k];
  }

  for (int t = 0; t < 3; ++t) {
    times[t] += rec_times[t];
  }
}
// Classify the orientation of each cropped image and write label/score into
// the corresponding slot of `ocr_results`; accumulate classifier timings.
void PaddleOCR::cls(std::vector<cv::Mat> img_list,
                    std::vector<OCRPredictResult> &ocr_results,
                    std::vector<double> &times) {
  const size_t count = img_list.size();
  std::vector<int> cls_labels(count, 0);
  std::vector<float> cls_scores(count, 0);
  std::vector<double> cls_times;
  this->classifier_->Run(img_list, cls_labels, cls_scores, cls_times);

  // output cls results
  for (size_t k = 0; k < cls_labels.size(); ++k) {
    ocr_results[k].cls_label = cls_labels[k];
    ocr_results[k].cls_score = cls_scores[k];
  }

  for (int t = 0; t < 3; ++t) {
    times[t] += cls_times[t];
  }
}
// Run the OCR pipeline over a batch of image paths. `det`, `rec` and `cls`
// toggle the detection, recognition and angle-classification stages; the
// result is one vector of OCRPredictResult per input image.
// Fixes vs. previous revision:
//   * the inner classifier loop in the det branch reused `i`, shadowing the
//     outer image index — renamed;
//   * magic constant 1 passed to cv::rotate replaced by cv::ROTATE_180;
//   * the function's closing brace was mislabeled "// namespace PaddleOCR".
std::vector<std::vector<OCRPredictResult>>
PaddleOCR::ocr(std::vector<cv::String> cv_all_img_names, bool det, bool rec,
               bool cls) {
  std::vector<double> time_info_det = {0, 0, 0};
  std::vector<double> time_info_rec = {0, 0, 0};
  std::vector<double> time_info_cls = {0, 0, 0};
  std::vector<std::vector<OCRPredictResult>> ocr_results;

  if (!det) {
    // No detection: treat each whole image as a single region.
    std::vector<OCRPredictResult> ocr_result;
    // read image
    std::vector<cv::Mat> img_list;
    for (int i = 0; i < cv_all_img_names.size(); ++i) {
      cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
      if (!srcimg.data) {
        std::cerr << "[ERROR] image read failed! image path: "
                  << cv_all_img_names[i] << endl;
        exit(1);
      }
      img_list.push_back(srcimg);
      OCRPredictResult res;
      ocr_result.push_back(res);
    }
    if (cls && this->classifier_ != nullptr) {
      this->cls(img_list, ocr_result, time_info_cls);
      // Rotate images the classifier marked as upside-down.
      for (int i = 0; i < img_list.size(); i++) {
        if (ocr_result[i].cls_label % 2 == 1 &&
            ocr_result[i].cls_score > this->classifier_->cls_thresh) {
          cv::rotate(img_list[i], img_list[i], cv::ROTATE_180);
        }
      }
    }
    if (rec) {
      this->rec(img_list, ocr_result, time_info_rec);
    }
    // Wrap each single result so the return shape is one vector per image.
    for (int i = 0; i < cv_all_img_names.size(); ++i) {
      std::vector<OCRPredictResult> ocr_result_tmp;
      ocr_result_tmp.push_back(ocr_result[i]);
      ocr_results.push_back(ocr_result_tmp);
    }
  } else {
    if (!Utility::PathExists(FLAGS_output) && FLAGS_det) {
      mkdir(FLAGS_output.c_str(), 0777);
    }
    for (int i = 0; i < cv_all_img_names.size(); ++i) {
      std::vector<OCRPredictResult> ocr_result;
      if (!FLAGS_benchmark) {
        cout << "predict img: " << cv_all_img_names[i] << endl;
      }
      cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
      if (!srcimg.data) {
        std::cerr << "[ERROR] image read failed! image path: "
                  << cv_all_img_names[i] << endl;
        exit(1);
      }
      // det
      this->det(srcimg, ocr_result, time_info_det);
      // crop image
      std::vector<cv::Mat> img_list;
      for (int j = 0; j < ocr_result.size(); j++) {
        cv::Mat crop_img;
        crop_img = Utility::GetRotateCropImage(srcimg, ocr_result[j].box);
        img_list.push_back(crop_img);
      }
      // cls (index renamed: previously shadowed the outer `i`)
      if (cls && this->classifier_ != nullptr) {
        this->cls(img_list, ocr_result, time_info_cls);
        for (int m = 0; m < img_list.size(); m++) {
          if (ocr_result[m].cls_label % 2 == 1 &&
              ocr_result[m].cls_score > this->classifier_->cls_thresh) {
            cv::rotate(img_list[m], img_list[m], cv::ROTATE_180);
          }
        }
      }
      // rec
      if (rec) {
        this->rec(img_list, ocr_result, time_info_rec);
      }
      ocr_results.push_back(ocr_result);
    }
  }
  if (FLAGS_benchmark) {
    this->log(time_info_det, time_info_rec, time_info_cls,
              cv_all_img_names.size());
  }
  return ocr_results;
}
// Emit an AutoLogger benchmark report for each pipeline stage that actually
// ran; a stage whose accumulated time vector is all zeros is skipped.
void PaddleOCR::log(std::vector<double> &det_times,
                    std::vector<double> &rec_times,
                    std::vector<double> &cls_times, int img_num) {
  const bool det_ran = det_times[0] + det_times[1] + det_times[2] > 0;
  const bool rec_ran = rec_times[0] + rec_times[1] + rec_times[2] > 0;
  const bool cls_ran = cls_times[0] + cls_times[1] + cls_times[2] > 0;

  if (det_ran) {
    AutoLogger autolog_det("ocr_det", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads, 1, "dynamic",
                           FLAGS_precision, det_times, img_num);
    autolog_det.report();
  }
  if (rec_ran) {
    AutoLogger autolog_rec("ocr_rec", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads,
                           FLAGS_rec_batch_num, "dynamic", FLAGS_precision,
                           rec_times, img_num);
    autolog_rec.report();
  }
  if (cls_ran) {
    AutoLogger autolog_cls("ocr_cls", FLAGS_use_gpu, FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn, FLAGS_cpu_threads,
                           FLAGS_cls_batch_num, "dynamic", FLAGS_precision,
                           cls_times, img_num);
    autolog_cls.report();
  }
}
// Release whichever pipeline stages were allocated by the constructor.
// `delete` on a null pointer is a no-op, so no explicit checks are needed.
PaddleOCR::~PaddleOCR() {
  delete this->detector_;
  delete this->classifier_;
  delete this->recognizer_;
}
} // namespace PaddleOCR
...@@ -12,8 +12,8 @@ ...@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <include/clipper.h>
#include <include/postprocess_op.h> #include <include/postprocess_op.h>
#include <include/clipper.cpp>
namespace PaddleOCR { namespace PaddleOCR {
...@@ -187,23 +187,22 @@ float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour, ...@@ -187,23 +187,22 @@ float PostProcessor::PolygonScoreAcc(std::vector<cv::Point> contour,
cv::Mat mask; cv::Mat mask;
mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1); mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8UC1);
cv::Point *rook_point = new cv::Point[contour.size()];
cv::Point* rook_point = new cv::Point[contour.size()];
for (int i = 0; i < contour.size(); ++i) { for (int i = 0; i < contour.size(); ++i) {
rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin); rook_point[i] = cv::Point(int(box_x[i]) - xmin, int(box_y[i]) - ymin);
} }
const cv::Point *ppt[1] = {rook_point}; const cv::Point *ppt[1] = {rook_point};
int npt[] = {int(contour.size())}; int npt[] = {int(contour.size())};
cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1)); cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
cv::Mat croppedImg; cv::Mat croppedImg;
pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1)).copyTo(croppedImg); pred(cv::Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1))
.copyTo(croppedImg);
float score = cv::mean(croppedImg, mask)[0]; float score = cv::mean(croppedImg, mask)[0];
delete []rook_point; delete[] rook_point;
return score; return score;
} }
...@@ -247,7 +246,7 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array, ...@@ -247,7 +246,7 @@ float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap( std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh, const cv::Mat pred, const cv::Mat bitmap, const float &box_thresh,
const float &det_db_unclip_ratio, const bool &use_polygon_score) { const float &det_db_unclip_ratio, const std::string &det_db_score_mode) {
const int min_size = 3; const int min_size = 3;
const int max_candidates = 1000; const int max_candidates = 1000;
...@@ -281,7 +280,7 @@ std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap( ...@@ -281,7 +280,7 @@ std::vector<std::vector<std::vector<int>>> PostProcessor::BoxesFromBitmap(
} }
float score; float score;
if (use_polygon_score) if (det_db_score_mode == "slow")
/* compute using polygon*/ /* compute using polygon*/
score = PolygonScoreAcc(contours[_i], pred); score = PolygonScoreAcc(contours[_i], pred);
else else
......
...@@ -38,16 +38,16 @@ std::vector<std::string> Utility::ReadDict(const std::string &path) { ...@@ -38,16 +38,16 @@ std::vector<std::string> Utility::ReadDict(const std::string &path) {
return m_vec; return m_vec;
} }
void Utility::VisualizeBboxes( void Utility::VisualizeBboxes(const cv::Mat &srcimg,
const cv::Mat &srcimg, const std::vector<OCRPredictResult> &ocr_result,
const std::vector<std::vector<std::vector<int>>> &boxes, const std::string &save_path) {
const std::string &save_path) {
cv::Mat img_vis; cv::Mat img_vis;
srcimg.copyTo(img_vis); srcimg.copyTo(img_vis);
for (int n = 0; n < boxes.size(); n++) { for (int n = 0; n < ocr_result.size(); n++) {
cv::Point rook_points[4]; cv::Point rook_points[4];
for (int m = 0; m < boxes[n].size(); m++) { for (int m = 0; m < ocr_result[n].box.size(); m++) {
rook_points[m] = cv::Point(int(boxes[n][m][0]), int(boxes[n][m][1])); rook_points[m] =
cv::Point(int(ocr_result[n].box[m][0]), int(ocr_result[n].box[m][1]));
} }
const cv::Point *ppt[1] = {rook_points}; const cv::Point *ppt[1] = {rook_points};
...@@ -196,4 +196,43 @@ std::string Utility::basename(const std::string &filename) { ...@@ -196,4 +196,43 @@ std::string Utility::basename(const std::string &filename) {
return filename.substr(index + 1, len - index); return filename.substr(index + 1, len - index);
} }
bool Utility::PathExists(const std::string &path) {
#ifdef _WIN32
struct _stat buffer;
return (_stat(path.c_str(), &buffer) == 0);
#else
struct stat buffer;
return (stat(path.c_str(), &buffer) == 0);
#endif // !_WIN32
}
void Utility::print_result(const std::vector<OCRPredictResult> &ocr_result) {
for (int i = 0; i < ocr_result.size(); i++) {
std::cout << i << "\t";
// det
std::vector<std::vector<int>> boxes = ocr_result[i].box;
if (boxes.size() > 0) {
std::cout << "det boxes: [";
for (int n = 0; n < boxes.size(); n++) {
std::cout << '[' << boxes[n][0] << ',' << boxes[n][1] << "]";
if (n != boxes.size() - 1) {
std::cout << ',';
}
}
std::cout << "] ";
}
// rec
if (ocr_result[i].score != -1.0) {
std::cout << "rec text: " << ocr_result[i].text
<< " rec score: " << ocr_result[i].score << " ";
}
// cls
if (ocr_result[i].cls_label != -1) {
std::cout << "cls label: " << ocr_result[i].cls_label
<< " cls score: " << ocr_result[i].cls_score;
}
std::cout << std::endl;
}
}
} // namespace PaddleOCR } // namespace PaddleOCR
\ No newline at end of file
...@@ -29,8 +29,7 @@ def read_params(): ...@@ -29,8 +29,7 @@ def read_params():
cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/" cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/"
cfg.rec_image_shape = "3, 32, 320" cfg.rec_image_shape = "3, 32, 320"
cfg.rec_char_type = 'ch' cfg.rec_batch_num = 6
cfg.rec_batch_num = 30
cfg.max_text_length = 25 cfg.max_text_length = 25
cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt" cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment