Commit 3691291a authored by LDOUBLEV's avatar LDOUBLEV
Browse files

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into fix_trt_res

parents 1bf24676 49382181
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <include/config.h>
namespace PaddleOCR {
std::vector<std::string> OCRConfig::split(const std::string &str,
const std::string &delim) {
std::vector<std::string> res;
if ("" == str)
return res;
int strlen = str.length() + 1;
chars *strs = new char[strlen];
std::strcpy(strs, str.c_str());
int delimlen = delim.length() + 1;
char *d = new char[delimlen];
std::strcpy(d, delim.c_str());
delete[] strs;
delete[] d;
char *p = std::strtok(strs, d);
while (p) {
std::string s = p;
res.push_back(s);
p = std::strtok(NULL, d);
}
return res;
}
std::map<std::string, std::string>
OCRConfig::LoadConfig(const std::string &config_path) {
auto config = Utility::ReadDict(config_path);
std::map<std::string, std::string> dict;
for (int i = 0; i < config.size(); i++) {
// pass for empty line or comment
if (config[i].size() <= 1 || config[i][0] == '#') {
continue;
}
std::vector<std::string> res = split(config[i], " ");
dict[res[0]] = res[1];
}
return dict;
}
void OCRConfig::PrintConfigInfo() {
std::cout << "=======Paddle OCR inference config======" << std::endl;
for (auto iter = config_map_.begin(); iter != config_map_.end(); iter++) {
std::cout << iter->first << " : " << iter->second << std::endl;
}
std::cout << "=======End of Paddle OCR inference config======" << std::endl;
}
} // namespace PaddleOCR
...@@ -28,76 +28,276 @@ ...@@ -28,76 +28,276 @@
#include <numeric> #include <numeric>
#include <glog/logging.h> #include <glog/logging.h>
#include <include/config.h>
#include <include/ocr_det.h> #include <include/ocr_det.h>
#include <include/ocr_cls.h>
#include <include/ocr_rec.h> #include <include/ocr_rec.h>
#include <include/utility.h> #include <include/utility.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <gflags/gflags.h>
DEFINE_bool(use_gpu, false, "Infering with GPU or CPU.");
DEFINE_int32(gpu_id, 0, "Device id of GPU to execute.");
DEFINE_int32(gpu_mem, 4000, "GPU id when infering with GPU.");
DEFINE_int32(cpu_math_library_num_threads, 10, "Num of threads with CPU.");
DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU.");
DEFINE_bool(use_tensorrt, false, "Whether use tensorrt.");
DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8");
DEFINE_bool(benchmark, true, "Whether use benchmark.");
DEFINE_string(save_log_path, "./log_output/", "Save benchmark log path.");
// detection related
DEFINE_string(image_dir, "", "Dir of input image.");
DEFINE_string(det_model_dir, "", "Path of det inference model.");
DEFINE_int32(max_side_len, 960, "max_side_len of input image.");
DEFINE_double(det_db_thresh, 0.3, "Threshold of det_db_thresh.");
DEFINE_double(det_db_box_thresh, 0.5, "Threshold of det_db_box_thresh.");
DEFINE_double(det_db_unclip_ratio, 1.6, "Threshold of det_db_unclip_ratio.");
DEFINE_bool(use_polygon_score, false, "Whether use polygon score.");
DEFINE_bool(visualize, true, "Whether show the detection results.");
// classification related
DEFINE_bool(use_angle_cls, false, "Whether use use_angle_cls.");
DEFINE_string(cls_model_dir, "", "Path of cls inference model.");
DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
// recognition related
DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt", "Path of dictionary.");
using namespace std; using namespace std;
using namespace cv; using namespace cv;
using namespace PaddleOCR; using namespace PaddleOCR;
void PrintBenchmarkLog(std::string model_name,
int batch_size,
std::string input_shape,
std::vector<double> time_info,
int img_num){
LOG(INFO) << "----------------------- Config info -----------------------";
LOG(INFO) << "runtime_device: " << (FLAGS_use_gpu ? "gpu" : "cpu");
LOG(INFO) << "ir_optim: " << "True";
LOG(INFO) << "enable_memory_optim: " << "True";
LOG(INFO) << "enable_tensorrt: " << FLAGS_use_tensorrt;
LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_math_library_num_threads;
LOG(INFO) << "----------------------- Data info -----------------------";
LOG(INFO) << "batch_size: " << batch_size;
LOG(INFO) << "input_shape: " << input_shape;
LOG(INFO) << "data_num: " << img_num;
LOG(INFO) << "----------------------- Model info -----------------------";
LOG(INFO) << "model_name: " << model_name;
LOG(INFO) << "precision: " << FLAGS_precision;
LOG(INFO) << "----------------------- Perf info ------------------------";
LOG(INFO) << "Total time spent(ms): "
<< std::accumulate(time_info.begin(), time_info.end(), 0);
LOG(INFO) << "preprocess_time(ms): " << time_info[0] / img_num
<< ", inference_time(ms): " << time_info[1] / img_num
<< ", postprocess_time(ms): " << time_info[2] / img_num;
}
static bool PathExists(const std::string& path){
#ifdef _WIN32
struct _stat buffer;
return (_stat(path.c_str(), &buffer) == 0);
#else
struct stat buffer;
return (stat(path.c_str(), &buffer) == 0);
#endif // !_WIN32
}
int main_det(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info = {0, 0, 0};
DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
FLAGS_use_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_visualize,
FLAGS_use_tensorrt, FLAGS_precision);
for (int i = 0; i < cv_all_img_names.size(); ++i) {
LOG(INFO) << "The predict img: " << cv_all_img_names[i];
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl;
exit(1);
}
std::vector<std::vector<std::vector<int>>> boxes;
std::vector<double> det_times;
det.Run(srcimg, boxes, &det_times);
time_info[0] += det_times[0];
time_info[1] += det_times[1];
time_info[2] += det_times[2];
}
if (FLAGS_benchmark) {
PrintBenchmarkLog("det", 1, "dynamic", time_info, cv_all_img_names.size());
}
return 0;
}
int main_rec(std::vector<cv::String> cv_all_img_names) {
std::vector<double> time_info = {0, 0, 0};
CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
FLAGS_use_mkldnn, FLAGS_char_list_file,
FLAGS_use_tensorrt, FLAGS_precision);
for (int i = 0; i < cv_all_img_names.size(); ++i) {
LOG(INFO) << "The predict img: " << cv_all_img_names[i];
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl;
exit(1);
}
std::vector<double> rec_times;
rec.Run(srcimg, &rec_times);
time_info[0] += rec_times[0];
time_info[1] += rec_times[1];
time_info[2] += rec_times[2];
}
if (FLAGS_benchmark) {
PrintBenchmarkLog("rec", 1, "dynamic", time_info, cv_all_img_names.size());
}
return 0;
}
int main_system(std::vector<cv::String> cv_all_img_names) {
DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
FLAGS_use_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
FLAGS_use_polygon_score, FLAGS_visualize,
FLAGS_use_tensorrt, FLAGS_precision);
Classifier *cls = nullptr;
if (FLAGS_use_angle_cls) {
cls = new Classifier(FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
FLAGS_use_mkldnn, FLAGS_cls_thresh,
FLAGS_use_tensorrt, FLAGS_precision);
}
CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
FLAGS_use_mkldnn, FLAGS_char_list_file,
FLAGS_use_tensorrt, FLAGS_precision);
auto start = std::chrono::system_clock::now();
for (int i = 0; i < cv_all_img_names.size(); ++i) {
LOG(INFO) << "The predict img: " << cv_all_img_names[i];
cv::Mat srcimg = cv::imread(FLAGS_image_dir, cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl;
exit(1);
}
std::vector<std::vector<std::vector<int>>> boxes;
std::vector<double> det_times;
std::vector<double> rec_times;
det.Run(srcimg, boxes, &det_times);
cv::Mat crop_img;
for (int j = 0; j < boxes.size(); j++) {
crop_img = Utility::GetRotateCropImage(srcimg, boxes[j]);
if (cls != nullptr) {
crop_img = cls->Run(crop_img);
}
rec.Run(crop_img, &rec_times);
}
auto end = std::chrono::system_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - start);
std::cout << "Cost "
<< double(duration.count()) *
std::chrono::microseconds::period::num /
std::chrono::microseconds::period::den
<< "s" << std::endl;
}
return 0;
}
void check_params(char* mode) {
if (strcmp(mode, "det")==0) {
if (FLAGS_det_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[det]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1);
}
}
if (strcmp(mode, "rec")==0) {
if (FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) {
std::cout << "Usage[rec]: ./ppocr --rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1);
}
}
if (strcmp(mode, "system")==0) {
if ((FLAGS_det_model_dir.empty() || FLAGS_rec_model_dir.empty() || FLAGS_image_dir.empty()) ||
(FLAGS_use_angle_cls && FLAGS_cls_model_dir.empty())) {
std::cout << "Usage[system without angle cls]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
std::cout << "Usage[system with angle cls]: ./ppocr --det_model_dir=/PATH/TO/DET_INFERENCE_MODEL/ "
<< "--use_angle_cls=true "
<< "--cls_model_dir=/PATH/TO/CLS_INFERENCE_MODEL/ "
<< "--rec_model_dir=/PATH/TO/REC_INFERENCE_MODEL/ "
<< "--image_dir=/PATH/TO/INPUT/IMAGE/" << std::endl;
exit(1);
}
}
if (FLAGS_precision != "fp32" && FLAGS_precision != "fp16" && FLAGS_precision != "int8") {
cout << "precison should be 'fp32'(default), 'fp16' or 'int8'. " << endl;
exit(1);
}
}
int main(int argc, char **argv) { int main(int argc, char **argv) {
if (argc < 3) { if (argc<=1 || (strcmp(argv[1], "det")!=0 && strcmp(argv[1], "rec")!=0 && strcmp(argv[1], "system")!=0)) {
std::cerr << "[ERROR] usage: " << argv[0] std::cout << "Please choose one mode of [det, rec, system] !" << std::endl;
<< " configure_filepath image_path\n"; return -1;
exit(1); }
} std::cout << "mode: " << argv[1] << endl;
OCRConfig config(argv[1]); // Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true);
config.PrintConfigInfo(); check_params(argv[1]);
std::string img_path(argv[2]); if (!PathExists(FLAGS_image_dir)) {
std::vector<std::string> all_img_names; std::cerr << "[ERROR] image path not exist! image_dir: " << FLAGS_image_dir << endl;
Utility::GetAllFiles((char *)img_path.c_str(), all_img_names); exit(1);
DBDetector det(config.det_model_dir, config.use_gpu, config.gpu_id,
config.gpu_mem, config.cpu_math_library_num_threads,
config.use_mkldnn, config.max_side_len, config.det_db_thresh,
config.det_db_box_thresh, config.det_db_unclip_ratio,
config.use_polygon_score, config.visualize,
config.use_tensorrt, config.use_fp16);
Classifier *cls = nullptr;
if (config.use_angle_cls == true) {
cls = new Classifier(config.cls_model_dir, config.use_gpu, config.gpu_id,
config.gpu_mem, config.cpu_math_library_num_threads,
config.use_mkldnn, config.cls_thresh,
config.use_tensorrt, config.use_fp16);
}
CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id,
config.gpu_mem, config.cpu_math_library_num_threads,
config.use_mkldnn, config.char_list_file,
config.use_tensorrt, config.use_fp16);
auto start = std::chrono::system_clock::now();
for (auto img_dir : all_img_names) {
LOG(INFO) << "The predict img: " << img_dir;
cv::Mat srcimg = cv::imread(img_dir, cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << img_path
<< "\n";
exit(1);
} }
std::vector<std::vector<std::vector<int>>> boxes;
std::vector<cv::String> cv_all_img_names;
det.Run(srcimg, boxes); cv::glob(FLAGS_image_dir, cv_all_img_names);
std::cout << "total images num: " << cv_all_img_names.size() << endl;
rec.Run(boxes, srcimg, cls);
auto end = std::chrono::system_clock::now(); if (strcmp(argv[1], "det")==0) {
auto duration = return main_det(cv_all_img_names);
std::chrono::duration_cast<std::chrono::microseconds>(end - start); }
std::cout << "Cost " if (strcmp(argv[1], "rec")==0) {
<< double(duration.count()) * return main_rec(cv_all_img_names);
std::chrono::microseconds::period::num / }
std::chrono::microseconds::period::den if (strcmp(argv[1], "system")==0) {
<< "s" << std::endl; return main_system(cv_all_img_names);
} }
return 0;
} }
...@@ -77,10 +77,16 @@ void Classifier::LoadModel(const std::string &model_dir) { ...@@ -77,10 +77,16 @@ void Classifier::LoadModel(const std::string &model_dir) {
if (this->use_gpu_) { if (this->use_gpu_) {
config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
if (this->use_tensorrt_) { if (this->use_tensorrt_) {
auto precision = paddle_infer::Config::Precision::kFloat32;
if (this->precision_ == "fp16") {
precision = paddle_infer::Config::Precision::kHalf;
}
if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8;
}
config.EnableTensorRtEngine( config.EnableTensorRtEngine(
1 << 20, 10, 3, 1 << 20, 10, 3,
this->use_fp16_ ? paddle_infer::Config::Precision::kHalf precision,
: paddle_infer::Config::Precision::kFloat32,
false, false); false, false);
} }
} else { } else {
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <include/ocr_det.h> #include <include/ocr_det.h>
namespace PaddleOCR { namespace PaddleOCR {
void DBDetector::LoadModel(const std::string &model_dir) { void DBDetector::LoadModel(const std::string &model_dir) {
...@@ -25,10 +26,16 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -25,10 +26,16 @@ void DBDetector::LoadModel(const std::string &model_dir) {
if (this->use_gpu_) { if (this->use_gpu_) {
config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
if (this->use_tensorrt_) { if (this->use_tensorrt_) {
auto precision = paddle_infer::Config::Precision::kFloat32;
if (this->precision_ == "fp16") {
precision = paddle_infer::Config::Precision::kHalf;
}
if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8;
}
config.EnableTensorRtEngine( config.EnableTensorRtEngine(
1 << 20, 10, 3, 1 << 20, 10, 3,
this->use_fp16_ ? paddle_infer::Config::Precision::kHalf precision,
: paddle_infer::Config::Precision::kFloat32,
false, false); false, false);
std::map<std::string, std::vector<int>> min_input_shape = { std::map<std::string, std::vector<int>> min_input_shape = {
{"x", {1, 3, 50, 50}}, {"x", {1, 3, 50, 50}},
...@@ -90,13 +97,16 @@ void DBDetector::LoadModel(const std::string &model_dir) { ...@@ -90,13 +97,16 @@ void DBDetector::LoadModel(const std::string &model_dir) {
} }
void DBDetector::Run(cv::Mat &img, void DBDetector::Run(cv::Mat &img,
std::vector<std::vector<std::vector<int>>> &boxes) { std::vector<std::vector<std::vector<int>>> &boxes,
std::vector<double> *times) {
float ratio_h{}; float ratio_h{};
float ratio_w{}; float ratio_w{};
cv::Mat srcimg; cv::Mat srcimg;
cv::Mat resize_img; cv::Mat resize_img;
img.copyTo(srcimg); img.copyTo(srcimg);
auto preprocess_start = std::chrono::steady_clock::now();
this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w, this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w,
this->use_tensorrt_); this->use_tensorrt_);
...@@ -105,14 +115,17 @@ void DBDetector::Run(cv::Mat &img, ...@@ -105,14 +115,17 @@ void DBDetector::Run(cv::Mat &img,
std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f); std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
this->permute_op_.Run(&resize_img, input.data()); this->permute_op_.Run(&resize_img, input.data());
auto preprocess_end = std::chrono::steady_clock::now();
// Inference. // Inference.
auto input_names = this->predictor_->GetInputNames(); auto input_names = this->predictor_->GetInputNames();
auto input_t = this->predictor_->GetInputHandle(input_names[0]); auto input_t = this->predictor_->GetInputHandle(input_names[0]);
input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
auto inference_start = std::chrono::steady_clock::now();
input_t->CopyFromCpu(input.data()); input_t->CopyFromCpu(input.data());
this->predictor_->Run(); this->predictor_->Run();
std::vector<float> out_data; std::vector<float> out_data;
auto output_names = this->predictor_->GetOutputNames(); auto output_names = this->predictor_->GetOutputNames();
auto output_t = this->predictor_->GetOutputHandle(output_names[0]); auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
...@@ -122,7 +135,9 @@ void DBDetector::Run(cv::Mat &img, ...@@ -122,7 +135,9 @@ void DBDetector::Run(cv::Mat &img,
out_data.resize(out_num); out_data.resize(out_num);
output_t->CopyToCpu(out_data.data()); output_t->CopyToCpu(out_data.data());
auto inference_end = std::chrono::steady_clock::now();
auto postprocess_start = std::chrono::steady_clock::now();
int n2 = output_shape[2]; int n2 = output_shape[2];
int n3 = output_shape[3]; int n3 = output_shape[3];
int n = n2 * n3; int n = n2 * n3;
...@@ -150,7 +165,16 @@ void DBDetector::Run(cv::Mat &img, ...@@ -150,7 +165,16 @@ void DBDetector::Run(cv::Mat &img,
this->det_db_unclip_ratio_, this->use_polygon_score_); this->det_db_unclip_ratio_, this->use_polygon_score_);
boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg); boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
auto postprocess_end = std::chrono::steady_clock::now();
std::cout << "Detected boxes num: " << boxes.size() << endl;
std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
times->push_back(double(preprocess_diff.count() * 1000));
std::chrono::duration<float> inference_diff = inference_end - inference_start;
times->push_back(double(inference_diff.count() * 1000));
std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000));
//// visualization //// visualization
if (this->visualize_) { if (this->visualize_) {
Utility::VisualizeBboxes(srcimg, boxes); Utility::VisualizeBboxes(srcimg, boxes);
......
...@@ -16,80 +16,80 @@ ...@@ -16,80 +16,80 @@
namespace PaddleOCR { namespace PaddleOCR {
void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes, void CRNNRecognizer::Run(cv::Mat &img, std::vector<double> *times) {
cv::Mat &img, Classifier *cls) {
cv::Mat srcimg; cv::Mat srcimg;
img.copyTo(srcimg); img.copyTo(srcimg);
cv::Mat crop_img;
cv::Mat resize_img; cv::Mat resize_img;
std::cout << "The predicted text is :" << std::endl; float wh_ratio = float(srcimg.cols) / float(srcimg.rows);
int index = 0; auto preprocess_start = std::chrono::steady_clock::now();
for (int i = 0; i < boxes.size(); i++) { this->resize_op_.Run(srcimg, resize_img, wh_ratio, this->use_tensorrt_);
crop_img = GetRotateCropImage(srcimg, boxes[i]);
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
if (cls != nullptr) { this->is_scale_);
crop_img = cls->Run(crop_img);
} std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
float wh_ratio = float(crop_img.cols) / float(crop_img.rows); this->permute_op_.Run(&resize_img, input.data());
auto preprocess_end = std::chrono::steady_clock::now();
this->resize_op_.Run(crop_img, resize_img, wh_ratio, this->use_tensorrt_);
// Inference.
this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, auto input_names = this->predictor_->GetInputNames();
this->is_scale_); auto input_t = this->predictor_->GetInputHandle(input_names[0]);
input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f); auto inference_start = std::chrono::steady_clock::now();
input_t->CopyFromCpu(input.data());
this->permute_op_.Run(&resize_img, input.data()); this->predictor_->Run();
// Inference. std::vector<float> predict_batch;
auto input_names = this->predictor_->GetInputNames(); auto output_names = this->predictor_->GetOutputNames();
auto input_t = this->predictor_->GetInputHandle(input_names[0]); auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); auto predict_shape = output_t->shape();
input_t->CopyFromCpu(input.data());
this->predictor_->Run(); int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1,
std::multiplies<int>());
std::vector<float> predict_batch; predict_batch.resize(out_num);
auto output_names = this->predictor_->GetOutputNames();
auto output_t = this->predictor_->GetOutputHandle(output_names[0]); output_t->CopyToCpu(predict_batch.data());
auto predict_shape = output_t->shape(); auto inference_end = std::chrono::steady_clock::now();
int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1, // ctc decode
std::multiplies<int>()); auto postprocess_start = std::chrono::steady_clock::now();
predict_batch.resize(out_num); std::vector<std::string> str_res;
int argmax_idx;
output_t->CopyToCpu(predict_batch.data()); int last_index = 0;
float score = 0.f;
// ctc decode int count = 0;
std::vector<std::string> str_res; float max_value = 0.0f;
int argmax_idx;
int last_index = 0; for (int n = 0; n < predict_shape[1]; n++) {
float score = 0.f; argmax_idx =
int count = 0; int(Utility::argmax(&predict_batch[n * predict_shape[2]],
float max_value = 0.0f; &predict_batch[(n + 1) * predict_shape[2]]));
max_value =
for (int n = 0; n < predict_shape[1]; n++) { float(*std::max_element(&predict_batch[n * predict_shape[2]],
argmax_idx = &predict_batch[(n + 1) * predict_shape[2]]));
int(Utility::argmax(&predict_batch[n * predict_shape[2]],
&predict_batch[(n + 1) * predict_shape[2]])); if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
max_value = score += max_value;
float(*std::max_element(&predict_batch[n * predict_shape[2]], count += 1;
&predict_batch[(n + 1) * predict_shape[2]])); str_res.push_back(label_list_[argmax_idx]);
if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
score += max_value;
count += 1;
str_res.push_back(label_list_[argmax_idx]);
}
last_index = argmax_idx;
} }
score /= count; last_index = argmax_idx;
for (int i = 0; i < str_res.size(); i++) { }
std::cout << str_res[i]; auto postprocess_end = std::chrono::steady_clock::now();
} score /= count;
std::cout << "\tscore: " << score << std::endl; for (int i = 0; i < str_res.size(); i++) {
std::cout << str_res[i];
} }
std::cout << "\tscore: " << score << std::endl;
std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
times->push_back(double(preprocess_diff.count() * 1000));
std::chrono::duration<float> inference_diff = inference_end - inference_start;
times->push_back(double(inference_diff.count() * 1000));
std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000));
} }
void CRNNRecognizer::LoadModel(const std::string &model_dir) { void CRNNRecognizer::LoadModel(const std::string &model_dir) {
...@@ -101,10 +101,16 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -101,10 +101,16 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
if (this->use_gpu_) { if (this->use_gpu_) {
config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
if (this->use_tensorrt_) { if (this->use_tensorrt_) {
auto precision = paddle_infer::Config::Precision::kFloat32;
if (this->precision_ == "fp16") {
precision = paddle_infer::Config::Precision::kHalf;
}
if (this->precision_ == "int8") {
precision = paddle_infer::Config::Precision::kInt8;
}
config.EnableTensorRtEngine( config.EnableTensorRtEngine(
1 << 20, 10, 3, 1 << 20, 10, 3,
this->use_fp16_ ? paddle_infer::Config::Precision::kHalf precision,
: paddle_infer::Config::Precision::kFloat32,
false, false); false, false);
std::map<std::string, std::vector<int>> min_input_shape = { std::map<std::string, std::vector<int>> min_input_shape = {
{"x", {1, 3, 32, 10}}}; {"x", {1, 3, 32, 10}}};
...@@ -138,59 +144,4 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) { ...@@ -138,59 +144,4 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
this->predictor_ = CreatePredictor(config); this->predictor_ = CreatePredictor(config);
} }
cv::Mat CRNNRecognizer::GetRotateCropImage(const cv::Mat &srcimage,
std::vector<std::vector<int>> box) {
cv::Mat image;
srcimage.copyTo(image);
std::vector<std::vector<int>> points = box;
int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
int left = int(*std::min_element(x_collect, x_collect + 4));
int right = int(*std::max_element(x_collect, x_collect + 4));
int top = int(*std::min_element(y_collect, y_collect + 4));
int bottom = int(*std::max_element(y_collect, y_collect + 4));
cv::Mat img_crop;
image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);
for (int i = 0; i < points.size(); i++) {
points[i][0] -= left;
points[i][1] -= top;
}
int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) +
pow(points[0][1] - points[1][1], 2)));
int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) +
pow(points[0][1] - points[3][1], 2)));
cv::Point2f pts_std[4];
pts_std[0] = cv::Point2f(0., 0.);
pts_std[1] = cv::Point2f(img_crop_width, 0.);
pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
pts_std[3] = cv::Point2f(0.f, img_crop_height);
cv::Point2f pointsf[4];
pointsf[0] = cv::Point2f(points[0][0], points[0][1]);
pointsf[1] = cv::Point2f(points[1][0], points[1][1]);
pointsf[2] = cv::Point2f(points[2][0], points[2][1]);
pointsf[3] = cv::Point2f(points[3][0], points[3][1]);
cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);
cv::Mat dst_img;
cv::warpPerspective(img_crop, dst_img, M,
cv::Size(img_crop_width, img_crop_height),
cv::BORDER_REPLICATE);
if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth());
cv::transpose(dst_img, srcCopy);
cv::flip(srcCopy, srcCopy, 0);
return srcCopy;
} else {
return dst_img;
}
}
} // namespace PaddleOCR } // namespace PaddleOCR
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <include/postprocess_op.h> #include <include/postprocess_op.h>
#include <include/clipper.cpp>
namespace PaddleOCR { namespace PaddleOCR {
......
...@@ -92,4 +92,59 @@ void Utility::GetAllFiles(const char *dir_name, ...@@ -92,4 +92,59 @@ void Utility::GetAllFiles(const char *dir_name,
} }
} }
cv::Mat Utility::GetRotateCropImage(const cv::Mat &srcimage,
std::vector<std::vector<int>> box) {
cv::Mat image;
srcimage.copyTo(image);
std::vector<std::vector<int>> points = box;
int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
int left = int(*std::min_element(x_collect, x_collect + 4));
int right = int(*std::max_element(x_collect, x_collect + 4));
int top = int(*std::min_element(y_collect, y_collect + 4));
int bottom = int(*std::max_element(y_collect, y_collect + 4));
cv::Mat img_crop;
image(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);
for (int i = 0; i < points.size(); i++) {
points[i][0] -= left;
points[i][1] -= top;
}
int img_crop_width = int(sqrt(pow(points[0][0] - points[1][0], 2) +
pow(points[0][1] - points[1][1], 2)));
int img_crop_height = int(sqrt(pow(points[0][0] - points[3][0], 2) +
pow(points[0][1] - points[3][1], 2)));
cv::Point2f pts_std[4];
pts_std[0] = cv::Point2f(0., 0.);
pts_std[1] = cv::Point2f(img_crop_width, 0.);
pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
pts_std[3] = cv::Point2f(0.f, img_crop_height);
cv::Point2f pointsf[4];
pointsf[0] = cv::Point2f(points[0][0], points[0][1]);
pointsf[1] = cv::Point2f(points[1][0], points[1][1]);
pointsf[2] = cv::Point2f(points[2][0], points[2][1]);
pointsf[3] = cv::Point2f(points[3][0], points[3][1]);
cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);
cv::Mat dst_img;
cv::warpPerspective(img_crop, dst_img, M,
cv::Size(img_crop_width, img_crop_height),
cv::BORDER_REPLICATE);
if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
cv::Mat srcCopy = cv::Mat(dst_img.rows, dst_img.cols, dst_img.depth());
cv::transpose(dst_img, srcCopy);
cv::flip(srcCopy, srcCopy, 0);
return srcCopy;
} else {
return dst_img;
}
}
} // namespace PaddleOCR } // namespace PaddleOCR
\ No newline at end of file
# model load config
use_gpu 0
gpu_id 0
gpu_mem 4000
cpu_math_library_num_threads 10
use_mkldnn 0
# det config
max_side_len 960
det_db_thresh 0.3
det_db_box_thresh 0.5
det_db_unclip_ratio 1.6
use_polygon_score 1
det_model_dir ./inference/ch_ppocr_mobile_v2.0_det_infer/
# cls config
use_angle_cls 0
cls_model_dir ./inference/ch_ppocr_mobile_v2.0_cls_infer/
cls_thresh 0.9
# rec config
rec_model_dir ./inference/ch_ppocr_mobile_v2.0_rec_infer/
char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
# show the detection results
visualize 0
# use_tensorrt
use_tensorrt 0
use_fp16 0
./build/ocr_system ./tools/config.txt ../../doc/imgs/12.jpg
doc/joinus.PNG

203 KB | W: | H:

doc/joinus.PNG

191 KB | W: | H:

doc/joinus.PNG
doc/joinus.PNG
doc/joinus.PNG
doc/joinus.PNG
  • 2-up
  • Swipe
  • Onion skin
...@@ -25,6 +25,6 @@ class ClsLoss(nn.Layer): ...@@ -25,6 +25,6 @@ class ClsLoss(nn.Layer):
self.loss_func = nn.CrossEntropyLoss(reduction='mean') self.loss_func = nn.CrossEntropyLoss(reduction='mean')
def forward(self, predicts, batch): def forward(self, predicts, batch):
label = batch[1] label = batch[1].astype("int64")
loss = self.loss_func(input=predicts, label=label) loss = self.loss_func(input=predicts, label=label)
return {'loss': loss} return {'loss': loss}
...@@ -62,7 +62,7 @@ else ...@@ -62,7 +62,7 @@ else
if [ ${model_name} = "ocr_det" ]; then if [ ${model_name} = "ocr_det" ]; then
eval_model_name="ch_ppocr_mobile_v2.0_det_infer" eval_model_name="ch_ppocr_mobile_v2.0_det_infer"
rm -rf ./train_data/icdar2015 rm -rf ./train_data/icdar2015
wget -nc -P ./train_data https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../ cd ./inference && tar xf ${eval_model_name}.tar && tar xf ch_det_data_50.tar && cd ../
else else
......
# 介绍
test.sh和params.txt文件配合使用,完成OCR轻量检测和识别模型从训练到预测的流程测试。
# 安装依赖
- 安装PaddlePaddle >= 2.0
- 安装PaddleOCR依赖
```
pip3 install -r ../requirements.txt
```
- 安装autolog
```
git clone https://github.com/LDOUBLEV/AutoLog
cd AutoLog
pip3 install -r requirements.txt
python3 setup.py bdist_wheel
pip3 install ./dist/auto_log-1.0.0-py3-none-any.whl
cd ../
```
# 目录介绍
```bash
tests/
├── ocr_det_params.txt # 测试OCR检测模型的参数配置文件
├── ocr_rec_params.txt # 测试OCR识别模型的参数配置文件
└── prepare.sh # 完成test.sh运行所需要的数据和模型下载
└── test.sh # 根据
```
# 使用方法
test.sh包含四种运行模式,每种模式的运行数据不同,分别用于测试速度和精度,分别是:
- 模式1 lite_train_infer,使用少量数据训练,用于快速验证训练到预测的走通流程,不验证精度和速度;
```
bash test/prepare.sh ./tests/ocr_det_params.txt 'lite_train_infer'
bash tests/test.sh ./tests/ocr_det_params.txt 'lite_train_infer'
```
- 模式2 whole_infer,使用少量数据训练,一定量数据预测,用于验证训练后的模型执行预测,预测速度是否合理;
```
bash tests/prepare.sh ./tests/ocr_det_params.txt 'whole_infer'
bash tests/test.sh ./tests/ocr_det_params.txt 'whole_infer'
```
- 模式3 infer 不训练,全量数据预测,走通开源模型评估、动转静,检查inference model预测时间和精度;
```
bash tests/prepare.sh ./tests/ocr_det_params.txt 'infer'
用法1:
bash tests/test.sh ./tests/ocr_det_params.txt 'infer'
用法2: 指定GPU卡预测,第三个传入参数为GPU卡号
bash tests/test.sh ./tests/ocr_det_params.txt 'infer' '1'
```
模式4: whole_train_infer , CE: 全量数据训练,全量数据预测,验证模型训练精度,预测精度,预测速度
```
bash tests/prepare.sh ./tests/ocr_det_params.txt 'whole_train_infer'
bash tests/test.sh ./tests/ocr_det_params.txt 'whole_train_infer'
```
...@@ -101,6 +101,7 @@ class TextDetector(object): ...@@ -101,6 +101,7 @@ class TextDetector(object):
if args.benchmark: if args.benchmark:
import auto_log import auto_log
pid = os.getpid() pid = os.getpid()
gpu_id = utility.get_infer_gpuid()
self.autolog = auto_log.AutoLogger( self.autolog = auto_log.AutoLogger(
model_name="det", model_name="det",
model_precision=args.precision, model_precision=args.precision,
...@@ -110,7 +111,7 @@ class TextDetector(object): ...@@ -110,7 +111,7 @@ class TextDetector(object):
inference_config=self.config, inference_config=self.config,
pids=pid, pids=pid,
process_name=None, process_name=None,
gpu_ids=0, gpu_ids=gpu_id if args.use_gpu else None,
time_keys=[ time_keys=[
'preprocess_time', 'inference_time', 'postprocess_time' 'preprocess_time', 'inference_time', 'postprocess_time'
], ],
......
...@@ -68,6 +68,7 @@ class TextRecognizer(object): ...@@ -68,6 +68,7 @@ class TextRecognizer(object):
if args.benchmark: if args.benchmark:
import auto_log import auto_log
pid = os.getpid() pid = os.getpid()
gpu_id = utility.get_infer_gpuid()
self.autolog = auto_log.AutoLogger( self.autolog = auto_log.AutoLogger(
model_name="rec", model_name="rec",
model_precision=args.precision, model_precision=args.precision,
...@@ -77,7 +78,7 @@ class TextRecognizer(object): ...@@ -77,7 +78,7 @@ class TextRecognizer(object):
inference_config=self.config, inference_config=self.config,
pids=pid, pids=pid,
process_name=None, process_name=None,
gpu_ids=0 if args.use_gpu else None, gpu_ids=gpu_id if args.use_gpu else None,
time_keys=[ time_keys=[
'preprocess_time', 'inference_time', 'postprocess_time' 'preprocess_time', 'inference_time', 'postprocess_time'
], ],
......
...@@ -159,6 +159,11 @@ def create_predictor(args, mode, logger): ...@@ -159,6 +159,11 @@ def create_predictor(args, mode, logger):
precision = inference.PrecisionType.Float32 precision = inference.PrecisionType.Float32
if args.use_gpu: if args.use_gpu:
gpu_id = get_infer_gpuid()
if gpu_id is None:
raise ValueError(
"Not found GPU in current device. Please check your device or set args.use_gpu as False"
)
config.enable_use_gpu(args.gpu_mem, 0) config.enable_use_gpu(args.gpu_mem, 0)
if args.use_tensorrt: if args.use_tensorrt:
config.enable_tensorrt_engine( config.enable_tensorrt_engine(
...@@ -280,6 +285,20 @@ def create_predictor(args, mode, logger): ...@@ -280,6 +285,20 @@ def create_predictor(args, mode, logger):
return predictor, input_tensor, output_tensors, config return predictor, input_tensor, output_tensors, config
def get_infer_gpuid():
cmd = "nvidia-smi"
res = os.popen(cmd).readlines()
if len(res) == 0:
return None
cmd = "env | grep CUDA_VISIBLE_DEVICES"
env_cuda = os.popen(cmd).readlines()
if len(env_cuda) == 0:
return 0
else:
gpu_id = env_cuda[0].strip().split("=")[1]
return int(gpu_id[0])
def draw_e2e_res(dt_boxes, strs, img_path): def draw_e2e_res(dt_boxes, strs, img_path):
src_im = cv2.imread(img_path) src_im = cv2.imread(img_path)
for box, str in zip(dt_boxes, strs): for box, str in zip(dt_boxes, strs):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment