Commit c98c5dd1 authored by Leif

Merge remote-tracking branch 'origin/dygraph' into dygraph

parents 063395ec 5ebf5d6e
@@ -90,7 +90,7 @@ Optimizer:
 PostProcess:
   name: DistillationDBPostProcess
-  model_name: ["Student", "Student2"]
+  model_name: ["Student"]
   key: head_out
   thresh: 0.3
   box_thresh: 0.6
...
@@ -44,7 +44,8 @@ public:
            const int &gpu_id, const int &gpu_mem,
            const int &cpu_math_library_num_threads,
            const bool &use_mkldnn, const string &label_path,
-           const bool &use_tensorrt, const std::string &precision) {
+           const bool &use_tensorrt, const std::string &precision,
+           const int &rec_batch_num) {
    this->use_gpu_ = use_gpu;
    this->gpu_id_ = gpu_id;
    this->gpu_mem_ = gpu_mem;
@@ -52,6 +53,7 @@ public:
    this->use_mkldnn_ = use_mkldnn;
    this->use_tensorrt_ = use_tensorrt;
    this->precision_ = precision;
+   this->rec_batch_num_ = rec_batch_num;
    this->label_list_ = Utility::ReadDict(label_path);
    this->label_list_.insert(this->label_list_.begin(),
@@ -64,7 +66,7 @@ public:
   // Load Paddle inference model
   void LoadModel(const std::string &model_dir);

-  void Run(cv::Mat &img, std::vector<double> *times);
+  void Run(std::vector<cv::Mat> img_list, std::vector<double> *times);

 private:
   std::shared_ptr<Predictor> predictor_;
@@ -82,10 +84,12 @@ private:
   bool is_scale_ = true;
   bool use_tensorrt_ = false;
   std::string precision_ = "fp32";
+  int rec_batch_num_ = 6;
   // pre-process
   CrnnResizeImg resize_op_;
   Normalize normalize_op_;
-  Permute permute_op_;
+  PermuteBatch permute_op_;
   // post-process
   PostProcessor post_processor_;
...
@@ -44,6 +44,11 @@ public:
   virtual void Run(const cv::Mat *im, float *data);
 };

+class PermuteBatch {
+public:
+  virtual void Run(const std::vector<cv::Mat> imgs, float *data);
+};
+
 class ResizeImgType0 {
 public:
   virtual void Run(const cv::Mat &img, cv::Mat &resize_img, int max_size_len,
...
@@ -50,6 +50,9 @@ public:
   static cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
                                     std::vector<std::vector<int>> box);

+  static std::vector<int> argsort(const std::vector<float> &array);
 };

 } // namespace PaddleOCR
\ No newline at end of file
@@ -61,7 +61,7 @@ DEFINE_string(cls_model_dir, "", "Path of cls inference model.");
 DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
 // recognition related
 DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
-DEFINE_int32(rec_batch_num, 1, "rec_batch_num.");
+DEFINE_int32(rec_batch_num, 6, "rec_batch_num.");
 DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt", "Path of dictionary.");
...
@@ -146,8 +146,9 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
   CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
                      FLAGS_gpu_mem, FLAGS_cpu_threads,
                      FLAGS_enable_mkldnn, char_list_file,
-                     FLAGS_use_tensorrt, FLAGS_precision);
+                     FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num);

+  std::vector<cv::Mat> img_list;
   for (int i = 0; i < cv_all_img_names.size(); ++i) {
     LOG(INFO) << "The predict img: " << cv_all_img_names[i];
@@ -156,22 +157,21 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
       std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl;
       exit(1);
     }
-    std::vector<double> rec_times;
-    rec.Run(srcimg, &rec_times);
-
-    time_info[0] += rec_times[0];
-    time_info[1] += rec_times[1];
-    time_info[2] += rec_times[2];
+    img_list.push_back(srcimg);
   }
+
+  std::vector<double> rec_times;
+  rec.Run(img_list, &rec_times);
+
+  time_info[0] += rec_times[0];
+  time_info[1] += rec_times[1];
+  time_info[2] += rec_times[2];

   if (FLAGS_benchmark) {
     AutoLogger autolog("ocr_rec",
                        FLAGS_use_gpu,
                        FLAGS_use_tensorrt,
                        FLAGS_enable_mkldnn,
                        FLAGS_cpu_threads,
-                       1,
+                       FLAGS_rec_batch_num,
                        "dynamic",
                        FLAGS_precision,
                        time_info,
@@ -209,7 +209,7 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
   CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
                      FLAGS_gpu_mem, FLAGS_cpu_threads,
                      FLAGS_enable_mkldnn, char_list_file,
-                     FLAGS_use_tensorrt, FLAGS_precision);
+                     FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num);

   for (int i = 0; i < cv_all_img_names.size(); ++i) {
     LOG(INFO) << "The predict img: " << cv_all_img_names[i];
@@ -228,19 +228,22 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
     time_info_det[1] += det_times[1];
     time_info_det[2] += det_times[2];

-    cv::Mat crop_img;
+    std::vector<cv::Mat> img_list;
     for (int j = 0; j < boxes.size(); j++) {
+      cv::Mat crop_img;
       crop_img = Utility::GetRotateCropImage(srcimg, boxes[j]);
       if (cls != nullptr) {
         crop_img = cls->Run(crop_img);
       }
-      rec.Run(crop_img, &rec_times);
-      time_info_rec[0] += rec_times[0];
-      time_info_rec[1] += rec_times[1];
-      time_info_rec[2] += rec_times[2];
+      img_list.push_back(crop_img);
     }
+
+    rec.Run(img_list, &rec_times);
+    time_info_rec[0] += rec_times[0];
+    time_info_rec[1] += rec_times[1];
+    time_info_rec[2] += rec_times[2];
   }

   if (FLAGS_benchmark) {
     AutoLogger autolog_det("ocr_det",
                            FLAGS_use_gpu,
@@ -257,7 +260,7 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
                            FLAGS_use_tensorrt,
                            FLAGS_enable_mkldnn,
                            FLAGS_cpu_threads,
-                           1,
+                           FLAGS_rec_batch_num,
                            "dynamic",
                            FLAGS_precision,
                            time_info_rec,
...
@@ -15,83 +15,108 @@
 #include <include/ocr_rec.h>

 namespace PaddleOCR {

-void CRNNRecognizer::Run(cv::Mat &img, std::vector<double> *times) {
-  cv::Mat srcimg;
-  img.copyTo(srcimg);
-  cv::Mat resize_img;
-
-  float wh_ratio = float(srcimg.cols) / float(srcimg.rows);
-  auto preprocess_start = std::chrono::steady_clock::now();
-  this->resize_op_.Run(srcimg, resize_img, wh_ratio, this->use_tensorrt_);
-
-  this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
-                          this->is_scale_);
-
-  std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
-
-  this->permute_op_.Run(&resize_img, input.data());
-  auto preprocess_end = std::chrono::steady_clock::now();
-
-  // Inference.
-  auto input_names = this->predictor_->GetInputNames();
-  auto input_t = this->predictor_->GetInputHandle(input_names[0]);
-  input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
-  auto inference_start = std::chrono::steady_clock::now();
-  input_t->CopyFromCpu(input.data());
-  this->predictor_->Run();
-
-  std::vector<float> predict_batch;
-  auto output_names = this->predictor_->GetOutputNames();
-  auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
-  auto predict_shape = output_t->shape();
-
-  int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1,
-                                std::multiplies<int>());
-  predict_batch.resize(out_num);
-  output_t->CopyToCpu(predict_batch.data());
-  auto inference_end = std::chrono::steady_clock::now();
-
-  // ctc decode
-  auto postprocess_start = std::chrono::steady_clock::now();
-  std::vector<std::string> str_res;
-  int argmax_idx;
-  int last_index = 0;
-  float score = 0.f;
-  int count = 0;
-  float max_value = 0.0f;
-
-  for (int n = 0; n < predict_shape[1]; n++) {
-    argmax_idx =
-        int(Utility::argmax(&predict_batch[n * predict_shape[2]],
-                            &predict_batch[(n + 1) * predict_shape[2]]));
-    max_value =
-        float(*std::max_element(&predict_batch[n * predict_shape[2]],
-                                &predict_batch[(n + 1) * predict_shape[2]]));
-
-    if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
-      score += max_value;
-      count += 1;
-      str_res.push_back(label_list_[argmax_idx]);
-    }
-    last_index = argmax_idx;
-  }
-  auto postprocess_end = std::chrono::steady_clock::now();
-  score /= count;
-  for (int i = 0; i < str_res.size(); i++) {
-    std::cout << str_res[i];
-  }
-  std::cout << "\tscore: " << score << std::endl;
-
-  std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
-  times->push_back(double(preprocess_diff.count() * 1000));
-  std::chrono::duration<float> inference_diff = inference_end - inference_start;
-  times->push_back(double(inference_diff.count() * 1000));
-  std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
-  times->push_back(double(postprocess_diff.count() * 1000));
-}
+void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, std::vector<double> *times) {
+  std::chrono::duration<float> preprocess_diff = std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
+  std::chrono::duration<float> inference_diff = std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
+  std::chrono::duration<float> postprocess_diff = std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
+
+  int img_num = img_list.size();
+  std::vector<float> width_list;
+  for (int i = 0; i < img_num; i++) {
+    width_list.push_back(float(img_list[i].cols) / img_list[i].rows);
+  }
+  std::vector<int> indices = Utility::argsort(width_list);
+
+  for (int beg_img_no = 0; beg_img_no < img_num; beg_img_no += this->rec_batch_num_) {
+    auto preprocess_start = std::chrono::steady_clock::now();
+    int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_);
+    float max_wh_ratio = 0;
+    for (int ino = beg_img_no; ino < end_img_no; ino++) {
+      int h = img_list[indices[ino]].rows;
+      int w = img_list[indices[ino]].cols;
+      float wh_ratio = w * 1.0 / h;
+      max_wh_ratio = max(max_wh_ratio, wh_ratio);
+    }
+    std::vector<cv::Mat> norm_img_batch;
+    for (int ino = beg_img_no; ino < end_img_no; ino++) {
+      cv::Mat srcimg;
+      img_list[indices[ino]].copyTo(srcimg);
+      cv::Mat resize_img;
+      this->resize_op_.Run(srcimg, resize_img, max_wh_ratio, this->use_tensorrt_);
+      this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, this->is_scale_);
+      norm_img_batch.push_back(resize_img);
+    }
+
+    int batch_width = int(ceilf(32 * max_wh_ratio)) - 1;
+    std::vector<float> input(this->rec_batch_num_ * 3 * 32 * batch_width, 0.0f);
+    this->permute_op_.Run(norm_img_batch, input.data());
+    auto preprocess_end = std::chrono::steady_clock::now();
+    preprocess_diff += preprocess_end - preprocess_start;
+
+    // Inference.
+    auto input_names = this->predictor_->GetInputNames();
+    auto input_t = this->predictor_->GetInputHandle(input_names[0]);
+    input_t->Reshape({this->rec_batch_num_, 3, 32, batch_width});
+    auto inference_start = std::chrono::steady_clock::now();
+    input_t->CopyFromCpu(input.data());
+    this->predictor_->Run();
+
+    std::vector<float> predict_batch;
+    auto output_names = this->predictor_->GetOutputNames();
+    auto output_t = this->predictor_->GetOutputHandle(output_names[0]);
+    auto predict_shape = output_t->shape();
+
+    int out_num = std::accumulate(predict_shape.begin(), predict_shape.end(), 1,
+                                  std::multiplies<int>());
+    predict_batch.resize(out_num);
+    output_t->CopyToCpu(predict_batch.data());
+    auto inference_end = std::chrono::steady_clock::now();
+    inference_diff += inference_end - inference_start;
+
+    // ctc decode
+    auto postprocess_start = std::chrono::steady_clock::now();
+    for (int m = 0; m < predict_shape[0]; m++) {
+      std::vector<std::string> str_res;
+      int argmax_idx;
+      int last_index = 0;
+      float score = 0.f;
+      int count = 0;
+      float max_value = 0.0f;
+
+      for (int n = 0; n < predict_shape[1]; n++) {
+        argmax_idx = int(Utility::argmax(
+            &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]],
+            &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]]));
+        max_value = float(*std::max_element(
+            &predict_batch[(m * predict_shape[1] + n) * predict_shape[2]],
+            &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]]));
+
+        if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
+          score += max_value;
+          count += 1;
+          str_res.push_back(label_list_[argmax_idx]);
+        }
+        last_index = argmax_idx;
+      }
+      score /= count;
+      if (isnan(score))
+        continue;
+      for (int i = 0; i < str_res.size(); i++) {
+        std::cout << str_res[i];
+      }
+      std::cout << "\tscore: " << score << std::endl;
+    }
+    auto postprocess_end = std::chrono::steady_clock::now();
+    postprocess_diff += postprocess_end - postprocess_start;
+  }
+  times->push_back(double(preprocess_diff.count() * 1000));
+  times->push_back(double(inference_diff.count() * 1000));
+  times->push_back(double(postprocess_diff.count() * 1000));
+}

 void CRNNRecognizer::LoadModel(const std::string &model_dir) {
   // AnalysisConfig config;
   paddle_infer::Config config;
...
@@ -40,6 +40,17 @@ void Permute::Run(const cv::Mat *im, float *data) {
   }
 }

+void PermuteBatch::Run(const std::vector<cv::Mat> imgs, float *data) {
+  for (int j = 0; j < imgs.size(); j++) {
+    int rh = imgs[j].rows;
+    int rw = imgs[j].cols;
+    int rc = imgs[j].channels();
+    for (int i = 0; i < rc; ++i) {
+      cv::extractChannel(
+          imgs[j], cv::Mat(rh, rw, CV_32FC1, data + (j * rc + i) * rh * rw), i);
+    }
+  }
+}
+
 void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
                     const std::vector<float> &scale, const bool is_scale) {
   double e = 1.0;
...
@@ -90,16 +101,17 @@ void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
   imgC = rec_image_shape[0];
   imgH = rec_image_shape[1];
   imgW = rec_image_shape[2];

   imgW = int(32 * wh_ratio);

   float ratio = float(img.cols) / float(img.rows);
   int resize_w, resize_h;

   if (ceilf(imgH * ratio) > imgW)
     resize_w = imgW;
   else
     resize_w = int(ceilf(imgH * ratio));

   cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
              cv::INTER_LINEAR);
+  cv::copyMakeBorder(resize_img, resize_img, 0, 0, 0,
...
@@ -147,4 +147,17 @@ cv::Mat Utility::GetRotateCropImage(const cv::Mat &srcimage,
   }
 }

+std::vector<int> Utility::argsort(const std::vector<float> &array) {
+  const int array_len(array.size());
+  std::vector<int> array_index(array_len, 0);
+  for (int i = 0; i < array_len; ++i)
+    array_index[i] = i;
+
+  std::sort(array_index.begin(), array_index.end(),
+            [&array](int pos1, int pos2) { return (array[pos1] < array[pos2]); });
+
+  return array_index;
+}
+
 } // namespace PaddleOCR
\ No newline at end of file
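Taken together, `Utility::argsort` and `PermuteBatch` let the new `CRNNRecognizer::Run` sort crops by width-to-height ratio and process them in slices of `rec_batch_num_`, so each batch only pads to the widest image it actually contains. As a purely illustrative aside (not part of this commit; the ratios and batch size below are made up), a minimal Python sketch of that batching idea:

```python
# Illustrative sketch of width-sorted batching: sort crops by aspect ratio,
# then slice into fixed-size batches so each batch pads only to its own
# widest member. Ratios and batch size are made-up example values.
import math

ratios = [5.2, 1.1, 9.8, 1.0, 4.7, 5.0, 2.3]  # w/h of each text crop
batch_size = 3  # plays the role of rec_batch_num_

indices = sorted(range(len(ratios)), key=lambda i: ratios[i])  # Utility::argsort
for beg in range(0, len(indices), batch_size):
    batch = indices[beg:beg + batch_size]
    # crops are resized to height 32, so the padded batch width tracks
    # only the largest ratio inside this batch, not the global maximum
    max_ratio = max(ratios[i] for i in batch)
    print(batch, "padded width:", math.ceil(32 * max_ratio))
```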
# Paddle2ONNX Model Conversion and Prediction

This section describes how to convert a PaddleOCR model to the ONNX format and run prediction with an ONNX engine.

## 1. Environment Setup

You need to prepare both the Paddle2ONNX conversion environment and the ONNX prediction environment.

### Paddle2ONNX

Paddle2ONNX converts models from the PaddlePaddle format to the ONNX format. It currently provides stable operator export for ONNX Opset 9-11, and some Paddle operators can be converted to lower ONNX Opsets.
For more details, see [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX/blob/develop/README_zh.md).

- Install Paddle2ONNX
```
python3.7 -m pip install paddle2onnx
```

- Install ONNX Runtime
```
# Version 1.4.0 is recommended; change the version number to suit your environment
python3.7 -m pip install onnxruntime==1.4.0
```

## 2. Model Conversion

- Download a Paddle model

There are two ways to obtain a Paddle static-graph model: download a prediction model provided by PaddleOCR from the [model_list](../../doc/doc_ch/models_list.md), or convert trained weights to an inference model following the [model export guide](../../doc/doc_ch/inference.md#训练模型转inference模型).

Taking the ppocr detection model as an example:
```
wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
cd ./inference && tar xf ch_ppocr_mobile_v2.0_det_infer.tar && cd ..
```

- Convert the model

Use Paddle2ONNX to convert the Paddle static-graph model to the ONNX format:
```
paddle2onnx --model_dir=./inference/ch_ppocr_mobile_v2.0_det_infer/ \
    --model_filename=inference.pdmodel \
    --params_filename=inference.pdiparams \
    --save_file=./inference/det_mobile_onnx/model.onnx \
    --opset_version=10 \
    --enable_onnx_checker=True
```

After the command finishes, the ONNX model is saved under `./inference/det_mobile_onnx/`.
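If you want to verify the exported file yourself, independent of the `--enable_onnx_checker=True` flag above, a minimal sketch with the `onnx` Python package (a dependency of paddle2onnx) looks like this; the model path simply repeats the `--save_file` argument:

```python
# Sanity-check the exported graph structure (path matches --save_file above).
import onnx

model = onnx.load("./inference/det_mobile_onnx/model.onnx")
onnx.checker.check_model(model)  # raises onnx.checker.ValidationError on a malformed graph
print(model.opset_import)        # should report opset 10, matching --opset_version
```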
* Note: the following models cannot be converted to ONNX yet: NRTR, SAR, RARE, SRN.

## 3. ONNX Prediction

Taking the detection model as an example, run prediction with ONNX as follows:
```
python3.7 ../../tools/infer/predict_det.py --use_gpu=False --use_onnx=True \
    --det_model_dir=./inference/det_mobile_onnx/model.onnx \
    --image_dir=../../doc/imgs/1.jpg
```

After the command runs, the predicted detection box coordinates are printed to the terminal and the visualized result is saved under `./inference_results/`.
```
root INFO: 1.jpg [[[291, 295], [334, 292], [348, 844], [305, 847]], [[344, 296], [379, 294], [387, 669], [353, 671]]]
The predict time of ../../doc/imgs/1.jpg: 0.06162881851196289
The visualized image saved in ./inference_results/det_res_1.jpg
```

* Note: ONNX does not support variable-shape inputs for now, so the input must be resized to a fixed shape; the prediction results may differ slightly from direct Paddle prediction.
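For readers who want to drive the exported detector directly, without the PaddleOCR tooling, the following is a minimal onnxruntime sketch. The fixed 960x960 input size and the ImageNet-style normalization constants are assumptions based on the usual DB preprocessing, not values stated in this document; check them against your exported model.

```python
# Minimal sketch: run the exported detection model with onnxruntime directly.
# Assumptions (verify against your export): fixed 1x3x960x960 input and
# ImageNet-style normalization, as commonly used by the DB detector.
import cv2
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("./inference/det_mobile_onnx/model.onnx")

img = cv2.imread("../../doc/imgs/1.jpg")
img = cv2.resize(img, (960, 960)).astype("float32") / 255.0
mean = np.array([0.485, 0.456, 0.406], dtype="float32")
std = np.array([0.229, 0.224, 0.225], dtype="float32")
img = ((img - mean) / std).transpose(2, 0, 1)[np.newaxis, :]  # HWC -> NCHW

input_name = sess.get_inputs()[0].name  # avoids hard-coding the input name
outputs = sess.run(None, {input_name: img})
print(outputs[0].shape)  # probability map to feed into DB post-processing
```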
@@ -114,7 +114,7 @@ The recognition model is the same.
 git clone https://github.com/PaddlePaddle/PaddleOCR

 # Enter the working directory
-cd PaddleOCR/deploy/pdserver/
+cd PaddleOCR/deploy/pdserving/
 ```
 The pdserver directory contains the code to start the pipeline service and send prediction requests, including:
...
@@ -112,7 +112,7 @@ python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_mobile_v2.0_rec_in
 git clone https://github.com/PaddlePaddle/PaddleOCR

 # Enter the working directory
-cd PaddleOCR/deploy/pdserver/
+cd PaddleOCR/deploy/pdserving/
 ```
 The pdserver directory contains the code for starting the pipeline service and sending prediction requests, including:
 ```
@@ -206,7 +206,7 @@ pip3 install paddle-serving-app==0.3.1
 1. Start the server program
 ```
 cd win
 python3 ocr_web_server.py gpu  (GPU mode)
 or
 python3 ocr_web_server.py cpu  (CPU mode)
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client

import sys
import base64
import os

from ocr_reader import OCRReader

client = Client()
# TODO: load_client needs to load more than one client model;
# some details still need to be figured out.
client.load_client_config(sys.argv[1:])
client.connect(["127.0.0.1:9293"])

test_img_dir = "test_img/"

ocr_reader = OCRReader(char_dict_path="../../ppocr/utils/ppocr_keys_v1.txt")


def cv2_to_base64(image):
    # `image` holds the raw file bytes; encode them as a base64 string for the feed.
    return base64.b64encode(image).decode('utf8')


for img_file in os.listdir(test_img_dir):
    with open(os.path.join(test_img_dir, img_file), 'rb') as file:
        image_data = file.read()
    image = cv2_to_base64(image_data)
    res_list = []
    fetch_map = client.predict(
        feed={"x": image}, fetch=["save_infer_model/scale_0.tmp_1"], batch=True)
    print("fetch map:", fetch_map)
    one_batch_res = ocr_reader.postprocess(fetch_map, with_score=True)
    for res in one_batch_res:
        res_list.append(res[0])
    res = {"res": str(res_list)}
    print(res)
@@ -18,13 +18,19 @@ import json
 import base64
 import os
+import argparse
+
+parser = argparse.ArgumentParser(description="args for paddleserving")
+parser.add_argument("--image_dir", type=str, default="../../doc/imgs/")
+args = parser.parse_args()

 def cv2_to_base64(image):
     return base64.b64encode(image).decode('utf8')

 url = "http://127.0.0.1:9998/ocr/prediction"
-test_img_dir = "../../doc/imgs/"
+test_img_dir = args.image_dir

 for idx, img_file in enumerate(os.listdir(test_img_dir)):
     with open(os.path.join(test_img_dir, img_file), 'rb') as file:
         image_data1 = file.read()
@@ -36,5 +42,4 @@ for idx, img_file in enumerate(os.listdir(test_img_dir)):
     r = requests.post(url=url, data=json.dumps(data))
     print(r.json())

-test_img_dir = "../../doc/imgs/"
 print("==> total number of test imgs: ", len(os.listdir(test_img_dir)))
@@ -30,7 +30,12 @@ def cv2_to_base64(image):
     return base64.b64encode(image).decode('utf8')

-test_img_dir = "imgs/"
+import argparse
+parser = argparse.ArgumentParser(description="args for paddleserving")
+parser.add_argument("--image_dir", type=str, default="../../doc/imgs/")
+args = parser.parse_args()
+test_img_dir = args.image_dir

 for img_file in os.listdir(test_img_dir):
     with open(os.path.join(test_img_dir, img_file), 'rb') as file:
         image_data = file.read()
...
@@ -101,15 +101,28 @@ python3 tools/train.py -c configs/det/det_mv3_db.yml \
 # Single-machine multi-GPU training: set the GPU IDs to use with the --gpus parameter
 python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \
     -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
+
+# Multi-machine multi-GPU training: set the machine IPs with the --ips parameter and the GPU IDs with the --gpus parameter
+python3 -m paddle.distributed.launch --ips="xx.xx.xx.xx,xx.xx.xx.xx" --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \
+    -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
 ```
 In the commands above, -c selects training with the configs/det/det_mv3_db.yml configuration file;
 see [this link](./config.md) for a detailed explanation of the configuration file.

 You can also change training parameters with the -o parameter without modifying the yml file, for example setting the training learning rate to 0.0001:
 ```shell
 python3 tools/train.py -c configs/det/det_mv3_db.yml -o Optimizer.base_lr=0.0001
 ```
+
+**Note:** For multi-machine multi-GPU training, replace the ips value in the command above with the addresses of your machines; the machines must be able to ping each other. The command for viewing a machine's IP address is `ifconfig`.
+
+If you want to further speed up training, you can use [automatic mixed precision training](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/01_paddle2.0_introduction/basic_concept/amp_cn.html). Taking single-machine single-GPU training as an example, the command is as follows:
+```shell
+python3 tools/train.py -c configs/det/det_mv3_db.yml \
+    -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained \
+    Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True
+```

 <a name="22-----"></a>
 ## 2.2 Resuming Training from a Checkpoint
...
@@ -98,7 +98,19 @@ python3 tools/train.py -c configs/det/det_mv3_db.yml -o \
 # multi-GPU training
 # Set the GPU IDs used with the '--gpus' parameter.
 python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
+
+# multi-node, multi-GPU training
+# Set the IPs of your nodes with the '--ips' parameter and the GPU IDs with the '--gpus' parameter.
+python3 -m paddle.distributed.launch --ips="xx.xx.xx.xx,xx.xx.xx.xx" --gpus '0,1,2,3' tools/train.py -c configs/det/det_mv3_db.yml \
+    -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
+```
+
+**Note:** For multi-node multi-GPU training, you need to replace the `ips` value in the preceding command with the addresses of your machines, and the machines must be able to ping each other. The command for viewing the IP address of a machine is `ifconfig`.
+
+If you want to further speed up the training, you can use [automatic mixed precision training](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/01_paddle2.0_introduction/basic_concept/amp_en.html). For single-card training, the command is as follows:
+```
+python3 tools/train.py -c configs/det/det_mv3_db.yml \
+    -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained \
+    Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True
 ```

 ### 2.2 Load Trained Model and Continue Training
...
@@ -11,7 +11,10 @@
 #WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 #See the License for the specific language governing permissions and
 #limitations under the License.
+"""
+This code is referred from:
+https://github.com/songdejia/EAST/blob/master/data_utils.py
+"""
 import math
 import cv2
 import numpy as np
@@ -24,10 +27,10 @@ __all__ = ['EASTProcessTrain']

 class EASTProcessTrain(object):
     def __init__(self,
-                 image_shape = [512, 512],
-                 background_ratio = 0.125,
-                 min_crop_side_ratio = 0.1,
-                 min_text_size = 10,
+                 image_shape=[512, 512],
+                 background_ratio=0.125,
+                 min_crop_side_ratio=0.1,
+                 min_text_size=10,
                  **kwargs):
         self.input_size = image_shape[1]
         self.random_scale = np.array([0.5, 1, 2.0, 3.0])
@@ -282,12 +285,7 @@ class EASTProcessTrain(object):
                1.0 / max(min(poly_h, poly_w), 1.0)
         return score_map, geo_map, training_mask

-    def crop_area(self,
-                  im,
-                  polys,
-                  tags,
-                  crop_background=False,
-                  max_tries=50):
+    def crop_area(self, im, polys, tags, crop_background=False, max_tries=50):
         """
         make random crop from the input image
         :param im:
@@ -435,5 +433,4 @@ class EASTProcessTrain(object):
         data['score_map'] = score_map
         data['geo_map'] = geo_map
         data['training_mask'] = training_mask
-        # print(im.shape, score_map.shape, geo_map.shape, training_mask.shape)
         return data
\ No newline at end of file
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is referred from:
+https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/iaa_augment.py
+"""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
...
-# -*- coding:utf-8 -*-
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is referred from:
+https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_border_map.py
+"""
 from __future__ import absolute_import
 from __future__ import division
...
-# -*- coding:utf-8 -*-
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
@@ -12,12 +24,8 @@ from shapely.geometry import Polygon

 __all__ = ['MakePseGt']

-class MakePseGt(object):
-    r'''
-    Making binary mask from detection data with ICDAR format.
-    Typically following the process of class `MakeICDARData`.
-    '''

+class MakePseGt(object):
     def __init__(self, kernel_num=7, size=640, min_shrink_ratio=0.4, **kwargs):
         self.kernel_num = kernel_num
         self.min_shrink_ratio = min_shrink_ratio
@@ -38,16 +46,20 @@ class MakePseGt(object):
         text_polys *= scale

         gt_kernels = []
-        for i in range(1,self.kernel_num+1):
+        for i in range(1, self.kernel_num + 1):
             # s1->sn, from big to small
-            rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1) * i
-            text_kernel, ignore_tags = self.generate_kernel(image.shape[0:2], rate, text_polys, ignore_tags)
+            rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1
+                                                          ) * i
+            text_kernel, ignore_tags = self.generate_kernel(
+                image.shape[0:2], rate, text_polys, ignore_tags)
             gt_kernels.append(text_kernel)

         training_mask = np.ones(image.shape[0:2], dtype='uint8')
         for i in range(text_polys.shape[0]):
             if ignore_tags[i]:
-                cv2.fillPoly(training_mask, text_polys[i].astype(np.int32)[np.newaxis, :, :], 0)
+                cv2.fillPoly(training_mask,
+                             text_polys[i].astype(np.int32)[np.newaxis, :, :],
+                             0)

         gt_kernels = np.array(gt_kernels)
         gt_kernels[gt_kernels > 0] = 1
@@ -59,16 +71,25 @@ class MakePseGt(object):
         data['mask'] = training_mask.astype('float32')
         return data

-    def generate_kernel(self, img_size, shrink_ratio, text_polys, ignore_tags=None):
+    def generate_kernel(self,
+                        img_size,
+                        shrink_ratio,
+                        text_polys,
+                        ignore_tags=None):
+        """
+        Refer to part of the code:
+        https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/base_textdet_targets.py
+        """
         h, w = img_size
         text_kernel = np.zeros((h, w), dtype=np.float32)
         for i, poly in enumerate(text_polys):
             polygon = Polygon(poly)
-            distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / (polygon.length + 1e-6)
+            distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / (
+                polygon.length + 1e-6)
             subject = [tuple(l) for l in poly]
             pco = pyclipper.PyclipperOffset()
-            pco.AddPath(subject, pyclipper.JT_ROUND,
-                        pyclipper.ET_CLOSEDPOLYGON)
+            pco.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
             shrinked = np.array(pco.Execute(-distance))

             if len(shrinked) == 0 or shrinked.size == 0:
...