ocr_det.cpp 4.48 KB
Newer Older
littletomatodonkey's avatar
littletomatodonkey committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h"
#include "paddle_inference_api.h"
#include <chrono>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <vector>

#include <cstring>
#include <fstream>
#include <numeric>

#include <include/ocr_det.h>

namespace PaddleOCR {

// Builds the Paddle inference predictor for the detection model and stores it
// in predictor_.
//
// model_dir          Directory holding the "model" and "params" files.
// use_gpu            When true the predictor is configured for GPU execution;
//                    otherwise it runs on CPU with MKLDNN enabled.
// gpu_id             Device index passed to the GPU configuration; only read
//                    when use_gpu is true.
// min_subgraph_size  Accepted for interface compatibility; not used here.
// batch_size         Accepted for interface compatibility; not used here.
void DBDetector::LoadModel(const std::string &model_dir, bool use_gpu,
                           const int gpu_id, const int min_subgraph_size,
                           const int batch_size) {
  // These two parameters are not consumed by the current configuration.
  static_cast<void>(min_subgraph_size);
  static_cast<void>(batch_size);

  AnalysisConfig config;
  config.SetModel(model_dir + "/model", model_dir + "/params");

  if (use_gpu) {
    // Honor the caller's device request instead of always forcing CPU.
    // The first argument is the initial GPU memory pool size in MB.
    // NOTE(review): 100 MB is a starting value chosen here; tune if needed.
    config.EnableUseGpu(100, gpu_id);
  } else {
    config.DisableGpu();
    config.EnableMKLDNN(); // enable MKLDNN acceleration
    config.SetCpuMathLibraryNumThreads(10);
  }

  // Must be false when ZeroCopyTensor is used.
  config.SwitchUseFeedFetchOps(false);
  // Must be true when the model has multiple inputs.
  config.SwitchSpecifyInputNames(true);
  // config.SwitchIrDebug(true); //
  // Visual debugging option: when enabled, a dot file is generated after each
  // graph optimization pass.
  // config.SwitchIrOptim(false); // Defaults to true; setting it to false
  // turns off all optimizations.
  config.EnableMemoryOptim(); // enable host/device memory reuse

  this->predictor_ = CreatePaddlePredictor(config);
  //   predictor_ = std::move(CreatePaddlePredictor(config)); // PaddleDetection
  //   usage
}

// Runs text detection on one image and writes the detected boxes to `boxes`.
//
// Steps, in order: copy the source image, resize, normalize, permute the
// pixels into a float buffer, run the predictor, threshold the output map,
// extract boxes from the bitmap, filter them against the source image, and
// finally save a visualization to ./det_res.png.
//
// img    Input image. A copy is taken before preprocessing and used both for
//        box filtering and for the visualization.
// boxes  Output. Overwritten with the detected boxes, indexed as
//        boxes[box][point][0 = x, 1 = y]. Cleared if the model output does
//        not have the expected rank.
void DBDetector::Run(cv::Mat &img,
                     std::vector<std::vector<std::vector<int>>> &boxes) {
  float ratio_h{};
  float ratio_w{};

  cv::Mat srcimg;
  cv::Mat resize_img;
  img.copyTo(srcimg);
  this->resize_op_.Run(img, resize_img, this->max_side_len_, ratio_h, ratio_w);

  this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
                          this->is_scale_);

  // Buffers are owned by std::vector so they are released on every exit path,
  // including when a library call below throws.
  std::vector<float> input(static_cast<std::size_t>(3) *
                           static_cast<std::size_t>(resize_img.rows) *
                           static_cast<std::size_t>(resize_img.cols));
  this->permute_op_.Run(&resize_img, input.data());

  auto input_names = this->predictor_->GetInputNames();
  auto input_t = this->predictor_->GetInputTensor(input_names[0]);
  input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
  input_t->copy_from_cpu(input.data());

  this->predictor_->ZeroCopyRun();

  auto output_names = this->predictor_->GetOutputNames();
  auto output_t = this->predictor_->GetOutputTensor(output_names[0]);
  const std::vector<int> output_shape = output_t->shape();

  // Dimensions 2 and 3 are read below; refuse to index past the shape.
  if (output_shape.size() < 4) {
    std::cerr << "DBDetector::Run: unexpected output tensor rank "
              << output_shape.size() << std::endl;
    boxes.clear();
    return;
  }

  const int out_num = std::accumulate(output_shape.begin(), output_shape.end(),
                                      1, std::multiplies<int>());

  std::vector<float> out_data;
  out_data.resize(out_num);
  output_t->copy_to_cpu(out_data.data());

  const int map_rows = output_shape[2];
  const int map_cols = output_shape[3];
  const int map_size = map_rows * map_cols;

  // 8-bit copy of the first map_size output values, scaled by 255 and
  // truncated, used as the input to the binary threshold.
  std::vector<unsigned char> cbuf(map_size);
  for (int i = 0; i < map_size; ++i) {
    cbuf[i] = static_cast<unsigned char>(out_data[i] * 255);
  }

  // Both Mats are non-owning views; cbuf and out_data outlive them.
  cv::Mat cbuf_map(map_rows, map_cols, CV_8UC1, cbuf.data());
  cv::Mat pred_map(map_rows, map_cols, CV_32F, out_data.data());

  const double threshold = 0.3 * 255;
  const double maxvalue = 255;
  cv::Mat bit_map;
  cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);

  boxes = post_processor_.boxes_from_bitmap(pred_map, bit_map);

  boxes = post_processor_.filter_tag_det_res(boxes, ratio_h, ratio_w, srcimg);

  //// visualization
  cv::Mat img_vis;
  srcimg.copyTo(img_vis);

  // Each box is drawn from its own point list, so no fixed-size (or
  // variable-length) array is needed and an empty result is handled naturally.
  std::vector<cv::Point> polygon;
  for (const auto &box : boxes) {
    polygon.clear();
    for (const auto &point : box) {
      polygon.emplace_back(point[0], point[1]);
    }
    if (polygon.empty()) {
      continue;
    }
    const cv::Point *pts = polygon.data();
    const int npts = static_cast<int>(polygon.size());
    cv::polylines(img_vis, &pts, &npts, 1, true, CV_RGB(0, 255, 0), 2, 8, 0);
  }

  cv::imwrite("./det_res.png", img_vis);

  std::cout << "The detection visualized image saved in ./det_res.png"
            << std::endl;
}

} // namespace PaddleOCR