ocr_db_crnn.cc 11.3 KB
Newer Older
LDOUVLEV's avatar
LDOUVLEV committed
1
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
LDOUBLEV's avatar
LDOUBLEV committed
2
3
4
5
6
7
8
9
10
11
12
13
14
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15
#include "paddle_api.h" // NOLINT
LDOUBLEV's avatar
LDOUBLEV committed
16
17
#include <chrono>

LDOUVLEV's avatar
LDOUVLEV committed
18
19
#include "crnn_process.h"
#include "db_post_process.h"
LDOUBLEV's avatar
LDOUBLEV committed
20

21
using namespace paddle::lite_api; // NOLINT
LDOUVLEV's avatar
LDOUVLEV committed
22
using namespace std;
LDOUBLEV's avatar
LDOUBLEV committed
23
24

// fill tensor with mean and scale and trans layout: nhwc -> nchw, neon speed up
25
void neon_mean_scale(const float *din, float *dout, int size,
LDOUBLEV's avatar
LDOUBLEV committed
26
27
28
29
30
31
32
33
34
35
36
37
38
                     const std::vector<float> mean,
                     const std::vector<float> scale) {
  if (mean.size() != 3 || scale.size() != 3) {
    std::cerr << "[ERROR] mean or scale size must equal to 3\n";
    exit(1);
  }
  float32x4_t vmean0 = vdupq_n_f32(mean[0]);
  float32x4_t vmean1 = vdupq_n_f32(mean[1]);
  float32x4_t vmean2 = vdupq_n_f32(mean[2]);
  float32x4_t vscale0 = vdupq_n_f32(scale[0]);
  float32x4_t vscale1 = vdupq_n_f32(scale[1]);
  float32x4_t vscale2 = vdupq_n_f32(scale[2]);

39
40
41
  float *dout_c0 = dout;
  float *dout_c1 = dout + size;
  float *dout_c2 = dout + size * 2;
LDOUBLEV's avatar
LDOUBLEV committed
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

  int i = 0;
  for (; i < size - 3; i += 4) {
    float32x4x3_t vin3 = vld3q_f32(din);
    float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
    float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
    float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
    float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
    float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
    float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
    vst1q_f32(dout_c0, vs0);
    vst1q_f32(dout_c1, vs1);
    vst1q_f32(dout_c2, vs2);

    din += 12;
    dout_c0 += 4;
    dout_c1 += 4;
    dout_c2 += 4;
  }
  for (; i < size; i++) {
    *(dout_c0++) = (*(din++) - mean[0]) * scale[0];
    *(dout_c1++) = (*(din++) - mean[1]) * scale[1];
    *(dout_c2++) = (*(din++) - mean[2]) * scale[2];
  }
}

// resize image to a size multiple of 32 which is required by the network
69
70
// Resizes `img` for the detection network.
// The image is first scaled down so its longer side does not exceed
// `max_size_len` (it is left at scale 1 otherwise), then each side is snapped
// to a multiple of 32. The effective height and width scale factors
// (resized / original) are appended to `ratio_hw`, height first.
// Returns the resized image.
cv::Mat DetResizeImg(const cv::Mat img, int max_size_len,
                     std::vector<float> &ratio_hw) {
  const int src_w = img.cols;
  const int src_h = img.rows;

  // Scale by the longer side only when it is over the limit.
  float shrink = 1.f;
  const int longer_side = (src_h > src_w) ? src_h : src_w;
  if (longer_side > max_size_len) {
    shrink = static_cast<float>(max_size_len) / static_cast<float>(longer_side);
  }

  // Snap a side length to a multiple of 32: exact multiples are kept, anything
  // below 64 becomes 32, and larger values drop to one multiple of 32 below
  // their rounded-down value (e.g. 100 -> 64).
  const auto snap_to_32 = [](int side) -> int {
    if (side % 32 == 0) {
      return side;
    }
    const int blocks = side / 32;
    if (blocks <= 1) {
      return 32;
    }
    return (blocks - 1) * 32;
  };

  const int dst_h = snap_to_32(static_cast<int>(static_cast<float>(src_h) * shrink));
  const int dst_w = snap_to_32(static_cast<int>(static_cast<float>(src_w) * shrink));

  cv::Mat resized;
  cv::resize(img, resized, cv::Size(dst_w, dst_h));

  ratio_hw.push_back(static_cast<float>(dst_h) / static_cast<float>(src_h));
  ratio_hw.push_back(static_cast<float>(dst_w) / static_cast<float>(src_w));
  return resized;
}

108
void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
LDOUVLEV's avatar
LDOUVLEV committed
109
                 std::shared_ptr<PaddlePredictor> predictor_crnn,
110
111
                 std::string dict_path, std::vector<std::string> &rec_text,
                 std::vector<float> &rec_text_score) {
LDOUBLEV's avatar
LDOUBLEV committed
112
113
114
115
116
117
118
119
  std::vector<float> mean = {0.5f, 0.5f, 0.5f};
  std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};

  cv::Mat srcimg;
  img.copyTo(srcimg);
  cv::Mat crop_img;
  cv::Mat resize_img;

LDOUVLEV's avatar
LDOUVLEV committed
120
  auto charactor_dict = ReadDict(dict_path);
LDOUBLEV's avatar
LDOUBLEV committed
121
122

  int index = 0;
LDOUVLEV's avatar
LDOUVLEV committed
123
124
  for (int i = boxes.size() - 1; i >= 0; i--) {
    crop_img = GetRotateCropImage(srcimg, boxes[i]);
LDOUBLEV's avatar
LDOUBLEV committed
125
126
    float wh_ratio = float(crop_img.cols) / float(crop_img.rows);

LDOUVLEV's avatar
LDOUVLEV committed
127
    resize_img = CrnnResizeImg(crop_img, wh_ratio);
LDOUBLEV's avatar
LDOUBLEV committed
128
129
    resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.f);

130
    const float *dimg = reinterpret_cast<const float *>(resize_img.data);
LDOUBLEV's avatar
LDOUBLEV committed
131

LDOUVLEV's avatar
LDOUVLEV committed
132
133
    std::unique_ptr<Tensor> input_tensor0(
        std::move(predictor_crnn->GetInput(0)));
LDOUBLEV's avatar
LDOUBLEV committed
134
    input_tensor0->Resize({1, 3, resize_img.rows, resize_img.cols});
135
    auto *data0 = input_tensor0->mutable_data<float>();
LDOUBLEV's avatar
LDOUBLEV committed
136

137
138
    neon_mean_scale(dimg, data0, resize_img.rows * resize_img.cols, mean,
                    scale);
LDOUBLEV's avatar
LDOUBLEV committed
139
140
141
142
143
    //// Run CRNN predictor
    predictor_crnn->Run();

    // Get output and run postprocess
    std::unique_ptr<const Tensor> output_tensor0(
LDOUVLEV's avatar
LDOUVLEV committed
144
        std::move(predictor_crnn->GetOutput(0)));
LDOUBLEV's avatar
LDOUBLEV committed
145
    auto *rec_idx = output_tensor0->data<int64>();
LDOUBLEV's avatar
LDOUBLEV committed
146
147
148
149
150

    auto rec_idx_lod = output_tensor0->lod();
    auto shape_out = output_tensor0->shape();

    std::vector<int> pred_idx;
LDOUBLEV's avatar
LDOUBLEV committed
151
    for (int n = int(rec_idx_lod[0][0]); n < int(rec_idx_lod[0][1]); n += 1) {
LDOUBLEV's avatar
LDOUBLEV committed
152
153
154
      pred_idx.push_back(int(rec_idx[n]));
    }

155
156
    if (pred_idx.size() < 1e-3)
      continue;
LDOUBLEV's avatar
LDOUBLEV committed
157
158

    index += 1;
LDOUVLEV's avatar
LDOUVLEV committed
159
    std::string pred_txt = "";
LDOUBLEV's avatar
LDOUBLEV committed
160
    for (int n = 0; n < pred_idx.size(); n++) {
LDOUVLEV's avatar
LDOUVLEV committed
161
      pred_txt += charactor_dict[pred_idx[n]];
LDOUBLEV's avatar
LDOUBLEV committed
162
    }
LDOUVLEV's avatar
LDOUVLEV committed
163
    rec_text.push_back(pred_txt);
LDOUBLEV's avatar
LDOUBLEV committed
164
165

    ////get score
LDOUVLEV's avatar
LDOUVLEV committed
166
167
    std::unique_ptr<const Tensor> output_tensor1(
        std::move(predictor_crnn->GetOutput(1)));
168
    auto *predict_batch = output_tensor1->data<float>();
LDOUBLEV's avatar
LDOUBLEV committed
169
170
171
172
173
174
175
176
177
178
179
    auto predict_shape = output_tensor1->shape();

    auto predict_lod = output_tensor1->lod();

    int argmax_idx;
    int blank = predict_shape[1];
    float score = 0.f;
    int count = 0;
    float max_value = 0.0f;

    for (int n = predict_lod[0][0]; n < predict_lod[0][1] - 1; n++) {
LDOUVLEV's avatar
LDOUVLEV committed
180
181
182
183
184
      argmax_idx = int(Argmax(&predict_batch[n * predict_shape[1]],
                              &predict_batch[(n + 1) * predict_shape[1]]));
      max_value =
          float(*std::max_element(&predict_batch[n * predict_shape[1]],
                                  &predict_batch[(n + 1) * predict_shape[1]]));
LDOUBLEV's avatar
LDOUBLEV committed
185
186
187
188
189
190
191

      if (blank - 1 - argmax_idx > 1e-5) {
        score += max_value;
        count += 1;
      }
    }
    score /= count;
LDOUVLEV's avatar
LDOUVLEV committed
192
    rec_text_score.push_back(score);
LDOUBLEV's avatar
LDOUBLEV committed
193
194
195
  }
}

196
197
198
std::vector<std::vector<std::vector<int>>>
RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
            std::map<std::string, double> Config) {
LDOUBLEV's avatar
LDOUBLEV committed
199
  // Read img
LDOUVLEV's avatar
LDOUVLEV committed
200
  int max_side_len = int(Config["max_side_len"]);
LDOUBLEV's avatar
LDOUBLEV committed
201
202
203
204

  cv::Mat srcimg;
  img.copyTo(srcimg);

LDOUVLEV's avatar
LDOUVLEV committed
205
206
  std::vector<float> ratio_hw;
  img = DetResizeImg(img, max_side_len, ratio_hw);
LDOUBLEV's avatar
LDOUBLEV committed
207
208
209
210
211
212
  cv::Mat img_fp;
  img.convertTo(img_fp, CV_32FC3, 1.0 / 255.f);

  // Prepare input data from image
  std::unique_ptr<Tensor> input_tensor0(std::move(predictor->GetInput(0)));
  input_tensor0->Resize({1, 3, img_fp.rows, img_fp.cols});
213
  auto *data0 = input_tensor0->mutable_data<float>();
LDOUBLEV's avatar
LDOUBLEV committed
214
215

  std::vector<float> mean = {0.485f, 0.456f, 0.406f};
LDOUVLEV's avatar
LDOUVLEV committed
216
  std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
217
  const float *dimg = reinterpret_cast<const float *>(img_fp.data);
LDOUBLEV's avatar
LDOUBLEV committed
218
219
220
221
222
223
  neon_mean_scale(dimg, data0, img_fp.rows * img_fp.cols, mean, scale);

  // Run predictor
  predictor->Run();

  // Get output and post process
LDOUVLEV's avatar
LDOUVLEV committed
224
225
  std::unique_ptr<const Tensor> output_tensor(
      std::move(predictor->GetOutput(0)));
226
  auto *outptr = output_tensor->data<float>();
LDOUBLEV's avatar
LDOUBLEV committed
227
228
229
230
231
232
  auto shape_out = output_tensor->shape();

  // Save output
  float pred[shape_out[2]][shape_out[3]];
  unsigned char cbuf[shape_out[2]][shape_out[3]];

LDOUVLEV's avatar
LDOUVLEV committed
233
234
235
236
  for (int i = 0; i < int(shape_out[2] * shape_out[3]); i++) {
    pred[int(i / int(shape_out[3]))][int(i % shape_out[3])] = float(outptr[i]);
    cbuf[int(i / int(shape_out[3]))][int(i % shape_out[3])] =
        (unsigned char)((outptr[i]) * 255);
LDOUBLEV's avatar
LDOUBLEV committed
237
238
  }

239
240
  cv::Mat cbuf_map(shape_out[2], shape_out[3], CV_8UC1, (unsigned char *)cbuf);
  cv::Mat pred_map(shape_out[2], shape_out[3], CV_32F, (float *)pred);
LDOUBLEV's avatar
LDOUBLEV committed
241

LDOUVLEV's avatar
LDOUVLEV committed
242
  const double threshold = double(Config["det_db_thresh"]) * 255;
LDOUBLEV's avatar
LDOUBLEV committed
243
244
245
246
  const double maxvalue = 255;
  cv::Mat bit_map;
  cv::threshold(cbuf_map, bit_map, threshold, maxvalue, cv::THRESH_BINARY);

LDOUVLEV's avatar
LDOUVLEV committed
247
  auto boxes = BoxesFromBitmap(pred_map, bit_map, Config);
LDOUBLEV's avatar
LDOUBLEV committed
248

LDOUVLEV's avatar
LDOUVLEV committed
249
250
  std::vector<std::vector<std::vector<int>>> filter_boxes =
      FilterTagDetRes(boxes, ratio_hw[0], ratio_hw[1], srcimg);
LDOUBLEV's avatar
LDOUBLEV committed
251

LDOUVLEV's avatar
LDOUVLEV committed
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
  return filter_boxes;
}

std::shared_ptr<PaddlePredictor> loadModel(std::string model_file) {
  MobileConfig config;
  config.set_model_from_file(model_file);

  std::shared_ptr<PaddlePredictor> predictor =
      CreatePaddlePredictor<MobileConfig>(config);
  return predictor;
}

// Draws every box as a closed green polyline on a copy of `srcimg`, writes the
// result to ./vis.jpg and returns it.
//   srcimg - image to annotate (not modified)
//   boxes  - boxes[n][m] holds the x and y of the m-th corner of box n
cv::Mat Visualization(cv::Mat srcimg,
                      std::vector<std::vector<std::vector<int>>> boxes) {
  cv::Mat img_vis;
  srcimg.copyTo(img_vis);

  // One reusable point buffer sized per box. This replaces the original
  // variable-length stack array, whose rows were fixed at 4 points while the
  // copy loop used boxes[0].size() for every box.
  std::vector<cv::Point> polygon;
  for (const auto &box : boxes) {
    polygon.clear();
    for (const auto &corner : box) {
      polygon.push_back(cv::Point(corner[0], corner[1]));
    }
    if (polygon.empty()) {
      continue;
    }

    const cv::Point *ppt[1] = {polygon.data()};
    int npt[] = {static_cast<int>(polygon.size())};
    cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
  }

  cv::imwrite("./vis.jpg", img_vis);
  std::cout << "The detection visualized image saved in ./vis.jpg" << std::endl;
  return img_vis;
}

285
286
// Splits `str` into tokens separated by any character contained in `delim`.
// Runs of consecutive delimiters are treated as one separator, so no empty
// tokens are produced; an empty `str` yields an empty vector and an empty
// `delim` yields the whole string as a single token.
// This keeps the tokenisation rules of the previous strtok-based version but
// no longer leaks two heap buffers per call or relies on strtok's hidden
// global state.
std::vector<std::string> split(const std::string &str,
                               const std::string &delim) {
  std::vector<std::string> res;

  std::string::size_type token_begin = str.find_first_not_of(delim);
  while (token_begin != std::string::npos) {
    const std::string::size_type token_end =
        str.find_first_of(delim, token_begin);
    if (token_end == std::string::npos) {
      // Last token runs to the end of the string.
      res.push_back(str.substr(token_begin));
      break;
    }
    res.push_back(str.substr(token_begin, token_end - token_begin));
    token_begin = str.find_first_not_of(delim, token_end);
  }

  return res;
}

LDOUVLEV's avatar
LDOUVLEV committed
306
307
308
309
310
311
312
313
314
315
// Loads a "key value" text configuration into a map.
// Every line returned by ReadDict(config_path) is split on spaces; the first
// token becomes the key and the second is parsed as a double with std::stod
// (which throws if the token is not numeric). Lines with fewer than two
// tokens, such as blank lines, are skipped instead of indexing past the end
// of the token list.
std::map<std::string, double> LoadConfigTxt(std::string config_path) {
  auto config = ReadDict(config_path);

  std::map<std::string, double> dict;
  for (const auto &line : config) {
    std::vector<std::string> res = split(line, " ");
    if (res.size() < 2) {
      continue;
    }
    dict[res[0]] = std::stod(res[1]);
  }
  return dict;
}
LDOUBLEV's avatar
LDOUBLEV committed
316

317
int main(int argc, char **argv) {
LDOUVLEV's avatar
LDOUVLEV committed
318
319
320
  if (argc < 5) {
    std::cerr << "[ERROR] usage: " << argv[0]
              << " det_model_file rec_model_file image_path\n";
LDOUBLEV's avatar
LDOUBLEV committed
321
322
323
324
325
    exit(1);
  }
  std::string det_model_file = argv[1];
  std::string rec_model_file = argv[2];
  std::string img_path = argv[3];
LDOUVLEV's avatar
LDOUVLEV committed
326
327
328
329
  std::string dict_path = argv[4];

  //// load config from txt file
  auto Config = LoadConfigTxt("./config.txt");
LDOUBLEV's avatar
LDOUBLEV committed
330
331
332

  auto start = std::chrono::system_clock::now();

LDOUVLEV's avatar
LDOUVLEV committed
333
334
335
  auto det_predictor = loadModel(det_model_file);
  auto rec_predictor = loadModel(rec_model_file);

LDOUBLEV's avatar
LDOUBLEV committed
336
  cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
LDOUVLEV's avatar
LDOUVLEV committed
337
  auto boxes = RunDetModel(det_predictor, srcimg, Config);
LDOUBLEV's avatar
LDOUBLEV committed
338

LDOUVLEV's avatar
LDOUVLEV committed
339
340
  std::vector<std::string> rec_text;
  std::vector<float> rec_text_score;
341
342
  RunRecModel(boxes, srcimg, rec_predictor, dict_path, rec_text,
              rec_text_score);
LDOUBLEV's avatar
LDOUBLEV committed
343

LDOUVLEV's avatar
LDOUVLEV committed
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
  auto end = std::chrono::system_clock::now();
  auto duration =
      std::chrono::duration_cast<std::chrono::microseconds>(end - start);

  //// visualization
  auto img_vis = Visualization(srcimg, boxes);

  //// print recognized text
  for (int i = 0; i < rec_text.size(); i++) {
    std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
              << std::endl;
  }

  std::cout << "花费了"
            << double(duration.count()) *
                   std::chrono::microseconds::period::num /
                   std::chrono::microseconds::period::den
LDOUBLEV's avatar
LDOUBLEV committed
361
362
363
364
            << "秒" << std::endl;

  return 0;
}