Merge pull request #1 from PaddlePaddle/develop

update-2020-7-17

Merge pull request #1 from PaddlePaddle/develop
update-2020-7-17
26219d5f · shaohua.zhang · GitHub · 0e8a3417 · 311c5997 · 26219d5f
Unverified Commit 26219d5f authored Jul 17, 2020 by shaohua.zhang Committed by GitHub Jul 17, 2020
20 changed files
--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <include/ocr_rec.h>
+
+namespace PaddleOCR {
+
+// Recognizes the text inside every box of `img` and prints, per box, a running
+// index, the decoded characters and a mean confidence score to stdout.
+//
+// boxes: quadrilaterals given as four [x, y] integer points each; they are
+//        visited from the last element to the first.
+// img:   source image; it is copied before cropping.
+void CRNNRecognizer::Run(std::vector<std::vector<std::vector<int>>> boxes,
+                         cv::Mat &img) {
+  cv::Mat srcimg;
+  img.copyTo(srcimg);
+  cv::Mat crop_img;
+  cv::Mat resize_img;
+
+  std::cout << "The predicted text is :" << std::endl;
+  int index = 0;
+  for (int i = static_cast<int>(boxes.size()) - 1; i >= 0; i--) {
+    crop_img = GetRotateCropImage(srcimg, boxes[i]);
+
+    const float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
+
+    this->resize_op_.Run(crop_img, resize_img, wh_ratio);
+
+    this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
+                            this->is_scale_);
+
+    // Flat 1 x 3 x H x W buffer that is handed to the predictor.
+    std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
+
+    this->permute_op_.Run(&resize_img, input.data());
+
+    auto input_names = this->predictor_->GetInputNames();
+    auto input_t = this->predictor_->GetInputTensor(input_names[0]);
+    input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+    input_t->copy_from_cpu(input.data());
+
+    this->predictor_->ZeroCopyRun();
+
+    // Output 0: label indices; its LoD delimits the range that is decoded.
+    std::vector<int64_t> rec_idx;
+    auto output_names = this->predictor_->GetOutputNames();
+    auto output_t = this->predictor_->GetOutputTensor(output_names[0]);
+    auto rec_idx_lod = output_t->lod();
+    auto shape_out = output_t->shape();
+    int out_num = std::accumulate(shape_out.begin(), shape_out.end(), 1,
+                                  std::multiplies<int>());
+
+    rec_idx.resize(out_num);
+    output_t->copy_to_cpu(rec_idx.data());
+
+    std::vector<int> pred_idx;
+    for (int n = int(rec_idx_lod[0][0]); n < int(rec_idx_lod[0][1]); n++) {
+      pred_idx.push_back(int(rec_idx[n]));
+    }
+
+    // Nothing was recognized in this box: print nothing for it.
+    if (pred_idx.empty())
+      continue;
+
+    index += 1;
+    std::cout << index << "\t";
+    for (const int label : pred_idx) {
+      std::cout << label_list_[label];
+    }
+
+    // Output 1: per-step class scores, predict_shape[1] values per step.
+    std::vector<float> predict_batch;
+    auto output_t_1 = this->predictor_->GetOutputTensor(output_names[1]);
+
+    auto predict_lod = output_t_1->lod();
+    auto predict_shape = output_t_1->shape();
+    int out_num_1 = std::accumulate(predict_shape.begin(), predict_shape.end(),
+                                    1, std::multiplies<int>());
+
+    predict_batch.resize(out_num_1);
+    output_t_1->copy_to_cpu(predict_batch.data());
+
+    int argmax_idx;
+    int blank = predict_shape[1];
+    float score = 0.f;
+    int count = 0;
+    float max_value = 0.0f;
+
+    // Convert the bounds to int first: the LoD values are unsigned, so
+    // "upper - 1" would wrap around when the upper bound is 0.
+    const int step_begin = int(predict_lod[0][0]);
+    const int step_end = int(predict_lod[0][1]) - 1;
+    for (int n = step_begin; n < step_end; n++) {
+      argmax_idx =
+          int(Utility::argmax(&predict_batch[n * predict_shape[1]],
+                              &predict_batch[(n + 1) * predict_shape[1]]));
+      max_value =
+          float(*std::max_element(&predict_batch[n * predict_shape[1]],
+                                  &predict_batch[(n + 1) * predict_shape[1]]));
+      // Steps whose best class is the last one (index blank - 1) are skipped.
+      if (argmax_idx < blank - 1) {
+        score += max_value;
+        count += 1;
+      }
+    }
+    // Avoid 0 / 0 (NaN) when no step contributed to the score.
+    if (count > 0) {
+      score /= count;
+    }
+    std::cout << "\tscore: " << score << std::endl;
+  }
+}
+
+// Creates the predictor for the recognition model stored in `model_dir`
+// (expects the files "<model_dir>/model" and "<model_dir>/params").
+void CRNNRecognizer::LoadModel(const std::string &model_dir) {
+  AnalysisConfig config;
+  const std::string model_file = model_dir + "/model";
+  const std::string params_file = model_dir + "/params";
+  config.SetModel(model_file, params_file);
+
+  if (!this->use_gpu_) {
+    // CPU path: optionally enable MKL-DNN and set the math-library threads.
+    config.DisableGpu();
+    if (this->use_mkldnn_) {
+      config.EnableMKLDNN();
+    }
+    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
+  } else {
+    config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
+  }
+
+  // Feed/fetch ops must be off when zero-copy tensors are used.
+  config.SwitchUseFeedFetchOps(false);
+  // Input names must be specified when the model has multiple inputs.
+  config.SwitchSpecifyInputNames(true);
+
+  config.SwitchIrOptim(true);
+
+  config.EnableMemoryOptim();
+  config.DisableGlogInfo();
+
+  this->predictor_ = CreatePaddlePredictor(config);
+}
+
+// Cuts the quadrilateral `box` (four [x, y] points) out of `srcimage` and
+// warps it to an upright rectangle. Point 0 -> 1 defines the output width and
+// point 0 -> 3 the output height. A result that is at least 1.5 times taller
+// than wide is transposed and flipped before it is returned.
+cv::Mat CRNNRecognizer::GetRotateCropImage(const cv::Mat &srcimage,
+                                           std::vector<std::vector<int>> box) {
+  int x_collect[4] = {box[0][0], box[1][0], box[2][0], box[3][0]};
+  int y_collect[4] = {box[0][1], box[1][1], box[2][1], box[3][1]};
+  const int left = *std::min_element(x_collect, x_collect + 4);
+  const int right = *std::max_element(x_collect, x_collect + 4);
+  const int top = *std::min_element(y_collect, y_collect + 4);
+  const int bottom = *std::max_element(y_collect, y_collect + 4);
+
+  // Copy only the bounding rectangle; a deep copy of the whole source image
+  // is not needed because the source is never written to.
+  cv::Mat img_crop;
+  srcimage(cv::Rect(left, top, right - left, bottom - top)).copyTo(img_crop);
+
+  // `box` is a by-value copy, so it can be shifted into crop coordinates.
+  for (auto &pt : box) {
+    pt[0] -= left;
+    pt[1] -= top;
+  }
+
+  // Truncated Euclidean distance between two integer points.
+  auto edge_length = [](const std::vector<int> &a, const std::vector<int> &b) {
+    const double dx = a[0] - b[0];
+    const double dy = a[1] - b[1];
+    return int(sqrt(dx * dx + dy * dy));
+  };
+  const int img_crop_width = edge_length(box[0], box[1]);
+  const int img_crop_height = edge_length(box[0], box[3]);
+
+  cv::Point2f pts_std[4];
+  pts_std[0] = cv::Point2f(0., 0.);
+  pts_std[1] = cv::Point2f(img_crop_width, 0.);
+  pts_std[2] = cv::Point2f(img_crop_width, img_crop_height);
+  pts_std[3] = cv::Point2f(0.f, img_crop_height);
+
+  cv::Point2f pointsf[4];
+  for (int k = 0; k < 4; k++) {
+    pointsf[k] = cv::Point2f(box[k][0], box[k][1]);
+  }
+
+  cv::Mat M = cv::getPerspectiveTransform(pointsf, pts_std);
+
+  // The fifth argument of warpPerspective is the interpolation flag and the
+  // sixth is the border mode, so the border mode has to be passed explicitly.
+  cv::Mat dst_img;
+  cv::warpPerspective(img_crop, dst_img, M,
+                      cv::Size(img_crop_width, img_crop_height),
+                      cv::INTER_LINEAR, cv::BORDER_REPLICATE);
+
+  if (float(dst_img.rows) >= float(dst_img.cols) * 1.5) {
+    cv::Mat rotated;
+    cv::transpose(dst_img, rotated);
+    cv::flip(rotated, rotated, 0);
+    return rotated;
+  }
+  return dst_img;
+}
+
+} // namespace PaddleOCR
--- a/deploy/cpp_infer/src/postprocess_op.cpp
+++ b/deploy/cpp_infer/src/postprocess_op.cpp
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <include/postprocess_op.h>
+
+namespace PaddleOCR {
+
+// Writes area * unclip_ratio / perimeter of the four-point polygon `box`
+// (each point is [x, y]) into `distance`.
+void PostProcessor::GetContourArea(const std::vector<std::vector<float>> &box,
+                                   float unclip_ratio, float &distance) {
+  const int kNumPoints = 4;
+  float cross_sum = 0.0f; // twice the signed polygon area
+  float perimeter = 0.0f;
+  for (int cur = 0; cur < kNumPoints; cur++) {
+    const int nxt = (cur + 1) % kNumPoints;
+    cross_sum += box[cur][0] * box[nxt][1] - box[cur][1] * box[nxt][0];
+    perimeter += sqrtf((box[cur][0] - box[nxt][0]) * (box[cur][0] - box[nxt][0]) +
+                       (box[cur][1] - box[nxt][1]) * (box[cur][1] - box[nxt][1]));
+  }
+  const float area = fabs(float(cross_sum / 2.0));
+
+  distance = area * unclip_ratio / perimeter;
+}
+
+// Offsets the four-point polygon `box` by the distance computed in
+// GetContourArea and returns the minimum-area rectangle around all points of
+// the offset result. When the offset produces no points, a 1x1 rectangle
+// centred at (0, 0) is returned.
+cv::RotatedRect PostProcessor::UnClip(std::vector<std::vector<float>> box,
+                                      const float &unclip_ratio) {
+  float distance = 1.0;
+
+  GetContourArea(box, unclip_ratio, distance);
+
+  ClipperLib::ClipperOffset offset;
+  ClipperLib::Path p;
+  p << ClipperLib::IntPoint(int(box[0][0]), int(box[0][1]))
+    << ClipperLib::IntPoint(int(box[1][0]), int(box[1][1]))
+    << ClipperLib::IntPoint(int(box[2][0]), int(box[2][1]))
+    << ClipperLib::IntPoint(int(box[3][0]), int(box[3][1]));
+  offset.AddPath(p, ClipperLib::jtRound, ClipperLib::etClosedPolygon);
+
+  ClipperLib::Paths soln;
+  offset.Execute(soln, distance);
+  std::vector<cv::Point2f> points;
+
+  // Each path is walked with its own length; bounding the inner loop by the
+  // last path's length would read out of range on shorter paths.
+  for (const auto &path : soln) {
+    for (const auto &vertex : path) {
+      points.emplace_back(vertex.X, vertex.Y);
+    }
+  }
+
+  if (points.empty()) {
+    return cv::RotatedRect(cv::Point2f(0, 0), cv::Size2f(1, 1), 0);
+  }
+  return cv::minAreaRect(points);
+}
+
+// Copies a float matrix into a freshly allocated array of row arrays.
+// The arrays are allocated with new[] and are not released here, so the
+// caller has to delete[] every row and then the outer array.
+float **PostProcessor::Mat2Vec(cv::Mat mat) {
+  float **rows = new float *[mat.rows];
+  for (int r = 0; r < mat.rows; ++r)
+    rows[r] = new float[mat.cols];
+
+  for (int r = 0; r < mat.rows; ++r) {
+    float *dst = rows[r];
+    for (int c = 0; c < mat.cols; ++c) {
+      dst[c] = mat.at<float>(r, c);
+    }
+  }
+
+  return rows;
+}
+
+// Reorders four [x, y] points: the two with the smallest x form the left
+// pair, the other two the right pair. The result is
+// {left/smaller-y, right/smaller-y, right/larger-y, left/larger-y}.
+std::vector<std::vector<int>>
+PostProcessor::OrderPointsClockwise(std::vector<std::vector<int>> pts) {
+  // `pts` is already a private copy, so it can be sorted in place.
+  std::sort(pts.begin(), pts.end(), XsortInt);
+
+  const bool left_flipped = pts[0][1] > pts[1][1];
+  const bool right_flipped = pts[2][1] > pts[3][1];
+
+  const std::vector<int> &left_first = left_flipped ? pts[1] : pts[0];
+  const std::vector<int> &left_second = left_flipped ? pts[0] : pts[1];
+  const std::vector<int> &right_first = right_flipped ? pts[3] : pts[2];
+  const std::vector<int> &right_second = right_flipped ? pts[2] : pts[3];
+
+  std::vector<std::vector<int>> ordered = {left_first, right_first,
+                                           right_second, left_second};
+  return ordered;
+}
+
+// Copies a float matrix into a vector of rows (one inner vector per row).
+std::vector<std::vector<float>> PostProcessor::Mat2Vector(cv::Mat mat) {
+  std::vector<std::vector<float>> rows;
+
+  for (int r = 0; r < mat.rows; ++r) {
+    std::vector<float> row;
+    for (int c = 0; c < mat.cols; ++c) {
+      row.push_back(mat.at<float>(r, c));
+    }
+    rows.push_back(row);
+  }
+  return rows;
+}
+
+// Sort predicate: orders points by their first coordinate (x), ascending.
+bool PostProcessor::XsortFp32(std::vector<float> a, std::vector<float> b) {
+  return a[0] < b[0];
+}
+
+// Sort predicate: orders points by their first coordinate (x), ascending.
+bool PostProcessor::XsortInt(std::vector<int> a, std::vector<int> b) {
+  return a[0] < b[0];
+}
+
+// Returns the four corners of `box` as [x, y] pairs and stores the longer
+// side of the rectangle in `ssid`. The corners are sorted by x; within the
+// left pair and the right pair the smaller-y point comes first, and the
+// result is {left/first, right/first, right/second, left/second}.
+std::vector<std::vector<float>> PostProcessor::GetMiniBoxes(cv::RotatedRect box,
+                                                            float &ssid) {
+  ssid = std::max(box.size.width, box.size.height);
+
+  cv::Mat corner_mat;
+  cv::boxPoints(box, corner_mat);
+
+  auto corners = Mat2Vector(corner_mat);
+  std::sort(corners.begin(), corners.end(), XsortFp32);
+
+  // Ties on y count as "swapped", exactly like the <= comparisons require.
+  const bool left_swapped = corners[1][1] <= corners[0][1];
+  const bool right_swapped = corners[3][1] <= corners[2][1];
+
+  const std::vector<float> first = left_swapped ? corners[1] : corners[0];
+  const std::vector<float> second = right_swapped ? corners[3] : corners[2];
+  const std::vector<float> third = right_swapped ? corners[2] : corners[3];
+  const std::vector<float> fourth = left_swapped ? corners[0] : corners[1];
+
+  corners[0] = first;
+  corners[1] = second;
+  corners[2] = third;
+  corners[3] = fourth;
+
+  return corners;
+}
+
+// Returns the mean of `pred` over the pixels covered by the four-point
+// polygon `box_array`. The polygon is rasterized into a mask over its
+// bounding rectangle, which is clamped to the extent of `pred`.
+float PostProcessor::BoxScoreFast(std::vector<std::vector<float>> box_array,
+                                  cv::Mat pred) {
+  const int width = pred.cols;
+  const int height = pred.rows;
+
+  float xs[4];
+  float ys[4];
+  for (int k = 0; k < 4; k++) {
+    xs[k] = box_array[k][0];
+    ys[k] = box_array[k][1];
+  }
+
+  const int xmin =
+      clamp(int(std::floor(*(std::min_element(xs, xs + 4)))), 0, width - 1);
+  const int xmax =
+      clamp(int(std::ceil(*(std::max_element(xs, xs + 4)))), 0, width - 1);
+  const int ymin =
+      clamp(int(std::floor(*(std::min_element(ys, ys + 4)))), 0, height - 1);
+  const int ymax =
+      clamp(int(std::ceil(*(std::max_element(ys, ys + 4)))), 0, height - 1);
+
+  const int roi_w = xmax - xmin + 1;
+  const int roi_h = ymax - ymin + 1;
+
+  cv::Mat mask = cv::Mat::zeros(roi_h, roi_w, CV_8UC1);
+
+  // Polygon corners relative to the top-left corner of the bounding box.
+  cv::Point root_point[4];
+  for (int k = 0; k < 4; k++) {
+    root_point[k] =
+        cv::Point(int(box_array[k][0]) - xmin, int(box_array[k][1]) - ymin);
+  }
+  const cv::Point *ppt[1] = {root_point};
+  int npt[] = {4};
+  cv::fillPoly(mask, ppt, npt, 1, cv::Scalar(1));
+
+  cv::Mat croppedImg;
+  pred(cv::Rect(xmin, ymin, roi_w, roi_h)).copyTo(croppedImg);
+
+  return cv::mean(croppedImg, mask)[0];
+}
+
+// Turns the contours of `bitmap` into integer four-point boxes.
+// For each of the first 1000 contours with more than two points: take the
+// minimum-area rectangle, drop it if its longer side is below 3 or its mean
+// score in `pred` is below `box_thresh`, expand it with UnClip, drop it again
+// if the expanded rectangle is degenerate or its longer side is below 5, then
+// rescale the corners from bitmap size to pred size and clamp them.
+std::vector<std::vector<std::vector<int>>>
+PostProcessor::BoxesFromBitmap(const cv::Mat pred, const cv::Mat bitmap,
+                               const float &box_thresh,
+                               const float &det_db_unclip_ratio) {
+  const int min_size = 3;
+  const int max_candidates = 1000;
+
+  const int width = bitmap.cols;
+  const int height = bitmap.rows;
+  const int dest_width = pred.cols;
+  const int dest_height = pred.rows;
+
+  std::vector<std::vector<cv::Point>> contours;
+  std::vector<cv::Vec4i> hierarchy;
+
+  cv::findContours(bitmap, contours, hierarchy, cv::RETR_LIST,
+                   cv::CHAIN_APPROX_SIMPLE);
+
+  int num_contours = max_candidates;
+  if (contours.size() < max_candidates) {
+    num_contours = contours.size();
+  }
+
+  std::vector<std::vector<std::vector<int>>> boxes;
+
+  for (int idx = 0; idx < num_contours; idx++) {
+    const std::vector<cv::Point> &contour = contours[idx];
+    if (contour.size() <= 2) {
+      continue;
+    }
+
+    // `ssid` receives the longer side of the rectangle from GetMiniBoxes.
+    float ssid;
+    cv::RotatedRect min_rect = cv::minAreaRect(contour);
+    auto mini_box = GetMiniBoxes(min_rect, ssid);
+    if (ssid < min_size) {
+      continue;
+    }
+
+    const float score = BoxScoreFast(mini_box, pred);
+    if (score < box_thresh) {
+      continue;
+    }
+
+    // Expand the box; a result of about 1x1 is skipped.
+    cv::RotatedRect expanded = UnClip(mini_box, det_db_unclip_ratio);
+    if (expanded.size.height < 1.001 && expanded.size.width < 1.001) {
+      continue;
+    }
+
+    auto cliparray = GetMiniBoxes(expanded, ssid);
+    if (ssid < min_size + 2) {
+      continue;
+    }
+
+    std::vector<std::vector<int>> intcliparray;
+    for (int num_pt = 0; num_pt < 4; num_pt++) {
+      const float scaled_x =
+          roundf(cliparray[num_pt][0] / float(width) * float(dest_width));
+      const float scaled_y =
+          roundf(cliparray[num_pt][1] / float(height) * float(dest_height));
+      std::vector<int> a{int(clampf(scaled_x, 0, float(dest_width))),
+                         int(clampf(scaled_y, 0, float(dest_height)))};
+      intcliparray.push_back(a);
+    }
+    boxes.push_back(intcliparray);
+  }
+  return boxes;
+}
+
+// Maps detected boxes back to the original image and drops small ones.
+// Every box is reordered with OrderPointsClockwise, its coordinates are
+// divided by ratio_w / ratio_h (truncating to int) and clamped to the size of
+// `srcimg`. Boxes whose 0->1 or 0->3 edge is 10 pixels or shorter are
+// discarded; the remaining boxes are returned in their original order.
+std::vector<std::vector<std::vector<int>>>
+PostProcessor::FilterTagDetRes(std::vector<std::vector<std::vector<int>>> boxes,
+                               float ratio_h, float ratio_w, cv::Mat srcimg) {
+  const int oriimg_h = srcimg.rows;
+  const int oriimg_w = srcimg.cols;
+
+  // Truncated Euclidean distance between two integer points.
+  auto edge_length = [](const std::vector<int> &a, const std::vector<int> &b) {
+    const double dx = a[0] - b[0];
+    const double dy = a[1] - b[1];
+    return int(sqrt(dx * dx + dy * dy));
+  };
+
+  std::vector<std::vector<std::vector<int>>> root_points;
+  for (size_t n = 0; n < boxes.size(); n++) {
+    boxes[n] = OrderPointsClockwise(boxes[n]);
+    // Iterate over this box's own points, not over the size of boxes[0].
+    for (size_t m = 0; m < boxes[n].size(); m++) {
+      boxes[n][m][0] /= ratio_w;
+      boxes[n][m][1] /= ratio_h;
+
+      boxes[n][m][0] = int(_min(_max(boxes[n][m][0], 0), oriimg_w - 1));
+      boxes[n][m][1] = int(_min(_max(boxes[n][m][1], 0), oriimg_h - 1));
+    }
+
+    const int rect_width = edge_length(boxes[n][0], boxes[n][1]);
+    const int rect_height = edge_length(boxes[n][0], boxes[n][3]);
+    if (rect_width <= 10 || rect_height <= 10)
+      continue;
+    root_points.push_back(boxes[n]);
+  }
+  return root_points;
+}
+
+} // namespace PaddleOCR
--- a/deploy/cpp_infer/src/preprocess_op.cpp
+++ b/deploy/cpp_infer/src/preprocess_op.cpp
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/imgproc.hpp"
+#include "paddle_api.h"
+#include "paddle_inference_api.h"
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <ostream>
+#include <vector>
+
+#include <cstring>
+#include <fstream>
+#include <numeric>
+
+#include <include/preprocess_op.h>
+
+namespace PaddleOCR {
+
+// Splits the channels of `im` into consecutive planes of rows * cols floats
+// each, written to `data` one channel after the other.
+void Permute::Run(const cv::Mat *im, float *data) {
+  const int rows = im->rows;
+  const int cols = im->cols;
+  const int plane_size = rows * cols;
+  const int channels = im->channels();
+  for (int c = 0; c < channels; ++c) {
+    // Header over the caller's buffer; no pixel storage is allocated for it.
+    cv::Mat plane(rows, cols, CV_32FC1, data + c * plane_size);
+    cv::extractChannel(*im, plane, c);
+  }
+}
+
+// Converts `im` in place to 3-channel float (scaled by 1/255 when `is_scale`
+// is true) and then applies (value - mean[c]) * scale[c] to every channel c.
+void Normalize::Run(cv::Mat *im, const std::vector<float> &mean,
+                    const std::vector<float> &scale, const bool is_scale) {
+  const double factor = is_scale ? 1.0 / 255.0 : 1.0;
+  im->convertTo(*im, CV_32FC3, factor);
+
+  for (int row = 0; row < im->rows; row++) {
+    for (int col = 0; col < im->cols; col++) {
+      cv::Vec3f &pixel = im->at<cv::Vec3f>(row, col);
+      for (int c = 0; c < 3; c++) {
+        pixel[c] = (pixel[c] - mean[c]) * scale[c];
+      }
+    }
+  }
+}
+
+// Resizes `img` into `resize_img` so that its longer side does not exceed
+// `max_size_len` and both sides are multiples of 32 (at least 32).
+// `ratio_h` and `ratio_w` receive resized size / original size per axis.
+void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
+                         int max_size_len, float &ratio_h, float &ratio_w) {
+  const int w = img.cols;
+  const int h = img.rows;
+
+  float ratio = 1.f;
+  const int max_wh = w >= h ? w : h;
+  if (max_wh > max_size_len) {
+    ratio = float(max_size_len) / float(h > w ? h : w);
+  }
+
+  // Snaps a side length to the 32-pixel grid. Both axes use the same rule:
+  // anything below 64 becomes 32, exact multiples are kept, and other values
+  // are rounded down by one extra step. The width previously tested "< 1"
+  // instead of "<= 1", which turned widths 33..63 into 0 and made
+  // cv::resize fail; a side of 0 is also lifted to 32 here.
+  auto align_to_32 = [](int side) {
+    if (side / 32 <= 1)
+      return 32;
+    if (side % 32 == 0)
+      return side;
+    return (side / 32 - 1) * 32;
+  };
+
+  const int resize_h = align_to_32(int(float(h) * ratio));
+  const int resize_w = align_to_32(int(float(w) * ratio));
+
+  cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
+
+  ratio_h = float(resize_h) / float(h);
+  ratio_w = float(resize_w) / float(w);
+}
+
+// Resizes `img` to the height rec_image_shape[1]. The width keeps the aspect
+// ratio of `img` (rounded up) but is capped at int(32 * wh_ratio).
+void CrnnResizeImg::Run(const cv::Mat &img, cv::Mat &resize_img, float wh_ratio,
+                        const std::vector<int> &rec_image_shape) {
+  const int imgH = rec_image_shape[1];
+  // The width limit comes from wh_ratio, not from rec_image_shape[2].
+  const int imgW = int(32 * wh_ratio);
+
+  const float ratio = float(img.cols) / float(img.rows);
+  const float scaled_w = ceilf(imgH * ratio);
+  const int resize_w = scaled_w > imgW ? imgW : int(scaled_w);
+
+  cv::resize(img, resize_img, cv::Size(resize_w, imgH), 0.f, 0.f,
+             cv::INTER_LINEAR);
+}
+
+} // namespace PaddleOCR
\ No newline at end of file
--- a/deploy/cpp_infer/src/utility.cpp
+++ b/deploy/cpp_infer/src/utility.cpp
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <iostream>
+#include <ostream>
+#include <vector>
+
+#include <include/utility.h>
+
+namespace PaddleOCR {
+
+// Reads the file at `path` and returns its lines in order.
+// If the file cannot be opened, a message is printed and the process exits
+// with status 1.
+std::vector<std::string> Utility::ReadDict(const std::string &path) {
+  std::ifstream in(path);
+  if (!in) {
+    std::cout << "no such label file: " << path << ", exit the program..."
+              << std::endl;
+    exit(1);
+  }
+
+  std::vector<std::string> labels;
+  std::string line;
+  while (getline(in, line)) {
+    labels.push_back(line);
+  }
+  return labels;
+}
+
+// Draws every box (up to four [x, y] points each) as a closed green outline
+// on a copy of `srcimg` and writes the result to ./ocr_vis.png.
+void Utility::VisualizeBboxes(
+    const cv::Mat &srcimg,
+    const std::vector<std::vector<std::vector<int>>> &boxes) {
+  cv::Mat img_vis;
+  srcimg.copyTo(img_vis);
+
+  // One fixed-size corner array per box replaces the variable-length array,
+  // which is not standard C++ and grows the stack with the number of boxes.
+  for (const auto &box : boxes) {
+    const int num_points = box.size() < 4 ? static_cast<int>(box.size()) : 4;
+    if (num_points == 0) {
+      continue;
+    }
+    cv::Point corners[4];
+    for (int m = 0; m < num_points; m++) {
+      corners[m] = cv::Point(int(box[m][0]), int(box[m][1]));
+    }
+    const cv::Point *ppt[1] = {corners};
+    int npt[] = {num_points};
+    cv::polylines(img_vis, ppt, npt, 1, 1, CV_RGB(0, 255, 0), 2, 8, 0);
+  }
+
+  cv::imwrite("./ocr_vis.png", img_vis);
+  std::cout << "The detection visualized image saved in ./ocr_vis.png"
+            << std::endl;
+}
+
+} // namespace PaddleOCR
\ No newline at end of file
--- a/deploy/cpp_infer/tools/build.sh
+++ b/deploy/cpp_infer/tools/build.sh
+
+# Configures and builds the ocr_system demo in a fresh ./build directory.
+# Replace the placeholder paths below before running.
+OPENCV_DIR=your_opencv_dir
+LIB_DIR=your_paddle_inference_dir
+CUDA_LIB_DIR=your_cuda_lib_dir
+CUDNN_LIB_DIR=/your_cudnn_lib_dir
+
+BUILD_DIR=build
+rm -rf "${BUILD_DIR}"
+mkdir "${BUILD_DIR}"
+# Stop here if the directory cannot be entered, so that cmake and make never
+# run in the wrong place.
+cd "${BUILD_DIR}" || exit 1
+cmake .. \
+    -DPADDLE_LIB="${LIB_DIR}" \
+    -DWITH_MKL=ON \
+    -DDEMO_NAME=ocr_system \
+    -DWITH_GPU=OFF \
+    -DWITH_STATIC_LIB=OFF \
+    -DUSE_TENSORRT=OFF \
+    -DOPENCV_DIR="${OPENCV_DIR}" \
+    -DCUDNN_LIB="${CUDNN_LIB_DIR}" \
+    -DCUDA_LIB="${CUDA_LIB_DIR}"
+
+make -j
--- a/deploy/cpp_infer/tools/config.txt
+++ b/deploy/cpp_infer/tools/config.txt
+# model load config
+use_gpu  0
+gpu_id  0
+gpu_mem  4000
+cpu_math_library_num_threads  10
+use_mkldnn 0
+
+# det config
+max_side_len  960
+det_db_thresh  0.3
+det_db_box_thresh  0.5
+det_db_unclip_ratio  2.0
+det_model_dir  ./inference/det_db
+
+# rec config
+rec_model_dir  ./inference/rec_crnn
+char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
+img_path  ../../doc/imgs/11.jpg
+
+# show the detection results
+visualize 0
+
--- a/deploy/cpp_infer/tools/run.sh
+++ b/deploy/cpp_infer/tools/run.sh
+
+./build/ocr_system ./tools/config.txt ../../doc/imgs/12.jpg
--- a/deploy/hubserving/ocr_det/__init__.py
+++ b/deploy/hubserving/ocr_det/__init__.py
--- a/deploy/hubserving/ocr_det/config.json
+++ b/deploy/hubserving/ocr_det/config.json
+{
+    "modules_info": {
+        "ocr_det": {
+            "init_args": {
+                "version": "1.0.0",
+                "use_gpu": true
+            },
+            "predict_args": {
+            }
+        }
+    },
+    "port": 8866,
+    "use_multiprocess": false,
+    "workers": 2    
+}
--- a/deploy/hubserving/ocr_det/module.py
+++ b/deploy/hubserving/ocr_det/module.py
+# -*- coding:utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import ast
+import copy
+import math
+import os
+import time
+
+from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor
+from paddlehub.common.logger import logger
+from paddlehub.module.module import moduleinfo, runnable, serving
+from PIL import Image
+import cv2
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+
+from tools.infer.utility import base64_to_cv2
+from tools.infer.predict_det import TextDetector
+
+
+@moduleinfo(
+    name="ocr_det",
+    version="1.0.0",
+    summary="ocr detection service",
+    author="paddle-dev",
+    author_email="paddle-dev@baidu.com",
+    type="cv/text_recognition")
+class OCRDet(hub.Module):
+    def _initialize(self, use_gpu=False, enable_mkldnn=False):
+        """
+        initialize with the necessary elements
+        """
+        from ocr_det.params import read_params
+        cfg = read_params()
+
+        cfg.use_gpu = use_gpu
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+                print("use gpu: ", use_gpu)
+                print("CUDA_VISIBLE_DEVICES: ", _places)
+                cfg.gpu_mem = 8000
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
+                )
+        cfg.ir_optim = True
+        cfg.enable_mkldnn = enable_mkldnn
+
+        self.text_detector = TextDetector(cfg)
+
+    def read_images(self, paths=[]):
+        images = []
+        for img_path in paths:
+            assert os.path.isfile(
+                img_path), "The {} isn't a valid file.".format(img_path)
+            img = cv2.imread(img_path)
+            if img is None:
+                logger.info("error in loading image:{}".format(img_path))
+                continue
+            images.append(img)
+        return images
+
+    def predict(self,
+                images=[],
+                paths=[]):
+        """
+        Get the text box in the predicted images.
+        Args:
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths
+            paths (list[str]): The paths of images. If paths not images
+        Returns:
+            res (list): The result of text detection box and save path of images.
+        """
+
+        if images != [] and isinstance(images, list) and paths == []:
+            predicted_data = images
+        elif images == [] and isinstance(paths, list) and paths != []:
+            predicted_data = self.read_images(paths)
+        else:
+            raise TypeError("The input data is inconsistent with expectations.")
+
+        assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
+        
+        all_results = []
+        for img in predicted_data:
+            if img is None:
+                logger.info("error in loading image")
+                all_results.append([])
+                continue
+            dt_boxes, elapse = self.text_detector(img)
+            logger.info("Predict time : {}".format(elapse))
+
+            rec_res_final = []
+            for dno in range(len(dt_boxes)):
+                rec_res_final.append(
+                    {
+                        'text_region': dt_boxes[dno].astype(np.int).tolist()
+                    }
+                )
+            all_results.append(rec_res_final)
+        return all_results
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.predict(images_decode, **kwargs)
+        return results
+
+   
+if __name__ == '__main__':
+    ocr = OCRDet()
+    image_path = [
+        './doc/imgs/11.jpg',
+        './doc/imgs/12.jpg',
+    ]
+    res = ocr.predict(paths=image_path)
+    print(res)
\ No newline at end of file
--- a/deploy/hubserving/ocr_det/params.py
+++ b/deploy/hubserving/ocr_det/params.py
+# -*- coding:utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+class Config(object):
+    """Empty attribute container; read_params() fills in the settings."""
+    pass
+
+
+def read_params():
+    cfg = Config()
+    
+    #params for text detector
+    cfg.det_algorithm = "DB"
+    cfg.det_model_dir = "./inference/ch_det_mv3_db/"
+    cfg.det_max_side_len = 960
+
+    #DB parmas
+    cfg.det_db_thresh =0.3
+    cfg.det_db_box_thresh =0.5
+    cfg.det_db_unclip_ratio =2.0
+
+    # #EAST parmas
+    # cfg.det_east_score_thresh = 0.8
+    # cfg.det_east_cover_thresh = 0.1
+    # cfg.det_east_nms_thresh = 0.2
+
+    # #params for text recognizer
+    # cfg.rec_algorithm = "CRNN"
+    # cfg.rec_model_dir = "./inference/ch_det_mv3_crnn/"
+
+    # cfg.rec_image_shape = "3, 32, 320"
+    # cfg.rec_char_type = 'ch'
+    # cfg.rec_batch_num = 30
+    # cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
+    # cfg.use_space_char = True
+
+    return cfg
\ No newline at end of file
--- a/deploy/hubserving/ocr_rec/__init__.py
+++ b/deploy/hubserving/ocr_rec/__init__.py
--- a/deploy/hubserving/ocr_rec/config.json
+++ b/deploy/hubserving/ocr_rec/config.json
+{
+    "modules_info": {
+        "ocr_rec": {
+            "init_args": {
+                "version": "1.0.0",
+                "use_gpu": true
+            },
+            "predict_args": {
+            }
+        }
+    },
+    "port": 8867,
+    "use_multiprocess": false,
+    "workers": 2
+}
--- a/deploy/hubserving/ocr_rec/module.py
+++ b/deploy/hubserving/ocr_rec/module.py
+# -*- coding:utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import ast
+import copy
+import math
+import os
+import time
+
+from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor
+from paddlehub.common.logger import logger
+from paddlehub.module.module import moduleinfo, runnable, serving
+from PIL import Image
+import cv2
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+
+from tools.infer.utility import base64_to_cv2
+from tools.infer.predict_rec import TextRecognizer
+
+
+@moduleinfo(
+    name="ocr_rec",
+    version="1.0.0",
+    summary="ocr recognition service",
+    author="paddle-dev",
+    author_email="paddle-dev@baidu.com",
+    type="cv/text_recognition")
+class OCRRec(hub.Module):
+    def _initialize(self, use_gpu=False, enable_mkldnn=False):
+        """
+        initialize with the necessary elements
+        """
+        from ocr_rec.params import read_params
+        cfg = read_params()
+
+        cfg.use_gpu = use_gpu
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+                print("use gpu: ", use_gpu)
+                print("CUDA_VISIBLE_DEVICES: ", _places)
+                cfg.gpu_mem = 8000
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
+                )
+        cfg.ir_optim = True
+        cfg.enable_mkldnn = enable_mkldnn
+
+        self.text_recognizer = TextRecognizer(cfg)
+
+    def read_images(self, paths=[]):
+        images = []
+        for img_path in paths:
+            assert os.path.isfile(
+                img_path), "The {} isn't a valid file.".format(img_path)
+            img = cv2.imread(img_path)
+            if img is None:
+                logger.info("error in loading image:{}".format(img_path))
+                continue
+            images.append(img)
+        return images
+
+    def predict(self,
+                images=[],
+                paths=[]):
+        """
+        Get the text box in the predicted images.
+        Args:
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths
+            paths (list[str]): The paths of images. If paths not images
+        Returns:
+            res (list): The result of text detection box and save path of images.
+        """
+
+        if images != [] and isinstance(images, list) and paths == []:
+            predicted_data = images
+        elif images == [] and isinstance(paths, list) and paths != []:
+            predicted_data = self.read_images(paths)
+        else:
+            raise TypeError("The input data is inconsistent with expectations.")
+
+        assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
+        
+        img_list = []
+        for img in predicted_data:
+            if img is None:
+                continue
+            img_list.append(img)
+            
+        rec_res_final = []
+        try:
+            rec_res, predict_time = self.text_recognizer(img_list)
+            for dno in range(len(rec_res)):
+                text, score = rec_res[dno]
+                rec_res_final.append(
+                    {
+                        'text': text,
+                        'confidence': float(score),
+                    }
+                )
+        except Exception as e:
+            print(e)
+            return [[]]
+
+        return [rec_res_final]
+
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.predict(images_decode, **kwargs)
+        return results
+
+   
+if __name__ == '__main__':
+    ocr = OCRRec()
+    image_path = [
+        './doc/imgs_words/ch/word_1.jpg',
+        './doc/imgs_words/ch/word_2.jpg',
+        './doc/imgs_words/ch/word_3.jpg',
+    ]
+    res = ocr.predict(paths=image_path)
+    print(res)
\ No newline at end of file
--- a/deploy/hubserving/ocr_rec/params.py
+++ b/deploy/hubserving/ocr_rec/params.py
+# -*- coding:utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+class Config(object):
+    pass
+
+
+def read_params():
+    cfg = Config()
+    
+    # #params for text detector
+    # cfg.det_algorithm = "DB"
+    # cfg.det_model_dir = "./inference/ch_det_mv3_db/"
+    # cfg.det_max_side_len = 960
+
+    # #DB parmas
+    # cfg.det_db_thresh =0.3
+    # cfg.det_db_box_thresh =0.5
+    # cfg.det_db_unclip_ratio =2.0
+
+    # #EAST parmas
+    # cfg.det_east_score_thresh = 0.8
+    # cfg.det_east_cover_thresh = 0.1
+    # cfg.det_east_nms_thresh = 0.2
+
+    #params for text recognizer
+    cfg.rec_algorithm = "CRNN"
+    cfg.rec_model_dir = "./inference/ch_rec_mv3_crnn/"
+
+    cfg.rec_image_shape = "3, 32, 320"
+    cfg.rec_char_type = 'ch'
+    cfg.rec_batch_num = 30
+    cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
+    cfg.use_space_char = True
+
+    return cfg
\ No newline at end of file
--- a/deploy/hubserving/ocr_system/__init__.py
+++ b/deploy/hubserving/ocr_system/__init__.py
--- a/deploy/hubserving/ocr_system/config.json
+++ b/deploy/hubserving/ocr_system/config.json
+{
+    "modules_info": {
+        "ocr_system": {
+            "init_args": {
+                "version": "1.0.0",
+                "use_gpu": true
+            },
+            "predict_args": {
+            }
+        }
+    },
+    "port": 8868,
+    "use_multiprocess": false,
+    "workers": 2
+}
+
--- a/deploy/hubserving/ocr_system/module.py
+++ b/deploy/hubserving/ocr_system/module.py
+# -*- coding:utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import ast
+import copy
+import math
+import os
+import time
+
+from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor
+from paddlehub.common.logger import logger
+from paddlehub.module.module import moduleinfo, runnable, serving
+from PIL import Image
+import cv2
+import numpy as np
+import paddle.fluid as fluid
+import paddlehub as hub
+
+from tools.infer.utility import base64_to_cv2
+from tools.infer.predict_system import TextSystem
+
+
+@moduleinfo(
+    name="ocr_system",
+    version="1.0.0",
+    summary="ocr system service",
+    author="paddle-dev",
+    author_email="paddle-dev@baidu.com",
+    type="cv/text_recognition")
+class OCRSystem(hub.Module):
+    def _initialize(self, use_gpu=False, enable_mkldnn=False):
+        """
+        initialize with the necessary elements
+        """
+        from ocr_system.params import read_params
+        cfg = read_params()
+
+        cfg.use_gpu = use_gpu
+        if use_gpu:
+            try:
+                _places = os.environ["CUDA_VISIBLE_DEVICES"]
+                int(_places[0])
+                print("use gpu: ", use_gpu)
+                print("CUDA_VISIBLE_DEVICES: ", _places)
+                cfg.gpu_mem = 8000
+            except:
+                raise RuntimeError(
+                    "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id."
+                )
+        cfg.ir_optim = True
+        cfg.enable_mkldnn = enable_mkldnn
+        
+        self.text_sys = TextSystem(cfg)
+
+    def read_images(self, paths=[]):
+        images = []
+        for img_path in paths:
+            assert os.path.isfile(
+                img_path), "The {} isn't a valid file.".format(img_path)
+            img = cv2.imread(img_path)
+            if img is None:
+                logger.info("error in loading image:{}".format(img_path))
+                continue
+            images.append(img)
+        return images
+
+    def predict(self,
+                       images=[],
+                       paths=[]):
+        """
+        Get the chinese texts in the predicted images.
+        Args:
+            images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths
+            paths (list[str]): The paths of images. If paths not images
+        Returns:
+            res (list): The result of chinese texts and save path of images.
+        """
+
+        if images != [] and isinstance(images, list) and paths == []:
+            predicted_data = images
+        elif images == [] and isinstance(paths, list) and paths != []:
+            predicted_data = self.read_images(paths)
+        else:
+            raise TypeError("The input data is inconsistent with expectations.")
+
+        assert predicted_data != [], "There is not any image to be predicted. Please check the input data."
+
+        all_results = []
+        for img in predicted_data:
+            if img is None:
+                logger.info("error in loading image")
+                all_results.append([])
+                continue
+            starttime = time.time()
+            dt_boxes, rec_res = self.text_sys(img)
+            elapse = time.time() - starttime
+            logger.info("Predict time: {}".format(elapse))
+
+            dt_num = len(dt_boxes)
+            rec_res_final = []
+
+            for dno in range(dt_num):
+                text, score = rec_res[dno]
+                rec_res_final.append(
+                    {
+                        'text': text,
+                        'confidence': float(score),
+                        'text_region': dt_boxes[dno].astype(np.int).tolist()
+                    }
+                )
+            all_results.append(rec_res_final)
+        return all_results
+
+    @serving
+    def serving_method(self, images, **kwargs):
+        """
+        Run as a service.
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.predict(images_decode, **kwargs)
+        return results
+
+   
+if __name__ == '__main__':
+    ocr = OCRSystem()
+    image_path = [
+        './doc/imgs/11.jpg',
+        './doc/imgs/12.jpg',
+    ]
+    res = ocr.predict(paths=image_path)
+    print(res)
\ No newline at end of file
--- a/deploy/hubserving/ocr_system/params.py
+++ b/deploy/hubserving/ocr_system/params.py
+# -*- coding:utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+class Config(object):
+    pass
+
+
+def read_params():
+    cfg = Config()
+    
+    #params for text detector
+    cfg.det_algorithm = "DB"
+    cfg.det_model_dir = "./inference/ch_det_mv3_db/"
+    cfg.det_max_side_len = 960
+
+    #DB parmas
+    cfg.det_db_thresh =0.3
+    cfg.det_db_box_thresh =0.5
+    cfg.det_db_unclip_ratio =2.0
+
+    #EAST parmas
+    cfg.det_east_score_thresh = 0.8
+    cfg.det_east_cover_thresh = 0.1
+    cfg.det_east_nms_thresh = 0.2
+
+    #params for text recognizer
+    cfg.rec_algorithm = "CRNN"
+    cfg.rec_model_dir = "./inference/ch_rec_mv3_crnn/"
+
+    cfg.rec_image_shape = "3, 32, 320"
+    cfg.rec_char_type = 'ch'
+    cfg.rec_batch_num = 30
+    cfg.rec_char_dict_path = "./ppocr/utils/ppocr_keys_v1.txt"
+    cfg.use_space_char = True
+
+    return cfg
\ No newline at end of file
--- a/deploy/imgs/cpp_infer_pred_12.png
+++ b/deploy/imgs/cpp_infer_pred_12.png