fix conflict

721c76b4 · LDOUBLEV · 98162be4 · b77f9ec0 · 721c76b4 · 721c76b4
Commit 721c76b4 authored Dec 16, 2021 by LDOUBLEV
20 changed files
--- a/configs/rec/rec_mtb_nrtr.yml
+++ b/configs/rec/rec_mtb_nrtr.yml
@@ -14,11 +14,10 @@ Global:
  use_visualdl: False
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
-  character_dict_path: 
-  character_type: EN_symbol
+  character_dict_path: ppocr/utils/EN_symbol_dict.txt
  max_text_length: 25
  infer_mode: False
-  use_space_char: True
+  use_space_char: False
  save_res_path: ./output/rec/predicts_nrtr.txt

 Optimizer:

--- a/configs/rec/rec_mv3_none_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_mv3_none_none_ctc.yml
+++ b/configs/rec/rec_mv3_none_none_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_mv3_tps_bilstm_att.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_att.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words/ch/word_1.jpg
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r31_sar.yml
+++ b/configs/rec/rec_r31_sar.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: 
  # for data or label process
  character_dict_path: ppocr/utils/dict90.txt
-  character_type: EN_symbol
  max_text_length: 30
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_none_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r34_vd_tps_bilstm_att.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_att.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words/ch/word_1.jpg
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  infer_mode: False
  use_space_char: False

--- a/configs/rec/rec_r50_fpn_srn.yml
+++ b/configs/rec/rec_r50_fpn_srn.yml
@@ -15,7 +15,6 @@ Global:
  infer_img: doc/imgs_words/ch/word_1.jpg
  # for data or label process
  character_dict_path:
-  character_type: en
  max_text_length: 25
  num_heads: 8
  infer_mode: False

--- a/configs/rec/rec_resnet_stn_bilstm_att.yml
+++ b/configs/rec/rec_resnet_stn_bilstm_att.yml
@@ -14,8 +14,7 @@ Global:
  use_visualdl: False
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
-  character_dict_path: 
-  character_type: EN_symbol
+  character_dict_path: ppocr/utils/EN_symbol_dict.txt
  max_text_length: 100
  infer_mode: False
  use_space_char: False

--- a/configs/table/table_mv3.yml
+++ b/configs/table/table_mv3.yml
 Global:
  use_gpu: true
-  epoch_num: 50
+  epoch_num: 400
  log_smooth_window: 20
  print_batch_step: 5
  save_model_dir: ./output/table_mv3/
-  save_epoch_step: 5
+  save_epoch_step: 3
  # evaluation is run every 400 iterations after the 0th iteration
  eval_batch_step: [0, 400]
  cal_metric_during_train: True
@@ -12,18 +12,17 @@ Global:
  checkpoints: 
  save_inference_dir:
  use_visualdl: False
-  infer_img: doc/imgs_words/ch/word_1.jpg
+  infer_img: doc/table/table.jpg
  # for data or label process
  character_dict_path: ppocr/utils/dict/table_structure_dict.txt
  character_type: en
  max_text_length: 100
-  max_elem_length: 500
+  max_elem_length: 800
  max_cell_num: 500
  infer_mode: False
  process_total_num: 0
  process_cut_num: 0

-
 Optimizer:
  name: Adam
  beta1: 0.9
@@ -41,13 +40,15 @@ Architecture:
  Backbone:
    name: MobileNetV3
    scale: 1.0
-    model_name: small
-    disable_se: True
+    model_name: large
  Head:
    name: TableAttentionHead
    hidden_size: 256
    l2_decay: 0.00001
    loc_type: 2
+    max_text_length: 100
+    max_elem_length: 800
+    max_cell_num: 500

 Loss:
  name: TableAttentionLoss

--- a/deploy/cpp_infer/include/ocr_rec.h
+++ b/deploy/cpp_infer/include/ocr_rec.h
@@ -44,7 +44,8 @@ public:
                          const int &gpu_id, const int &gpu_mem,
                          const int &cpu_math_library_num_threads,
                          const bool &use_mkldnn, const string &label_path,
-                          const bool &use_tensorrt, const std::string &precision) {
+                          const bool &use_tensorrt, const std::string &precision,
+                          const int &rec_batch_num) {
    this->use_gpu_ = use_gpu;
    this->gpu_id_ = gpu_id;
    this->gpu_mem_ = gpu_mem;
@@ -52,6 +53,7 @@ public:
    this->use_mkldnn_ = use_mkldnn;
    this->use_tensorrt_ = use_tensorrt;
    this->precision_ = precision;
+    this->rec_batch_num_ = rec_batch_num;

    this->label_list_ = Utility::ReadDict(label_path);
    this->label_list_.insert(this->label_list_.begin(),
@@ -64,7 +66,7 @@ public:
  // Load Paddle inference model
  void LoadModel(const std::string &model_dir);

-  void Run(cv::Mat &img, std::vector<double> *times);
+  void Run(std::vector<cv::Mat> img_list, std::vector<double> *times);

 private:
  std::shared_ptr<Predictor> predictor_;
@@ -82,10 +84,12 @@ private:
  bool is_scale_ = true;
  bool use_tensorrt_ = false;
  std::string precision_ = "fp32";
+  int rec_batch_num_ = 6;
+    
  // pre-process
  CrnnResizeImg resize_op_;
  Normalize normalize_op_;
-  Permute permute_op_;
+  PermuteBatch permute_op_;

  // post-process
  PostProcessor post_processor_;

--- a/deploy/cpp_infer/include/preprocess_op.h
+++ b/deploy/cpp_infer/include/preprocess_op.h
@@ -44,6 +44,11 @@ public:
  virtual void Run(const cv::Mat *im, float *data);
 };

+class PermuteBatch {
+public:
+  virtual void Run(const std::vector<cv::Mat> imgs, float *data);
+};
+    
 class ResizeImgType0 {
 public:
  virtual void Run(const cv::Mat &img, cv::Mat &resize_img, int max_size_len,

--- a/deploy/cpp_infer/include/utility.h
+++ b/deploy/cpp_infer/include/utility.h
@@ -50,6 +50,9 @@ public:
    
  static cv::Mat GetRotateCropImage(const cv::Mat &srcimage,
                          std::vector<std::vector<int>> box);
+    
+  static std::vector<int> argsort(const std::vector<float>& array);
+
 };

 } // namespace PaddleOCR
\ No newline at end of file
--- a/deploy/cpp_infer/readme.md
+++ b/deploy/cpp_infer/readme.md
@@ -34,10 +34,10 @@ PaddleOCR模型部署。

 * 首先需要从opencv官网上下载在Linux环境下源码编译的包，以opencv3.4.7为例，下载命令如下。

-```
+```bash
 cd deploy/cpp_infer
-wget https://github.com/opencv/opencv/archive/3.4.7.tar.gz
-tar -xf 3.4.7.tar.gz
+wget https://paddleocr.bj.bcebos.com/libs/opencv/opencv-3.4.7.tar.gz
+tar -xf opencv-3.4.7.tar.gz
 ```

 最终可以在当前目录下看到`opencv-3.4.7/`的文件夹。
@@ -45,12 +45,13 @@ tar -xf 3.4.7.tar.gz
 * 编译opencv，设置opencv源码路径(`root_path`)以及安装路径(`install_path`)。进入opencv源码路径下，按照下面的方式进行编译。

 ```shell
-root_path=your_opencv_root_path
+root_path="your_opencv_root_path"
 install_path=${root_path}/opencv3
+build_dir=${root_path}/build

-rm -rf build
-mkdir build
-cd build
+rm -rf ${build_dir}
+mkdir ${build_dir}
+cd ${build_dir}

 cmake .. \
    -DCMAKE_INSTALL_PREFIX=${install_path} \
@@ -74,6 +75,11 @@ make -j
 make install
 ```

+也可以直接修改`tools/build_opencv.sh`的内容，然后直接运行下面的命令进行编译。
+
+```shell
+sh tools/build_opencv.sh
+```

 其中`root_path`为下载的opencv源码路径，`install_path`为opencv的安装路径，`make install`完成之后，会在该文件夹下生成opencv头文件和库文件，用于后面的OCR代码编译。

@@ -233,12 +239,12 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
    --image_dir=../../doc/imgs/12.jpg
 ```

-更多参数如下：
+更多支持的可调节参数解释如下：

 - 通用参数

 |参数名称|类型|默认参数|意义|
-| --- | --- | --- | --- |
+| :---: | :---: | :---: | :---: |
 |use_gpu|bool|false|是否使用GPU|
 |gpu_id|int|0|GPU id，使用GPU时有效|
 |gpu_mem|int|4000|申请的GPU内存|
@@ -248,7 +254,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
 - 检测模型相关

 |参数名称|类型|默认参数|意义|
-| --- | --- | --- | --- |
+| :---: | :---: | :---: | :---: |
 |det_model_dir|string|-|检测模型inference model地址|
 |max_side_len|int|960|输入图像长宽大于960时，等比例缩放图像，使得图像最长边为960|
 |det_db_thresh|float|0.3|用于过滤DB预测的二值化图像，设置为0.0-0.3对结果影响不明显|
@@ -260,7 +266,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
 - 方向分类器相关

 |参数名称|类型|默认参数|意义|
-| --- | --- | --- | --- |
+| :---: | :---: | :---: | :---: |
 |use_angle_cls|bool|false|是否使用方向分类器|
 |cls_model_dir|string|-|方向分类器inference model地址|
 |cls_thresh|float|0.9|方向分类器的得分阈值|
@@ -268,7 +274,7 @@ CUDNN_LIB_DIR=/your_cudnn_lib_dir
 - 识别模型相关

 |参数名称|类型|默认参数|意义|
-| --- | --- | --- | --- |
+| :---: | :---: | :---: | :---: |
 |rec_model_dir|string|-|识别模型inference model地址|
 |char_list_file|string|../../ppocr/utils/ppocr_keys_v1.txt|字典文件|


--- a/deploy/cpp_infer/readme_en.md
+++ b/deploy/cpp_infer/readme_en.md
@@ -17,10 +17,10 @@ PaddleOCR model deployment.

 * First, download the OpenCV source package from the OpenCV official website; it will be compiled from source in the Linux environment. Taking OpenCV 3.4.7 as an example, the download commands are as follows.

-```
+```bash
 cd deploy/cpp_infer
-wget https://github.com/opencv/opencv/archive/3.4.7.tar.gz
-tar -xf 3.4.7.tar.gz
+wget https://paddleocr.bj.bcebos.com/libs/opencv/opencv-3.4.7.tar.gz
+tar -xf opencv-3.4.7.tar.gz
 ```

 After extraction, you can see the `opencv-3.4.7/` folder in the current directory.

--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -61,7 +61,7 @@ DEFINE_string(cls_model_dir, "", "Path of cls inference model.");
 DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
 // recognition related
 DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
-DEFINE_int32(rec_batch_num, 1, "rec_batch_num.");
+DEFINE_int32(rec_batch_num, 6, "rec_batch_num.");
 DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt", "Path of dictionary.");


@@ -146,8 +146,9 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
    CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
                       FLAGS_gpu_mem, FLAGS_cpu_threads,
                       FLAGS_enable_mkldnn, char_list_file,
-                       FLAGS_use_tensorrt, FLAGS_precision);
+                       FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num);

+    std::vector<cv::Mat> img_list;
    for (int i = 0; i < cv_all_img_names.size(); ++i) {
      LOG(INFO) << "The predict img: " << cv_all_img_names[i];

@@ -156,14 +157,13 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
        std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl;
        exit(1);
      }
-
+      img_list.push_back(srcimg);
+    }
    std::vector<double> rec_times;
-      rec.Run(srcimg, &rec_times);
-        
+    rec.Run(img_list, &rec_times);
    time_info[0] += rec_times[0];
    time_info[1] += rec_times[1];
    time_info[2] += rec_times[2];
-    }
    
    if (FLAGS_benchmark) {
        AutoLogger autolog("ocr_rec", 
@@ -171,7 +171,7 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
                           FLAGS_use_tensorrt,
                           FLAGS_enable_mkldnn,
                           FLAGS_cpu_threads,
-                           1, 
+                           FLAGS_rec_batch_num, 
                           "dynamic", 
                           FLAGS_precision, 
                           time_info, 
@@ -209,7 +209,7 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
    CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
                       FLAGS_gpu_mem, FLAGS_cpu_threads,
                       FLAGS_enable_mkldnn, char_list_file,
-                       FLAGS_use_tensorrt, FLAGS_precision);
+                       FLAGS_use_tensorrt, FLAGS_precision, FLAGS_rec_batch_num);

    for (int i = 0; i < cv_all_img_names.size(); ++i) {
      LOG(INFO) << "The predict img: " << cv_all_img_names[i];
@@ -228,19 +228,22 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
      time_info_det[1] += det_times[1];
      time_info_det[2] += det_times[2];
        
-      cv::Mat crop_img;
+      std::vector<cv::Mat> img_list;
      for (int j = 0; j < boxes.size(); j++) {
+          cv::Mat crop_img;
          crop_img = Utility::GetRotateCropImage(srcimg, boxes[j]);
-
          if (cls != nullptr) {
              crop_img = cls->Run(crop_img);
          }
-        rec.Run(crop_img, &rec_times);
+          img_list.push_back(crop_img);
+      }
+
+      rec.Run(img_list, &rec_times);
      time_info_rec[0] += rec_times[0];
      time_info_rec[1] += rec_times[1];
      time_info_rec[2] += rec_times[2];
    }
-    }
+    
    if (FLAGS_benchmark) {
        AutoLogger autolog_det("ocr_det", 
                            FLAGS_use_gpu,
@@ -257,7 +260,7 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
                            FLAGS_use_tensorrt,
                            FLAGS_enable_mkldnn,
                            FLAGS_cpu_threads,
-                            1, 
+                            FLAGS_rec_batch_num, 
                            "dynamic", 
                            FLAGS_precision, 
                            time_info_rec, 

--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -16,27 +16,48 @@

 namespace PaddleOCR {
    
-void CRNNRecognizer::Run(cv::Mat &img, std::vector<double> *times) {
-  cv::Mat srcimg;
-  img.copyTo(srcimg);
-  cv::Mat resize_img;
+void CRNNRecognizer::Run(std::vector<cv::Mat> img_list, std::vector<double> *times) {
+    std::chrono::duration<float> preprocess_diff = std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
+    std::chrono::duration<float> inference_diff = std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
+    std::chrono::duration<float> postprocess_diff = std::chrono::steady_clock::now() - std::chrono::steady_clock::now();
+
+    int img_num = img_list.size();
+    std::vector<float> width_list;
+    for (int i = 0; i < img_num; i++) {
+        width_list.push_back(float(img_list[i].cols) / img_list[i].rows);
+    }
+    std::vector<int> indices = Utility::argsort(width_list);

-  float wh_ratio = float(srcimg.cols) / float(srcimg.rows);
+    for (int beg_img_no = 0; beg_img_no < img_num; beg_img_no += this->rec_batch_num_) {
        auto preprocess_start = std::chrono::steady_clock::now();
-  this->resize_op_.Run(srcimg, resize_img, wh_ratio, this->use_tensorrt_);
-
-  this->normalize_op_.Run(&resize_img, this->mean_, this->scale_,
-                          this->is_scale_);
-
-  std::vector<float> input(1 * 3 * resize_img.rows * resize_img.cols, 0.0f);
+        int end_img_no = min(img_num, beg_img_no + this->rec_batch_num_);
+        float max_wh_ratio = 0;
+        for (int ino = beg_img_no; ino < end_img_no; ino ++) {
+            int h = img_list[indices[ino]].rows;
+            int w = img_list[indices[ino]].cols;
+            float wh_ratio = w * 1.0 / h;
+            max_wh_ratio = max(max_wh_ratio, wh_ratio);
+        }
+        std::vector<cv::Mat> norm_img_batch;
+        for (int ino = beg_img_no; ino < end_img_no; ino ++) {
+            cv::Mat srcimg;
+            img_list[indices[ino]].copyTo(srcimg);
+            cv::Mat resize_img;
+            this->resize_op_.Run(srcimg, resize_img, max_wh_ratio, this->use_tensorrt_);
+            this->normalize_op_.Run(&resize_img, this->mean_, this->scale_, this->is_scale_);
+            norm_img_batch.push_back(resize_img);
+        }
        
-  this->permute_op_.Run(&resize_img, input.data());
+        int batch_width = int(ceilf(32 * max_wh_ratio)) - 1;
+        std::vector<float> input(this->rec_batch_num_ * 3 * 32 * batch_width, 0.0f);
+        this->permute_op_.Run(norm_img_batch, input.data());
        auto preprocess_end = std::chrono::steady_clock::now();
+        preprocess_diff += preprocess_end - preprocess_start;

        // Inference.
        auto input_names = this->predictor_->GetInputNames();
        auto input_t = this->predictor_->GetInputHandle(input_names[0]);
-  input_t->Reshape({1, 3, resize_img.rows, resize_img.cols});
+        input_t->Reshape({this->rec_batch_num_, 3, 32, batch_width});
        auto inference_start = std::chrono::steady_clock::now();
        input_t->CopyFromCpu(input.data());
        this->predictor_->Run();
@@ -52,9 +73,11 @@ void CRNNRecognizer::Run(cv::Mat &img, std::vector<double> *times) {

        output_t->CopyToCpu(predict_batch.data());
        auto inference_end = std::chrono::steady_clock::now();
+        inference_diff += inference_end - inference_start;
        
        // ctc decode
        auto postprocess_start = std::chrono::steady_clock::now();
+        for (int m = 0; m < predict_shape[0]; m++) {
            std::vector<std::string> str_res;
            int argmax_idx;
            int last_index = 0;
@@ -64,11 +87,11 @@ void CRNNRecognizer::Run(cv::Mat &img, std::vector<double> *times) {

            for (int n = 0; n < predict_shape[1]; n++) {
                argmax_idx =
-        int(Utility::argmax(&predict_batch[n * predict_shape[2]],
-                            &predict_batch[(n + 1) * predict_shape[2]]));
+                    int(Utility::argmax(&predict_batch[(m * predict_shape[1] + n) * predict_shape[2]],
+                                        &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]]));
                max_value =
-        float(*std::max_element(&predict_batch[n * predict_shape[2]],
-                                &predict_batch[(n + 1) * predict_shape[2]]));
+                    float(*std::max_element(&predict_batch[(m * predict_shape[1] + n) * predict_shape[2]],
+                                            &predict_batch[(m * predict_shape[1] + n + 1) * predict_shape[2]]));

                if (argmax_idx > 0 && (!(n > 0 && argmax_idx == last_index))) {
                    score += max_value;
@@ -77,21 +100,23 @@ void CRNNRecognizer::Run(cv::Mat &img, std::vector<double> *times) {
                }
                last_index = argmax_idx;
            }
-  auto postprocess_end = std::chrono::steady_clock::now();
            score /= count;
+            if (isnan(score))
+                continue;
            for (int i = 0; i < str_res.size(); i++) {
                std::cout << str_res[i];
            }
            std::cout << "\tscore: " << score << std::endl;
-
-  std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
+        }
+        auto postprocess_end = std::chrono::steady_clock::now();
+        postprocess_diff += postprocess_end - postprocess_start;
+    }
    times->push_back(double(preprocess_diff.count() * 1000));
-  std::chrono::duration<float> inference_diff = inference_end - inference_start;
    times->push_back(double(inference_diff.count() * 1000));
-  std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
    times->push_back(double(postprocess_diff.count() * 1000));
 }

+    
 void CRNNRecognizer::LoadModel(const std::string &model_dir) {
  //   AnalysisConfig config;
  paddle_infer::Config config;