Merge branch 'PaddlePaddle:dygraph' into dygraph

006d84bf · 崔浩 · GitHub · 302ca30c · 8beeb84c · 006d84bf
Unverified Commit 006d84bf authored Oct 21, 2021 by 崔浩 Committed by GitHub Oct 21, 2021
20 changed files
--- a/deploy/cpp_infer/readme_en.md
+++ b/deploy/cpp_infer/readme_en.md
-# Server-side C++ inference
+# Server-side C++ Inference

 This chapter introduces the C++ deployment method of the PaddleOCR model, and the corresponding python predictive deployment method refers to [document](../../doc/doc_ch/inference.md).
 C++ is better than python in terms of performance calculation. Therefore, in most CPU and GPU deployment scenarios, C++ deployment is mostly used.
@@ -6,14 +6,14 @@ This section will introduce how to configure the C++ environment and complete it
 PaddleOCR model deployment.


-## 1. Prepare the environment
+## 1. Prepare the Environment

 ### Environment

 - Linux, docker is recommended.


-### 1.1 Compile opencv
+### 1.1 Compile OpenCV

 * First of all, you need to download the source code compiled package in the Linux environment from the opencv official website. Taking opencv3.4.7 as an example, the download command is as follows.

@@ -73,7 +73,7 @@ opencv3/
 |-- share
 ```

-### 1.2 Compile or download or  the Paddle inference library
+### 1.2 Compile or Download the Paddle Inference Library

 * There are 2 ways to obtain the Paddle inference library, described in detail below.

@@ -136,7 +136,7 @@ build/paddle_inference_install_dir/
 Among them, `paddle` is the Paddle library required for C++ prediction later, and `version.txt` contains the version information of the current inference library.


-## 2. Compile and run the demo
+## 2. Compile and Run the Demo

 ### 2.1 Export the inference model


--- a/deploy/cpp_infer/src/main.cpp
+++ b/deploy/cpp_infer/src/main.cpp
@@ -35,15 +35,16 @@
 #include <sys/stat.h>

 #include <gflags/gflags.h>
+#include "auto_log/autolog.h"

 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU.");
 DEFINE_int32(gpu_id, 0, "Device id of GPU to execute.");
 DEFINE_int32(gpu_mem, 4000, "GPU id when infering with GPU.");
-DEFINE_int32(cpu_math_library_num_threads, 10, "Num of threads with CPU.");
-DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU.");
+DEFINE_int32(cpu_threads, 10, "Num of threads with CPU.");
+DEFINE_bool(enable_mkldnn, false, "Whether use mkldnn with CPU.");
 DEFINE_bool(use_tensorrt, false, "Whether use tensorrt.");
 DEFINE_string(precision, "fp32", "Precision be one of fp32/fp16/int8");
-DEFINE_bool(benchmark, true, "Whether use benchmark.");
+DEFINE_bool(benchmark, false, "Whether use benchmark.");
 DEFINE_string(save_log_path, "./log_output/", "Save benchmark log path.");
 // detection related
 DEFINE_string(image_dir, "", "Dir of input image.");
@@ -60,6 +61,7 @@ DEFINE_string(cls_model_dir, "", "Path of cls inference model.");
 DEFINE_double(cls_thresh, 0.9, "Threshold of cls_thresh.");
 // recognition related
 DEFINE_string(rec_model_dir, "", "Path of rec inference model.");
+DEFINE_int32(rec_batch_num, 1, "rec_batch_num.");
 DEFINE_string(char_list_file, "../../ppocr/utils/ppocr_keys_v1.txt", "Path of dictionary.");


@@ -68,34 +70,6 @@ using namespace cv;
 using namespace PaddleOCR;


-void PrintBenchmarkLog(std::string model_name, 
-                       int batch_size, 
-                       std::string input_shape,
-                       std::vector<double> time_info,
-                       int img_num){
-  LOG(INFO) << "----------------------- Config info -----------------------";
-  LOG(INFO) << "runtime_device: " << (FLAGS_use_gpu ? "gpu" : "cpu");
-  LOG(INFO) << "ir_optim: " << "True";
-  LOG(INFO) << "enable_memory_optim: " << "True";
-  LOG(INFO) << "enable_tensorrt: " << FLAGS_use_tensorrt;
-  LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
-  LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_math_library_num_threads;
-  LOG(INFO) << "----------------------- Data info -----------------------";
-  LOG(INFO) << "batch_size: " << batch_size;
-  LOG(INFO) << "input_shape: " << input_shape;
-  LOG(INFO) << "data_num: " << img_num;
-  LOG(INFO) << "----------------------- Model info -----------------------";
-  LOG(INFO) << "model_name: " << model_name;
-  LOG(INFO) << "precision: " << FLAGS_precision;
-  LOG(INFO) << "----------------------- Perf info ------------------------";
-  LOG(INFO) << "Total time spent(ms): "
-            << std::accumulate(time_info.begin(), time_info.end(), 0);
-  LOG(INFO) << "preprocess_time(ms): " << time_info[0] / img_num
-            << ", inference_time(ms): " << time_info[1] / img_num
-            << ", postprocess_time(ms): " << time_info[2] / img_num;
-}
-
-
 static bool PathExists(const std::string& path){
 #ifdef _WIN32
  struct _stat buffer;
@@ -110,14 +84,14 @@ static bool PathExists(const std::string& path){
 int main_det(std::vector<cv::String> cv_all_img_names) {
    std::vector<double> time_info = {0, 0, 0};
    DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
-                   FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads, 
-                   FLAGS_use_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
+                   FLAGS_gpu_mem, FLAGS_cpu_threads, 
+                   FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
                   FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
                   FLAGS_use_polygon_score, FLAGS_visualize,
                   FLAGS_use_tensorrt, FLAGS_precision);
    
    for (int i = 0; i < cv_all_img_names.size(); ++i) {
-      LOG(INFO) << "The predict img: " << cv_all_img_names[i];
+//       LOG(INFO) << "The predict img: " << cv_all_img_names[i];

      cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
      if (!srcimg.data) {
@@ -132,10 +106,30 @@ int main_det(std::vector<cv::String> cv_all_img_names) {
      time_info[0] += det_times[0];
      time_info[1] += det_times[1];
      time_info[2] += det_times[2];
+    
+      if (FLAGS_benchmark) {
+          cout << cv_all_img_names[i] << '\t';
+          for (int n = 0; n < boxes.size(); n++) {
+            for (int m = 0; m < boxes[n].size(); m++) {
+              cout << boxes[n][m][0] << ' ' << boxes[n][m][1] << ' ';
+            }
+          }
+          cout << endl;
+      }        
    }
    
    if (FLAGS_benchmark) {
-        PrintBenchmarkLog("det", 1, "dynamic", time_info, cv_all_img_names.size());
+        AutoLogger autolog("ocr_det", 
+                           FLAGS_use_gpu,
+                           FLAGS_use_tensorrt,
+                           FLAGS_enable_mkldnn,
+                           FLAGS_cpu_threads,
+                           1, 
+                           "dynamic", 
+                           FLAGS_precision, 
+                           time_info, 
+                           cv_all_img_names.size());
+        autolog.report();
    }
    return 0;
 }
@@ -143,9 +137,15 @@ int main_det(std::vector<cv::String> cv_all_img_names) {

 int main_rec(std::vector<cv::String> cv_all_img_names) {
    std::vector<double> time_info = {0, 0, 0};
+    
+    std::string char_list_file = FLAGS_char_list_file;
+    if (FLAGS_benchmark) 
+        char_list_file = FLAGS_char_list_file.substr(6);
+    cout << "label file: " << char_list_file << endl;
+        
    CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
-                       FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
-                       FLAGS_use_mkldnn, FLAGS_char_list_file,
+                       FLAGS_gpu_mem, FLAGS_cpu_threads,
+                       FLAGS_enable_mkldnn, char_list_file,
                       FLAGS_use_tensorrt, FLAGS_precision);

    for (int i = 0; i < cv_all_img_names.size(); ++i) {
@@ -166,17 +166,29 @@ int main_rec(std::vector<cv::String> cv_all_img_names) {
    }
        
    if (FLAGS_benchmark) {
-        PrintBenchmarkLog("rec", 1, "dynamic", time_info, cv_all_img_names.size());
+        AutoLogger autolog("ocr_rec", 
+                           FLAGS_use_gpu,
+                           FLAGS_use_tensorrt,
+                           FLAGS_enable_mkldnn,
+                           FLAGS_cpu_threads,
+                           1, 
+                           "dynamic", 
+                           FLAGS_precision, 
+                           time_info, 
+                           cv_all_img_names.size());
+        autolog.report();
    }
-    
    return 0;
 }


 int main_system(std::vector<cv::String> cv_all_img_names) {
+    std::vector<double> time_info_det = {0, 0, 0};
+    std::vector<double> time_info_rec = {0, 0, 0};
+
    DBDetector det(FLAGS_det_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
-                   FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads, 
-                   FLAGS_use_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
+                   FLAGS_gpu_mem, FLAGS_cpu_threads, 
+                   FLAGS_enable_mkldnn, FLAGS_max_side_len, FLAGS_det_db_thresh,
                   FLAGS_det_db_box_thresh, FLAGS_det_db_unclip_ratio,
                   FLAGS_use_polygon_score, FLAGS_visualize,
                   FLAGS_use_tensorrt, FLAGS_precision);
@@ -184,22 +196,25 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
    Classifier *cls = nullptr;
    if (FLAGS_use_angle_cls) {
      cls = new Classifier(FLAGS_cls_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
-                           FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
-                           FLAGS_use_mkldnn, FLAGS_cls_thresh,
+                           FLAGS_gpu_mem, FLAGS_cpu_threads,
+                           FLAGS_enable_mkldnn, FLAGS_cls_thresh,
                           FLAGS_use_tensorrt, FLAGS_precision);
    }

+    std::string char_list_file = FLAGS_char_list_file;
+    if (FLAGS_benchmark) 
+        char_list_file = FLAGS_char_list_file.substr(6);
+    cout << "label file: " << char_list_file << endl;
+        
    CRNNRecognizer rec(FLAGS_rec_model_dir, FLAGS_use_gpu, FLAGS_gpu_id,
-                       FLAGS_gpu_mem, FLAGS_cpu_math_library_num_threads,
-                       FLAGS_use_mkldnn, FLAGS_char_list_file,
+                       FLAGS_gpu_mem, FLAGS_cpu_threads,
+                       FLAGS_enable_mkldnn, char_list_file,
                       FLAGS_use_tensorrt, FLAGS_precision);

-    auto start = std::chrono::system_clock::now();
-
    for (int i = 0; i < cv_all_img_names.size(); ++i) {
      LOG(INFO) << "The predict img: " << cv_all_img_names[i];

-      cv::Mat srcimg = cv::imread(FLAGS_image_dir, cv::IMREAD_COLOR);
+      cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
      if (!srcimg.data) {
        std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << endl;
        exit(1);
@@ -209,6 +224,9 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
      std::vector<double> rec_times;
        
      det.Run(srcimg, boxes, &det_times);
+      time_info_det[0] += det_times[0];
+      time_info_det[1] += det_times[1];
+      time_info_det[2] += det_times[2];
        
      cv::Mat crop_img;
      for (int j = 0; j < boxes.size(); j++) {
@@ -218,18 +236,36 @@ int main_system(std::vector<cv::String> cv_all_img_names) {
          crop_img = cls->Run(crop_img);
        }
        rec.Run(crop_img, &rec_times);
+        time_info_rec[0] += rec_times[0];
+        time_info_rec[1] += rec_times[1];
+        time_info_rec[2] += rec_times[2];
      }
-        
-      auto end = std::chrono::system_clock::now();
-      auto duration =
-          std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-      std::cout << "Cost  "
-                << double(duration.count()) *
-                       std::chrono::microseconds::period::num /
-                       std::chrono::microseconds::period::den
-                << "s" << std::endl;
    }
-      
+    if (FLAGS_benchmark) {
+        AutoLogger autolog_det("ocr_det", 
+                            FLAGS_use_gpu,
+                            FLAGS_use_tensorrt,
+                            FLAGS_enable_mkldnn,
+                            FLAGS_cpu_threads,
+                            1, 
+                            "dynamic", 
+                            FLAGS_precision, 
+                            time_info_det, 
+                            cv_all_img_names.size());
+        AutoLogger autolog_rec("ocr_rec", 
+                            FLAGS_use_gpu,
+                            FLAGS_use_tensorrt,
+                            FLAGS_enable_mkldnn,
+                            FLAGS_cpu_threads,
+                            1, 
+                            "dynamic", 
+                            FLAGS_precision, 
+                            time_info_rec, 
+                            cv_all_img_names.size());
+        autolog_det.report();
+        std::cout << endl;
+        autolog_rec.report();
+    }  
    return 0;
 }


--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -112,12 +112,16 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
          1 << 20, 10, 3,
          precision,
          false, false);
+
      std::map<std::string, std::vector<int>> min_input_shape = {
-          {"x", {1, 3, 32, 10}}};
+          {"x", {1, 3, 32, 10}},
+          {"lstm_0.tmp_0", {10, 1, 96}}};
      std::map<std::string, std::vector<int>> max_input_shape = {
-          {"x", {1, 3, 32, 2000}}};
+          {"x", {1, 3, 32, 2000}},
+          {"lstm_0.tmp_0", {1000, 1, 96}}};
      std::map<std::string, std::vector<int>> opt_input_shape = {
-          {"x", {1, 3, 32, 320}}};
+          {"x", {1, 3, 32, 320}},
+          {"lstm_0.tmp_0", {25, 1, 96}}};

      config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
                                    opt_input_shape);
@@ -139,7 +143,7 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
  config.SwitchIrOptim(true);

  config.EnableMemoryOptim();
-  config.DisableGlogInfo();
+//   config.DisableGlogInfo();

  this->predictor_ = CreatePredictor(config);
 }

--- a/deploy/hubserving/ocr_det/params.py
+++ b/deploy/hubserving/ocr_det/params.py
@@ -13,7 +13,7 @@ def read_params():

    #params for text detector
    cfg.det_algorithm = "DB"
-    cfg.det_model_dir = "./inference/ch_ppocr_mobile_v2.0_det_infer/"
+    cfg.det_model_dir = "./inference/ch_PP-OCRv2_det_infer/"
    cfg.det_limit_side_len = 960
    cfg.det_limit_type = 'max'


--- a/deploy/hubserving/ocr_rec/params.py
+++ b/deploy/hubserving/ocr_rec/params.py
@@ -13,7 +13,7 @@ def read_params():

    #params for text recognizer
    cfg.rec_algorithm = "CRNN"
-    cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v2.0_rec_infer/"
+    cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/"

    cfg.rec_image_shape = "3, 32, 320"
    cfg.rec_char_type = 'ch'

--- a/deploy/hubserving/ocr_system/params.py
+++ b/deploy/hubserving/ocr_system/params.py
@@ -13,7 +13,7 @@ def read_params():

    #params for text detector
    cfg.det_algorithm = "DB"
-    cfg.det_model_dir = "./inference/ch_ppocr_mobile_v2.0_det_infer/"
+    cfg.det_model_dir = "./inference/ch_PP-OCRv2_det_infer/"
    cfg.det_limit_side_len = 960
    cfg.det_limit_type = 'max'

@@ -31,7 +31,7 @@ def read_params():

    #params for text recognizer
    cfg.rec_algorithm = "CRNN"
-    cfg.rec_model_dir = "./inference/ch_ppocr_mobile_v2.0_rec_infer/"
+    cfg.rec_model_dir = "./inference/ch_PP-OCRv2_rec_infer/"

    cfg.rec_image_shape = "3, 32, 320"
    cfg.rec_char_type = 'ch'

--- a/deploy/hubserving/readme.md
+++ b/deploy/hubserving/readme.md
@@ -34,10 +34,10 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/sim
 ```

 ### 2. 下载推理模型
-安装服务模块前，需要准备推理模型并放到正确路径。默认使用的是v2.0版的超轻量模型，默认模型路径为：
+安装服务模块前，需要准备推理模型并放到正确路径。默认使用的是PP-OCRv2模型，默认模型路径为：
 ```
-检测模型：./inference/ch_ppocr_mobile_v2.0_det_infer/
-识别模型：./inference/ch_ppocr_mobile_v2.0_rec_infer/
+检测模型：./inference/ch_PP-OCRv2_det_infer/
+识别模型：./inference/ch_PP-OCRv2_rec_infer/
 方向分类器：./inference/ch_ppocr_mobile_v2.0_cls_infer/
 ```  


--- a/deploy/hubserving/readme_en.md
+++ b/deploy/hubserving/readme_en.md
@@ -35,10 +35,10 @@ pip3 install paddlehub==2.1.0 --upgrade -i https://pypi.tuna.tsinghua.edu.cn/sim
 ```

 ### 2. Download inference model
-Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the ultra lightweight model of v2.0 is used, and the default model path is:  
+Before installing the service module, you need to prepare the inference model and put it in the correct path. By default, the PP-OCRv2 models are used, and the default model path is:  
 ```
-detection model: ./inference/ch_ppocr_mobile_v2.0_det_infer/
-recognition model: ./inference/ch_ppocr_mobile_v2.0_rec_infer/
+detection model: ./inference/ch_PP-OCRv2_det_infer/
+recognition model: ./inference/ch_PP-OCRv2_rec_infer/
 text direction classifier: ./inference/ch_ppocr_mobile_v2.0_cls_infer/
 ```  


--- a/deploy/pdserving/web_service_det.py
+++ b/deploy/pdserving/web_service_det.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle_serving_server.web_service import WebService, Op
+
+import logging
+import numpy as np
+import cv2
+import base64
+# from paddle_serving_app.reader import OCRReader
+from ocr_reader import OCRReader, DetResizeForTest
+from paddle_serving_app.reader import Sequential, ResizeByFactor
+from paddle_serving_app.reader import Div, Normalize, Transpose
+from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
+
+_LOGGER = logging.getLogger()
+
+
+class DetOp(Op):  # detection Op: image preprocessing plus DBPostProcess/FilterBoxes post-processing
+    def init_op(self):  # builds the preprocessing chain and the two post-processing helpers used below
+        self.det_preprocess = Sequential([
+            DetResizeForTest(), Div(255),
+            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
+                (2, 0, 1))
+        ])
+        self.filter_func = FilterBoxes(10, 10)
+        self.post_func = DBPostProcess({
+            "thresh": 0.3,
+            "box_thresh": 0.5,
+            "max_candidates": 1000,
+            "unclip_ratio": 1.5,
+            "min_size": 3
+        })
+
+    def preprocess(self, input_dicts, data_id, log_id):  # decodes the base64 "image" field and returns the feed dict keyed "x"
+        (_, input_dict), = input_dicts.items()  # this unpacking raises unless input_dicts has exactly one entry
+        data = base64.b64decode(input_dict["image"].encode('utf8'))
+        self.raw_im = data  # raw decoded bytes are kept on the instance
+        data = np.fromstring(data, np.uint8)  # NOTE(review): np.fromstring on binary data is deprecated in NumPy; np.frombuffer looks like the replacement - confirm
+        # Note: class variables(self.var) can only be used in process op mode
+        im = cv2.imdecode(data, cv2.IMREAD_COLOR)  # NOTE(review): presumably None for undecodable bytes, which would make .shape below raise - confirm
+        self.ori_h, self.ori_w, _ = im.shape  # original size is stored for postprocess()
+        det_img = self.det_preprocess(im)
+        _, self.new_h, self.new_w = det_img.shape  # preprocessed size is stored for the ratio computed in postprocess()
+        return {"x": det_img[np.newaxis, :].copy()}, False, None, ""  # np.newaxis adds a leading axis before the data is fed
+
+    def postprocess(self, input_dicts, fetch_dict, log_id):  # turns the fetched model output into a {"dt_boxes": str} result
+        det_out = fetch_dict["save_infer_model/scale_0.tmp_1"]  # output tensor name is hard-coded; must match the served model
+        ratio_list = [
+            float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
+        ]
+        dt_boxes_list = self.post_func(det_out, [ratio_list])
+        dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])  # only the first entry of the post-process result is used
+        out_dict = {"dt_boxes": str(dt_boxes)}  # boxes are returned as their str() form, not as a structured list
+
+        return out_dict, None, ""
+
+
+class OcrService(WebService):  # web service whose pipeline consists of a single DetOp
+    def get_pipeline_response(self, read_op):  # returns the final Op of the pipeline, fed directly by read_op
+        det_op = DetOp(name="det", input_ops=[read_op])
+        return det_op
+
+
+uci_service = OcrService(name="ocr")  # module-level side effect: the service is created on import/execution of this script
+uci_service.prepare_pipeline_config("config.yml")  # relative path; presumably resolved against the working directory - confirm
+uci_service.run_service()  # NOTE(review): presumably blocks while serving - confirm against paddle_serving_server
--- a/deploy/pdserving/web_service_rec.py
+++ b/deploy/pdserving/web_service_rec.py
--- a/deploy/slim/prune/sensitivity_anal.py
+++ b/deploy/slim/prune/sensitivity_anal.py
--- a/deploy/slim/quantization/quant_kl.py
+++ b/deploy/slim/quantization/quant_kl.py
--- a/doc/PaddleOCR_log.png
+++ b/doc/PaddleOCR_log.png
--- a/doc/datasets/ic15_location_download.png
+++ b/doc/datasets/ic15_location_download.png
--- a/doc/datasets/icdar_rec.png
+++ b/doc/datasets/icdar_rec.png
--- a/doc/doc_ch/FAQ.md
+++ b/doc/doc_ch/FAQ.md
--- a/doc/doc_ch/add_new_algorithm.md
+++ b/doc/doc_ch/add_new_algorithm.md
--- a/doc/doc_ch/algorithm_overview.md
+++ b/doc/doc_ch/algorithm_overview.md
--- a/doc/doc_ch/angle_class.md
+++ b/doc/doc_ch/angle_class.md
--- a/doc/doc_ch/benchmark.md
+++ b/doc/doc_ch/benchmark.md