Commit 84ce34bd authored by Leif

Merge remote-tracking branch 'origin/dygraph' into dygraph

parents f470ede8 529133fb
-# PaddleOCR DB/EAST training benchmark test
# PaddleOCR DB/EAST/PSE training benchmark test
The files in the PaddleOCR/benchmark directory are used to collect and analyze training logs.
Training uses the icdar2015 dataset, which contains 1000 training images and 500 test images. The model config uses resnet18_vd as the backbone, and training is run with batch_size=8 and batch_size=16.
@@ -18,7 +18,7 @@ run_det.sh is executed as follows:
```
# cd PaddleOCR/
bash benchmark/run_det.sh
```
Taking DB as an example, four log files will be produced, as follows:
@@ -28,7 +28,3 @@ det_res18_db_v2.0_sp_bs8_fp32_1
det_res18_db_v2.0_mp_bs16_fp32_1
det_res18_db_v2.0_mp_bs8_fp32_1
```
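Each of these logs is then parsed by the `_analysis_log` step of `benchmark/run_benchmark_det.sh` (shown in the next diff). As a minimal sketch, assuming the logs sit in the current directory, the single-card bs8 DB log could also be analyzed manually with the same flags:
```
# Hypothetical manual run of the analysis step; the flags mirror _analysis_log below.
python3.7 benchmark/analysis.py \
    --filename det_res18_db_v2.0_sp_bs8_fp32_1 \
    --mission_name det_res18_db_v2.0 \
    --run_mode sp \
    --direction_id 0 \
    --keyword 'ips:' \
    --base_batch_size 8 \
    --skip_steps 1 \
    --gpu_num 1 \
    --index 1 \
    --model_mode=-1 \
    --ips_unit=samples/sec
```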
@@ -6,7 +6,7 @@ function _set_params(){
    run_mode=${1:-"sp"}         # sp: single card | mp: multi card
    batch_size=${2:-"64"}
    fp_item=${3:-"fp32"}        # fp32|fp16
-   max_iter=${4:-"500"}        # optional; modify the code if early termination is needed
    max_iter=${4:-"10"}         # optional; modify the code if early termination is needed
    model_name=${5:-"model_name"}
    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # TRAIN_LOG_DIR is set later by QA
@@ -20,7 +20,7 @@ function _train(){
    echo "Train on ${num_gpu_devices} GPUs"
    echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
-   train_cmd="-c configs/det/${model_name}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_iter} "
    train_cmd="-c configs/det/${model_name}.yml -o Train.loader.batch_size_per_card=${batch_size} Global.epoch_num=${max_iter} Global.eval_batch_step=[0,20000] Global.print_batch_step=2"
    case ${run_mode} in
    sp)
        train_cmd="python3.7 tools/train.py "${train_cmd}""
@@ -39,18 +39,24 @@ function _train(){
        echo -e "${model_name}, SUCCESS"
        export job_fail_flag=0
    fi
-   kill -9 `ps -ef|grep 'python3.7'|awk '{print $2}'`
    if [ $run_mode = "mp" -a -d mylog ]; then
        rm ${log_file}
        cp mylog/workerlog.0 ${log_file}
    fi
}

-# run log analysis
-   analysis_cmd="python3.7 benchmark/analysis.py --filename ${log_file} --mission_name ${model_name} --run_mode ${mode} --direction_id 0 --keyword 'ips:' --base_batch_size ${batch_szie} --skip_steps 1 --gpu_num ${num_gpu_devices} --index 1 --model_mode=-1 --ips_unit=samples/sec"
-   eval $analysis_cmd
function _analysis_log(){
    analysis_cmd="python3.7 benchmark/analysis.py --filename ${log_file} --mission_name ${model_name} --run_mode ${run_mode} --direction_id 0 --keyword 'ips:' --base_batch_size ${batch_size} --skip_steps 1 --gpu_num ${num_gpu_devices} --index 1 --model_mode=-1 --ips_unit=samples/sec"
    eval $analysis_cmd
}

function _kill_process(){
    kill -9 `ps -ef|grep 'python3.7'|awk '{print $2}'`
}

_set_params $@
_train
_analysis_log
_kill_process
\ No newline at end of file
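For reference, the script above can also be launched directly for a single configuration; the positional arguments are parsed by `_set_params` as run_mode, batch_size, fp_item, max_iter and model_name, which is the same call pattern `run_det.sh` uses below:
```
# Single-card DB run: sp mode, batch_size=8, fp32, stop after 10 epochs.
# Set TRAIN_LOG_DIR first if logs should not go to the current directory.
cd PaddleOCR/
CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh sp 8 fp32 10 det_res18_db_v2.0
```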
@@ -3,11 +3,11 @@
# 1 Install the dependencies required by this model (please note here if any optimization strategy is enabled)
python3.7 -m pip install -r requirements.txt
# 2 Copy the data and pretrained models required by this model
-wget -c -p ./tain_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar xf icdar2015.tar && cd ../
wget -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar && cd train_data && tar xf icdar2015.tar && cd ../
-wget -c -p ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
wget -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams
# 3 Run in batch (if batch running is inconvenient, steps 1 and 2 need to be moved into each individual model)
-model_mode_list=(det_res18_db_v2.0 det_r50_vd_east)
model_mode_list=(det_res18_db_v2.0 det_r50_vd_east det_r50_vd_pse)
fp_item_list=(fp32)
bs_list=(8 16)
for model_mode in ${model_mode_list[@]}; do
@@ -15,11 +15,11 @@ for model_mode in ${model_mode_list[@]}; do
    for bs_item in ${bs_list[@]}; do
        echo "index is speed, 1gpus, begin, ${model_name}"
        run_mode=sp
-       CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 10 ${model_mode} # (5min)
        CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode} # (5min)
        sleep 60
        echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
        run_mode=mp
-       CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 10 ${model_mode}
        CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark_det.sh ${run_mode} ${bs_item} ${fp_item} 2 ${model_mode}
        sleep 60
    done
done
@@ -17,7 +17,7 @@ Global:
  character_dict_path: ppocr/utils/EN_symbol_dict.txt
  max_text_length: 25
  infer_mode: False
-  use_space_char: True
  use_space_char: False
  save_res_path: ./output/rec/predicts_nrtr.txt

Optimizer:
@@ -12,12 +12,14 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "paddle_api.h" // NOLINT
#include <chrono>
#include "paddle_api.h" // NOLINT
#include "paddle_place.h"
#include "cls_process.h"
#include "crnn_process.h"
#include "db_post_process.h"
#include "AutoLog/auto_log/lite_autolog.h"
using namespace paddle::lite_api; // NOLINT
using namespace std;
@@ -27,7 +29,7 @@ void NeonMeanScale(const float *din, float *dout, int size,
const std::vector<float> mean,
const std::vector<float> scale) {
if (mean.size() != 3 || scale.size() != 3) {
-std::cerr << "[ERROR] mean or scale size must equal to 3\n";
std::cerr << "[ERROR] mean or scale size must equal to 3" << std::endl;
exit(1);
}
float32x4_t vmean0 = vdupq_n_f32(mean[0]);
@@ -159,7 +161,8 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
std::vector<float> &rec_text_score,
std::vector<std::string> charactor_dict,
std::shared_ptr<PaddlePredictor> predictor_cls,
-int use_direction_classify) {
int use_direction_classify,
std::vector<double> *times) {
std::vector<float> mean = {0.5f, 0.5f, 0.5f};
std::vector<float> scale = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
@@ -226,14 +229,15 @@ void RunRecModel(std::vector<std::vector<std::vector<int>>> boxes, cv::Mat img,
std::vector<std::vector<std::vector<int>>>
RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
-std::map<std::string, double> Config) {
std::map<std::string, double> Config, std::vector<double> *times) {
// Read img
int max_side_len = int(Config["max_side_len"]);
int det_db_use_dilate = int(Config["det_db_use_dilate"]);
cv::Mat srcimg;
img.copyTo(srcimg);
auto preprocess_start = std::chrono::steady_clock::now();
std::vector<float> ratio_hw;
img = DetResizeImg(img, max_side_len, ratio_hw);
cv::Mat img_fp;
@@ -248,8 +252,10 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
const float *dimg = reinterpret_cast<const float *>(img_fp.data);
NeonMeanScale(dimg, data0, img_fp.rows * img_fp.cols, mean, scale);
auto preprocess_end = std::chrono::steady_clock::now();
// Run predictor
auto inference_start = std::chrono::steady_clock::now();
predictor->Run();
// Get output and post process
@@ -257,8 +263,10 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
std::move(predictor->GetOutput(0)));
auto *outptr = output_tensor->data<float>();
auto shape_out = output_tensor->shape();
auto inference_end = std::chrono::steady_clock::now();
// Save output
auto postprocess_start = std::chrono::steady_clock::now();
float pred[shape_out[2] * shape_out[3]];
unsigned char cbuf[shape_out[2] * shape_out[3]];
@@ -287,14 +295,35 @@ RunDetModel(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
std::vector<std::vector<std::vector<int>>> filter_boxes =
FilterTagDetRes(boxes, ratio_hw[0], ratio_hw[1], srcimg);
auto postprocess_end = std::chrono::steady_clock::now();
std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
times->push_back(double(preprocess_diff.count() * 1000));
std::chrono::duration<float> inference_diff = inference_end - inference_start;
times->push_back(double(inference_diff.count() * 1000));
std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
times->push_back(double(postprocess_diff.count() * 1000));
return filter_boxes;
}
-std::shared_ptr<PaddlePredictor> loadModel(std::string model_file) {
std::shared_ptr<PaddlePredictor> loadModel(std::string model_file, std::string power_mode, int num_threads) {
MobileConfig config;
config.set_model_from_file(model_file);
if (power_mode == "LITE_POWER_HIGH"){
config.set_power_mode(LITE_POWER_HIGH);
} else {
if (power_mode == "LITE_POWER_LOW") {
config.set_power_mode(LITE_POWER_HIGH);
} else {
std::cerr << "Only support LITE_POWER_HIGH or LITE_POWER_HIGH." << std::endl;
exit(1);
}
}
config.set_threads(num_threads);
std::shared_ptr<PaddlePredictor> predictor = std::shared_ptr<PaddlePredictor> predictor =
CreatePaddlePredictor<MobileConfig>(config); CreatePaddlePredictor<MobileConfig>(config);
return predictor; return predictor;
@@ -354,60 +383,255 @@ std::map<std::string, double> LoadConfigTxt(std::string config_path) {
return dict;
}
-int main(int argc, char **argv) {
-  if (argc < 5) {
-    std::cerr << "[ERROR] usage: " << argv[0]
-              << " det_model_file cls_model_file rec_model_file image_path "
-                 "charactor_dict\n";
-    exit(1);
-  }
-  std::string det_model_file = argv[1];
-  std::string rec_model_file = argv[2];
-  std::string cls_model_file = argv[3];
-  std::string img_path = argv[4];
-  std::string dict_path = argv[5];
-  //// load config from txt file
-  auto Config = LoadConfigTxt("./config.txt");
-  int use_direction_classify = int(Config["use_direction_classify"]);
-  auto start = std::chrono::system_clock::now();
-  auto det_predictor = loadModel(det_model_file);
-  auto rec_predictor = loadModel(rec_model_file);
-  auto cls_predictor = loadModel(cls_model_file);
-  auto charactor_dict = ReadDict(dict_path);
-  charactor_dict.insert(charactor_dict.begin(), "#"); // blank char for ctc
-  charactor_dict.push_back(" ");
-  cv::Mat srcimg = cv::imread(img_path, cv::IMREAD_COLOR);
-  auto boxes = RunDetModel(det_predictor, srcimg, Config);
-  std::vector<std::string> rec_text;
-  std::vector<float> rec_text_score;
-  RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
-              charactor_dict, cls_predictor, use_direction_classify);
-  auto end = std::chrono::system_clock::now();
-  auto duration =
-      std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-  //// visualization
-  auto img_vis = Visualization(srcimg, boxes);
-  //// print recognized text
-  for (int i = 0; i < rec_text.size(); i++) {
-    std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
-              << std::endl;
-  }
-  std::cout << "花费了"
-            << double(duration.count()) *
-                   std::chrono::microseconds::period::num /
-                   std::chrono::microseconds::period::den
-            << "秒" << std::endl;
-  return 0;
-}
void check_params(int argc, char **argv) {
if (argc<=1 || (strcmp(argv[1], "det")!=0 && strcmp(argv[1], "rec")!=0 && strcmp(argv[1], "system")!=0)) {
std::cerr << "Please choose one mode of [det, rec, system] !" << std::endl;
exit(1);
}
if (strcmp(argv[1], "det") == 0) {
if (argc < 9){
std::cerr << "[ERROR] usage:" << argv[0]
<< " det det_model num_threads batchsize power_mode img_dir det_config lite_benchmark_value" << std::endl;
exit(1);
}
}
if (strcmp(argv[1], "rec") == 0) {
if (argc < 9){
std::cerr << "[ERROR] usage:" << argv[0]
<< " rec rec_model num_threads batchsize power_mode img_dir key_txt lite_benchmark_value" << std::endl;
exit(1);
}
}
if (strcmp(argv[1], "system") == 0) {
if (argc < 12){
std::cerr << "[ERROR] usage:" << argv[0]
<< " system det_model rec_model clas_model num_threads batchsize power_mode img_dir det_config key_txt lite_benchmark_value" << std::endl;
exit(1);
}
}
}
void system(char **argv){
std::string det_model_file = argv[2];
std::string rec_model_file = argv[3];
std::string cls_model_file = argv[4];
std::string precision = argv[5];
std::string num_threads = argv[6];
std::string batchsize = argv[7];
std::string power_mode = argv[8];
std::string img_dir = argv[9];
std::string det_config_path = argv[10];
std::string dict_path = argv[11];
if (strcmp(argv[5], "FP32") != 0 && strcmp(argv[5], "INT8") != 0) {
std::cerr << "Only support FP32 or INT8." << std::endl;
exit(1);
}
std::vector<cv::String> cv_all_img_names;
cv::glob(img_dir, cv_all_img_names);
//// load config from txt file
auto Config = LoadConfigTxt(det_config_path);
int use_direction_classify = int(Config["use_direction_classify"]);
auto charactor_dict = ReadDict(dict_path);
charactor_dict.insert(charactor_dict.begin(), "#"); // blank char for ctc
charactor_dict.push_back(" ");
auto det_predictor = loadModel(det_model_file, power_mode, std::stoi(num_threads));
auto rec_predictor = loadModel(rec_model_file, power_mode, std::stoi(num_threads));
auto cls_predictor = loadModel(cls_model_file, power_mode, std::stoi(num_threads));
for (int i = 0; i < cv_all_img_names.size(); ++i) {
std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << std::endl;
exit(1);
}
std::vector<double> det_times;
auto boxes = RunDetModel(det_predictor, srcimg, Config, &det_times);
std::vector<std::string> rec_text;
std::vector<float> rec_text_score;
std::vector<double> rec_times;
RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
charactor_dict, cls_predictor, use_direction_classify, &rec_times);
//// visualization
auto img_vis = Visualization(srcimg, boxes);
//// print recognized text
for (int i = 0; i < rec_text.size(); i++) {
std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
<< std::endl;
}
}
}
void det(int argc, char **argv) {
std::string det_model_file = argv[2];
std::string precision = argv[3];
std::string num_threads = argv[4];
std::string batchsize = argv[5];
std::string power_mode = argv[6];
std::string img_dir = argv[7];
std::string det_config_path = argv[8];
if (strcmp(argv[3], "FP32") != 0 && strcmp(argv[3], "INT8") != 0) {
std::cerr << "Only support FP32 or INT8." << std::endl;
exit(1);
}
std::vector<cv::String> cv_all_img_names;
cv::glob(img_dir, cv_all_img_names);
//// load config from txt file
auto Config = LoadConfigTxt(det_config_path);
auto det_predictor = loadModel(det_model_file, power_mode, std::stoi(num_threads));
std::vector<double> time_info = {0, 0, 0};
for (int i = 0; i < cv_all_img_names.size(); ++i) {
std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << std::endl;
exit(1);
}
std::vector<double> times;
auto boxes = RunDetModel(det_predictor, srcimg, Config, &times);
//// visualization
auto img_vis = Visualization(srcimg, boxes);
std::cout << boxes.size() << " bboxes have detected:" << std::endl;
// for (int i=0; i<boxes.size(); i++){
// std::cout << "The " << i << " box:" << std::endl;
// for (int j=0; j<4; j++){
// for (int k=0; k<2; k++){
// std::cout << boxes[i][j][k] << "\t";
// }
// }
// std::cout << std::endl;
// }
time_info[0] += times[0];
time_info[1] += times[1];
time_info[2] += times[2];
}
if (strcmp(argv[9], "True") == 0) {
AutoLogger autolog(det_model_file,
0,
0,
0,
std::stoi(num_threads),
std::stoi(batchsize),
"dynamic",
precision,
power_mode,
time_info,
cv_all_img_names.size());
autolog.report();
}
}
void rec(int argc, char **argv) {
std::string rec_model_file = argv[2];
std::string precision = argv[3];
std::string num_threads = argv[4];
std::string batchsize = argv[5];
std::string power_mode = argv[6];
std::string img_dir = argv[7];
std::string dict_path = argv[8];
if (strcmp(argv[3], "FP32") != 0 && strcmp(argv[3], "INT8") != 0) {
std::cerr << "Only support FP32 or INT8." << std::endl;
exit(1);
}
std::vector<cv::String> cv_all_img_names;
cv::glob(img_dir, cv_all_img_names);
auto charactor_dict = ReadDict(dict_path);
charactor_dict.insert(charactor_dict.begin(), "#"); // blank char for ctc
charactor_dict.push_back(" ");
auto rec_predictor = loadModel(rec_model_file, power_mode, std::stoi(num_threads));
std::shared_ptr<PaddlePredictor> cls_predictor;
std::vector<double> time_info = {0, 0, 0};
for (int i = 0; i < cv_all_img_names.size(); ++i) {
std::cout << "The predict img: " << cv_all_img_names[i] << std::endl;
cv::Mat srcimg = cv::imread(cv_all_img_names[i], cv::IMREAD_COLOR);
if (!srcimg.data) {
std::cerr << "[ERROR] image read failed! image path: " << cv_all_img_names[i] << std::endl;
exit(1);
}
int width = srcimg.cols;
int height = srcimg.rows;
std::vector<int> upper_left = {0, 0};
std::vector<int> upper_right = {width, 0};
std::vector<int> lower_right = {width, height};
std::vector<int> lower_left = {0, height};
std::vector<std::vector<int>> box = {upper_left, upper_right, lower_right, lower_left};
std::vector<std::vector<std::vector<int>>> boxes = {box};
std::vector<std::string> rec_text;
std::vector<float> rec_text_score;
std::vector<double> times;
RunRecModel(boxes, srcimg, rec_predictor, rec_text, rec_text_score,
charactor_dict, cls_predictor, 0, &times);
//// print recognized text
for (int i = 0; i < rec_text.size(); i++) {
std::cout << i << "\t" << rec_text[i] << "\t" << rec_text_score[i]
<< std::endl;
}
}
// TODO: support autolog
if (strcmp(argv[9], "True") == 0) {
AutoLogger autolog(rec_model_file,
0,
0,
0,
std::stoi(num_threads),
std::stoi(batchsize),
"dynamic",
precision,
power_mode,
time_info,
cv_all_img_names.size());
autolog.report();
}
}
int main(int argc, char **argv) {
check_params(argc, argv);
std::cout << "mode: " << argv[1] << endl;
if (strcmp(argv[1], "system") == 0) {
system(argv);
}
if (strcmp(argv[1], "det") == 0) {
det(argc, argv);
}
if (strcmp(argv[1], "rec") == 0) {
rec(argc, argv);
}
return 0;
}
\ No newline at end of file
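As a hedged usage sketch, a det-mode run of the Lite demo above could look like the following. The argument order follows the parsing in `det()` (mode, model, precision, num_threads, batchsize, power_mode, img_dir, det_config, benchmark flag), and the model, image and config paths are the illustrative ones from the lite_params block later in this commit. Note that the usage string printed by `check_params` omits the precision argument that `det()` actually reads at argv[3].
```
# Illustrative paths taken from the lite_params section below.
./ocr_db_crnn det ./models/ch_ppocr_mobile_v2.0_det_opt.nb FP32 1 1 LITE_POWER_HIGH \
    ./test_data/icdar2015_lite/text_localization/ch4_test_images/ ./config.txt True
```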
@@ -64,7 +64,7 @@ C-CTC Loss is short for CTC Loss + Center Loss, where Center Loss comes from the paper <
Taking the config file `configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml` as an example, the center extraction command is as follows:
```
-python tools/export_center.py -c configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml -o Global.pretrained_model: "./output/rec_mobile_pp-OCRv2/best_accuracy"
python tools/export_center.py -c configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml -o Global.pretrained_model="./output/rec_mobile_pp-OCRv2/best_accuracy"
```
After it finishes, `train_center.pkl` will be generated in the PaddleOCR root directory.
@@ -22,7 +22,7 @@ class NRTRLoss(nn.Layer):
            log_prb = F.log_softmax(pred, axis=1)
            non_pad_mask = paddle.not_equal(
                tgt, paddle.zeros(
-                   tgt.shape, dtype='int64'))
                    tgt.shape, dtype=tgt.dtype))
            loss = -(one_hot * log_prb).sum(axis=1)
            loss = loss.masked_select(non_pad_mask).mean()
        else:
@@ -29,10 +29,7 @@ from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, Di
    TableLabelDecode, NRTRLabelDecode, SARLabelDecode , SEEDLabelDecode
from .cls_postprocess import ClsPostProcess
from .pg_postprocess import PGPostProcess
-if platform.system() != "Windows":
-    # pse is not support in Windows
-    from .pse_postprocess import PSEPostProcess
from .pse_postprocess import PSEPostProcess


def build_post_process(config, global_config=None):
@@ -17,7 +17,12 @@ import subprocess

python_path = sys.executable
-if subprocess.call('cd ppocr/postprocess/pse_postprocess/pse;{} setup.py build_ext --inplace;cd -'.format(python_path), shell=True) != 0:
-    raise RuntimeError('Cannot compile pse: {}'.format(os.path.dirname(os.path.realpath(__file__))))
ori_path = os.getcwd()
os.chdir('ppocr/postprocess/pse_postprocess/pse')
if subprocess.call(
        '{} setup.py build_ext --inplace'.format(python_path), shell=True) != 0:
    raise RuntimeError('Cannot compile pse: {}'.format(
        os.path.dirname(os.path.realpath(__file__))))
os.chdir(ori_path)

from .pse import pse
\ No newline at end of file
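For reference, the same extension can be built by hand from the repository root, which is what the snippet above automates (python3.7 simply follows the repository's convention; any interpreter with the project requirements installed should work):
```
# Manual build of the pse extension; mirrors the os.chdir/setup.py calls above.
cd ppocr/postprocess/pse_postprocess/pse
python3.7 setup.py build_ext --inplace
cd -
```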
@@ -32,6 +32,7 @@ def run_shell_command(cmd):
    else:
        return None


def parser_results_from_log_by_name(log_path, names_list):
    if not os.path.exists(log_path):
        raise ValueError("The log file {} does not exists!".format(log_path))
@@ -52,6 +53,7 @@ def parser_results_from_log_by_name(log_path, names_list):
        parser_results[name] = result
    return parser_results


def load_gt_from_file(gt_file):
    if not os.path.exists(gt_file):
        raise ValueError("The log file {} does not exists!".format(gt_file))
===========================train_params===========================
model_name:ocr_det
python:python3.7
-gpu_list:0|0,1
gpu_list:0|0,1|10.21.226.181,10.21.226.133;0,1
-Global.use_gpu:True|True
Global.use_gpu:True|True|True
-Global.auto_cast:null
Global.auto_cast:fp32|amp
-Global.epoch_num:lite_train_infer=1|whole_train_infer=300
Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300
Global.save_model_dir:./output/
-Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4
Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train|pact_train|fpgm_train
-norm_train:tools/train.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
norm_train:tools/train.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
-pact_train:deploy/slim/quantization/quant.py -c tests/configs/det_mv3_db.yml -o
pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/det_mv3_db.yml -o
-fpgm_train:deploy/slim/prune/sensitivity_anal.py -c tests/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
distill_train:null
null:null
null:null
@@ -27,13 +27,13 @@ null:null
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
-norm_export:tools/export_model.py -c tests/configs/det_mv3_db.yml -o
norm_export:tools/export_model.py -c test_tipc/configs/det_mv3_db.yml -o
-quant_export:deploy/slim/quantization/export_model.py -c tests/configs/det_mv3_db.yml -o
quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/det_mv3_db.yml -o
-fpgm_export:deploy/slim/prune/export_prune_model.py -c tests/configs/det_mv3_db.yml -o
fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/det_mv3_db.yml -o
distill_export:null
export1:null
export2:null
-##
inference_dir:null
train_model:./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy
infer_export:tools/export_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
infer_quant:False
@@ -98,3 +98,13 @@ null:null
--benchmark:True
null:null
null:null
===========================lite_params===========================
inference:./ocr_db_crnn det
infer_model:./models/ch_ppocr_mobile_v2.0_det_opt.nb|./models/ch_ppocr_mobile_v2.0_det_slim_opt.nb
--cpu_threads:1|4
--batch_size:1
--power_mode:LITE_POWER_HIGH|LITE_POWER_LOW
--image_dir:./test_data/icdar2015_lite/text_localization/ch4_test_images/|./test_data/icdar2015_lite/text_localization/ch4_test_images/img_233.jpg
--config_dir:./config.txt
--rec_dict_dir:./ppocr_keys_v1.txt
--benchmark:True
===========================train_params===========================
model_name:PPOCRv2_ocr_det
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:fp32
Global.epoch_num:lite_train_infer=1|whole_train_infer=500
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_infer=2|whole_train_infer=4
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train|pact_train
norm_train:tools/train.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o
pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:null
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:tools/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o
quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCR_det_cml.yml -o
fpgm_export:
distill_export:null
export1:null
export2:null
inference_dir:Student
infer_model:./inference/ch_PP-OCRv2_det_infer/
infer_export:null
infer_quant:False
inference:tools/infer/predict_det.py
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False|True
--precision:fp32|fp16|int8
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
null:null
--benchmark:True
null:null