fix conflicts

83303bc7 · LDOUBLEV · 3af943f3 · af0bac58 · 83303bc7 · 83303bc7
Commit 83303bc7 authored Oct 09, 2021 by LDOUBLEV
20 changed files
--- a/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml
+++ b/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml
@@ -19,6 +19,7 @@ Global:
  max_text_length: 25
  infer_mode: False
  use_space_char: True
+  save_res_path: ./output/rec/predicts_chinese_common_v2.0.txt


 Optimizer:

--- a/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml
+++ b/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml
@@ -19,6 +19,7 @@ Global:
  max_text_length: 25
  infer_mode: False
  use_space_char: True
+  save_res_path: ./output/rec/predicts_chinese_lite_v2.0.txt


 Optimizer:

--- a/configs/rec/rec_icdar15_train.yml
+++ b/configs/rec/rec_icdar15_train.yml
@@ -10,15 +10,16 @@ Global:
  cal_metric_during_train: True
  pretrained_model:
  checkpoints:
-  save_inference_dir:
+  save_inference_dir: ./
  use_visualdl: False
  infer_img: doc/imgs_words_en/word_10.png
  # for data or label process
-  character_dict_path: ppocr/utils/ic15_dict.txt
-  character_type: ch
+  character_dict_path: ppocr/utils/en_dict.txt
+  character_type: EN
  max_text_length: 25
  infer_mode: False
  use_space_char: False
+  save_res_path: ./output/rec/predicts_ic15.txt

 Optimizer:
  name: Adam
@@ -59,8 +60,8 @@ Metric:
 Train:
  dataset:
    name: SimpleDataSet
-    data_dir: ./train_data/
-    label_file_list: ["./train_data/train_list.txt"]
+    data_dir: ./train_data/ic15_data/
+    label_file_list: ["./train_data/ic15_data/rec_gt_train.txt"]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR
@@ -80,8 +81,8 @@ Train:
 Eval:
  dataset:
    name: SimpleDataSet
-    data_dir: ./train_data/
-    label_file_list: ["./train_data/train_list.txt"]
+    data_dir: ./train_data/ic15_data
+    label_file_list: ["./train_data/ic15_data/rec_gt_test.txt"]
    transforms:
      - DecodeImage: # load image
          img_mode: BGR

--- a/configs/rec/rec_mtb_nrtr.yml
+++ b/configs/rec/rec_mtb_nrtr.yml
+Global:
+  use_gpu: True
+  epoch_num: 21
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/nrtr/
+  save_epoch_step: 1
+  # evaluation is run every 2000 iterations
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words_en/word_10.png
+  # for data or label process
+  character_dict_path: 
+  character_type: EN_symbol
+  max_text_length: 25
+  infer_mode: False
+  use_space_char: True
+  save_res_path: ./output/rec/predicts_nrtr.txt
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.99
+  clip_norm: 5.0
+  lr:
+    name: Cosine
+    learning_rate: 0.0005
+    warmup_epoch: 2
+  regularizer:
+    name: 'L2'
+    factor: 0.
+
+Architecture:
+  model_type: rec
+  algorithm: NRTR
+  in_channels: 1
+  Transform:
+  Backbone:
+    name: MTB
+    cnn_num: 2
+  Head:
+    name: Transformer
+    d_model: 512
+    num_encoder_layers: 6
+    beam_size: -1 # Beam search is used during evaluation when beam_size is greater than 0.
+    
+
+Loss:
+  name: NRTRLoss
+  smoothing: True
+
+PostProcess:
+  name: NRTRLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/training/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - NRTRLabelEncode: # Class handling label
+      - NRTRRecResizeImg:
+          image_shape: [100, 32]
+          resize_type: PIL # PIL or OpenCV
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 512
+    drop_last: True
+    num_workers: 8
+
+Eval:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/evaluation/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - NRTRLabelEncode: # Class handling label
+      - NRTRRecResizeImg:
+          image_shape: [100, 32]
+          resize_type: PIL # PIL or OpenCV
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 256
+    num_workers: 1
+    use_shared_memory: False
--- a/configs/rec/rec_mv3_none_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_none_bilstm_ctc.yml
@@ -19,6 +19,7 @@ Global:
  max_text_length: 25
  infer_mode: False
  use_space_char: False
+  save_res_path: ./output/rec/predicts_mv3_none_bilstm_ctc.txt

 Optimizer:
  name: Adam

--- a/configs/rec/rec_mv3_none_none_ctc.yml
+++ b/configs/rec/rec_mv3_none_none_ctc.yml
@@ -19,6 +19,7 @@ Global:
  max_text_length: 25
  infer_mode: False
  use_space_char: False
+  save_res_path: ./output/rec/predicts_mv3_none_none_ctc.txt

 Optimizer:
  name: Adam

--- a/configs/rec/rec_mv3_tps_bilstm_att.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_att.yml
@@ -19,6 +19,7 @@ Global:
  max_text_length: 25
  infer_mode: False
  use_space_char: False
+  save_res_path: ./output/rec/predicts_mv3_tps_bilstm_att.txt


 Optimizer:

--- a/configs/rec/rec_mv3_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_mv3_tps_bilstm_ctc.yml
@@ -19,6 +19,7 @@ Global:
  max_text_length: 25
  infer_mode: False
  use_space_char: False
+  save_res_path: ./output/rec/predicts_mv3_tps_bilstm_ctc.txt

 Optimizer:
  name: Adam

--- a/configs/rec/rec_r31_sar.yml
+++ b/configs/rec/rec_r31_sar.yml
+Global:
+  use_gpu: true
+  epoch_num: 5
+  log_smooth_window: 20
+  print_batch_step: 20
+  save_model_dir: ./sar_rec
+  save_epoch_step: 1
+  # evaluation is run every 2000 iterations
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  # for data or label process
+  character_dict_path: ppocr/utils/dict90.txt
+  character_type: EN_symbol
+  max_text_length: 30
+  infer_mode: False
+  use_space_char: False
+  rm_symbol: True
+  save_res_path: ./output/rec/predicts_sar.txt
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+    name: Piecewise
+    decay_epochs: [3, 4]
+    values: [0.001, 0.0001, 0.00001] 
+  regularizer:
+    name: 'L2'
+    factor: 0
+
+Architecture:
+  model_type: rec
+  algorithm: SAR
+  Transform:
+  Backbone:
+    name: ResNet31
+  Head:
+    name: SARHead
+
+Loss:
+  name: SARLoss
+
+PostProcess:
+  name: SARLabelDecode
+
+Metric:
+  name: RecMetric
+
+
+Train:
+  dataset:
+    name: SimpleDataSet
+    label_file_list: ['./train_data/train_list.txt']
+    data_dir: ./train_data/
+    ratio_list: 1.0
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - SARLabelEncode: # Class handling label
+      - SARRecResizeImg:
+          image_shape: [3, 48, 48, 160] # h:48 w:[48,160]
+          width_downsample_ratio: 0.25
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'valid_ratio'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 64
+    drop_last: True
+    num_workers: 8
+    use_shared_memory: False
+
+Eval:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/evaluation/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - SARLabelEncode: # Class handling label
+      - SARRecResizeImg:
+          image_shape: [3, 48, 48, 160]
+          width_downsample_ratio: 0.25
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'valid_ratio'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 64
+    num_workers: 4
+    use_shared_memory: False
+  
--- a/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_bilstm_ctc.yml
@@ -19,6 +19,7 @@ Global:
  max_text_length: 25
  infer_mode: False
  use_space_char: False
+  save_res_path: ./output/rec/predicts_r34_vd_none_bilstm_ctc.txt

 Optimizer:
  name: Adam

--- a/configs/rec/rec_r34_vd_none_none_ctc.yml
+++ b/configs/rec/rec_r34_vd_none_none_ctc.yml
@@ -19,6 +19,7 @@ Global:
  max_text_length: 25
  infer_mode: False
  use_space_char: False
+  save_res_path: ./output/rec/predicts_r34_vd_none_none_ctc.txt

 Optimizer:
  name: Adam

--- a/configs/rec/rec_r34_vd_tps_bilstm_att.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_att.yml
@@ -19,6 +19,7 @@ Global:
  max_text_length: 25
  infer_mode: False
  use_space_char: False
+  save_res_path: ./output/rec/predicts_b3_rare_r34_none_gru.txt


 Optimizer:

--- a/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+++ b/configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
@@ -19,6 +19,7 @@ Global:
  max_text_length: 25
  infer_mode: False
  use_space_char: False
+  save_res_path: ./output/rec/predicts_r34_vd_tps_bilstm_ctc.txt

 Optimizer:
  name: Adam
@@ -37,7 +38,7 @@ Architecture:
    name: TPS
    num_fiducial: 20
    loc_lr: 0.1
-    model_name: small
+    model_name: large
  Backbone:
    name: ResNet
    layers: 34

--- a/configs/rec/rec_r50_fpn_srn.yml
+++ b/configs/rec/rec_r50_fpn_srn.yml
@@ -20,6 +20,7 @@ Global:
  num_heads: 8
  infer_mode: False
  use_space_char: False
+  save_res_path: ./output/rec/predicts_srn.txt


 Optimizer:

--- a/configs/rec/rec_resnet_stn_bilstm_att.yml
+++ b/configs/rec/rec_resnet_stn_bilstm_att.yml
+Global:
+  use_gpu: True
+  epoch_num: 400
+  log_smooth_window: 20
+  print_batch_step: 10
+  save_model_dir: ./output/rec/seed
+  save_epoch_step: 3
+  # evaluation is run every 2000 iterations after the 0th iteration
+  eval_batch_step: [0, 2000]
+  cal_metric_during_train: True
+  pretrained_model:
+  checkpoints:
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words_en/word_10.png
+  # for data or label process
+  character_dict_path: 
+  character_type: EN_symbol
+  max_text_length: 100
+  infer_mode: False
+  use_space_char: False
+  save_res_path: ./output/rec/predicts_seed.txt
+
+
+Optimizer:
+  name: Adadelta
+  weight_deacy: 0.0
+  momentum: 0.9
+  lr:
+    name: Piecewise
+    decay_epochs: [4,5,8]
+    values: [1.0, 0.1, 0.01]
+  regularizer:
+    name: 'L2'
+    factor: 2.0e-05
+
+
+Architecture:
+  model_type: rec
+  algorithm: SEED
+  Transform:
+    name: STN_ON
+    tps_inputsize: [32, 64]
+    tps_outputsize: [32, 100]
+    num_control_points: 20
+    tps_margins: [0.05,0.05]
+    stn_activation: none
+  Backbone:
+    name: ResNet_ASTER
+  Head:
+    name: AsterHead  # AttentionHead
+    sDim: 512
+    attDim: 512
+    max_len_labels: 100
+
+Loss:
+  name: AsterLoss
+
+PostProcess:
+  name: SEEDLabelDecode
+
+Metric:
+  name: RecMetric
+  main_indicator: acc
+  is_filter: True
+
+Train:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/training/
+    transforms:
+      - Fasttext:
+          path: "./cc.en.300.bin"
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - SEEDLabelEncode: # Class handling label
+      - RecResizeImg:
+          character_type: en
+          image_shape: [3, 64, 256]
+          padding: False
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length', 'fast_label'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    batch_size_per_card: 256
+    drop_last: True
+    num_workers: 6
+
+Eval:
+  dataset:
+    name: LMDBDataSet
+    data_dir: ./train_data/data_lmdb_release/evaluation/
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - SEEDLabelEncode: # Class handling label
+      - RecResizeImg:
+          character_type: en
+          image_shape: [3, 64, 256]
+          padding: False
+      - KeepKeys:
+          keep_keys: ['image', 'label', 'length'] # dataloader will return list in this order
+  loader:
+    shuffle: False
+    drop_last: True
+    batch_size_per_card: 256
+    num_workers: 4
--- a/configs/table/table_mv3.yml
+++ b/configs/table/table_mv3.yml
+Global:
+  use_gpu: true
+  epoch_num: 50
+  log_smooth_window: 20
+  print_batch_step: 5
+  save_model_dir: ./output/table_mv3/
+  save_epoch_step: 5
+  # evaluation is run every 400 iterations after the 0th iteration
+  eval_batch_step: [0, 400]
+  cal_metric_during_train: True
+  pretrained_model: 
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: doc/imgs_words/ch/word_1.jpg
+  # for data or label process
+  character_dict_path: ppocr/utils/dict/table_structure_dict.txt
+  character_type: en
+  max_text_length: 100
+  max_elem_length: 500
+  max_cell_num: 500
+  infer_mode: False
+  process_total_num: 0
+  process_cut_num: 0
+
+
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  clip_norm: 5.0
+  lr:
+    learning_rate: 0.001
+  regularizer:
+    name: 'L2'
+    factor: 0.00000
+
+Architecture:
+  model_type: table
+  algorithm: TableAttn
+  Backbone:
+    name: MobileNetV3
+    scale: 1.0
+    model_name: small
+    disable_se: True
+  Head:
+    name: TableAttentionHead
+    hidden_size: 256
+    l2_decay: 0.00001
+    loc_type: 2
+
+Loss:
+  name: TableAttentionLoss
+  structure_weight: 100.0
+  loc_weight: 10000.0
+
+PostProcess:
+  name: TableLabelDecode
+
+Metric:
+  name: TableMetric
+  main_indicator: acc
+
+Train:
+  dataset:
+    name: PubTabDataSet
+    data_dir: train_data/table/pubtabnet/train/
+    label_file_path: train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - ResizeTableImage:
+          max_len: 488
+      - TableLabelEncode:
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - PaddingTableImage:
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'structure', 'bbox_list', 'sp_tokens', 'bbox_list_mask']
+  loader:
+    shuffle: True
+    batch_size_per_card: 32
+    drop_last: True
+    num_workers: 1
+
+Eval:
+  dataset:
+    name: PubTabDataSet
+    data_dir: train_data/table/pubtabnet/val/
+    label_file_path: train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - ResizeTableImage:
+          max_len: 488
+      - TableLabelEncode:
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - PaddingTableImage:
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'structure', 'bbox_list', 'sp_tokens', 'bbox_list_mask']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 16
+    num_workers: 1
--- a/deploy/android_demo/.gitignore
+++ b/deploy/android_demo/.gitignore
+*.iml
+.gradle
+/local.properties
+/.idea/*
+.DS_Store
+/build
+/captures
+.externalNativeBuild
+
--- a/deploy/android_demo/README.md
+++ b/deploy/android_demo/README.md
+# 如何快速测试
+### 1. 安装最新版本的Android Studio
+可以从 https://developer.android.com/studio 下载。本Demo是使用4.0版本的Android Studio编写的。
+
+### 2. 安装NDK 20 以上版本
+Demo测试的时候使用的是NDK 20b版本，20版本以上均可以支持编译成功。
+
+如果您是初学者，可以用以下方式安装和测试NDK编译环境。
+点击 File -> New ->New Project，  新建  "Native C++" project
+
+### 3. 导入项目
+点击 File->New->Import Project...， 然后跟着Android Studio的引导导入
+
+
+# 获得更多支持
+前往[端计算模型生成平台EasyEdge](https://ai.baidu.com/easyedge/app/open_source_demo?referrerUrl=paddlelite)，获得更多开发支持：
+
+- Demo APP：可使用手机扫码安装，方便手机端快速体验文字识别
+- SDK：模型被封装为适配不同芯片硬件和操作系统SDK，包括完善的接口，方便进行二次开发
--- a/deploy/android_demo/app/.gitignore
+++ b/deploy/android_demo/app/.gitignore
+/build
--- a/deploy/android_demo/app/build.gradle
+++ b/deploy/android_demo/app/build.gradle
+import java.security.MessageDigest
+
+apply plugin: 'com.android.application'
+
+android {
+    compileSdkVersion 29
+    defaultConfig {
+        applicationId "com.baidu.paddle.lite.demo.ocr"
+        minSdkVersion 23
+        targetSdkVersion 29
+        versionCode 1
+        versionName "1.0"
+        testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
+        externalNativeBuild {
+            cmake {
+                cppFlags "-std=c++11 -frtti -fexceptions -Wno-format"
+                arguments '-DANDROID_PLATFORM=android-23', '-DANDROID_STL=c++_shared' ,"-DANDROID_ARM_NEON=TRUE"
+            }
+        }
+        ndk {
+            // abiFilters "arm64-v8a", "armeabi-v7a"
+            abiFilters   "arm64-v8a", "armeabi-v7a"
+            ldLibs "jnigraphics"
+        }
+    }
+    buildTypes {
+        release {
+            minifyEnabled false
+            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
+        }
+    }
+    externalNativeBuild {
+        cmake {
+            path "src/main/cpp/CMakeLists.txt"
+            version "3.10.2"
+        }
+    }
+}
+
+dependencies {
+    implementation fileTree(include: ['*.jar'], dir: 'libs')
+    implementation 'androidx.appcompat:appcompat:1.1.0'
+    implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
+    testImplementation 'junit:junit:4.12'
+    androidTestImplementation 'com.android.support.test:runner:1.0.2'
+    androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
+}
+
+def archives = [
+        [
+                'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/paddle_lite_libs_v2_9_0.tar.gz',
+                'dest': 'PaddleLite'
+        ],
+        [
+                'src' : 'https://paddlelite-demo.bj.bcebos.com/libs/android/opencv-4.2.0-android-sdk.tar.gz',
+                'dest': 'OpenCV'
+        ],
+        [
+                'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ocr_v2_for_cpu.tar.gz',
+                'dest' : 'src/main/assets/models'
+        ],
+        [
+                'src' : 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/lite/ch_dict.tar.gz',
+                'dest' : 'src/main/assets/labels'
+        ]
+]
+
+task downloadAndExtractArchives(type: DefaultTask) {
+    doFirst {
+        println "Downloading and extracting archives including libs and models"
+    }
+    doLast {
+        // Prepare cache folder for archives
+        String cachePath = "cache"
+        if (!file("${cachePath}").exists()) {
+            mkdir "${cachePath}"
+        }
+        archives.eachWithIndex { archive, index ->
+            MessageDigest messageDigest = MessageDigest.getInstance('MD5')
+            messageDigest.update(archive.src.bytes)
+            String cacheName = new BigInteger(1, messageDigest.digest()).toString(32)
+            // Download the target archive if it does not exist in the cache
+            boolean copyFiles = !file("${archive.dest}").exists()
+            if (!file("${cachePath}/${cacheName}.tar.gz").exists()) {
+                ant.get(src: archive.src, dest: file("${cachePath}/${cacheName}.tar.gz"))
+                copyFiles = true; // force to copy files from the latest archive files
+            }
+            // Extract the target archive if its dest path does not exist
+            if (copyFiles) {
+                copy {
+                    from tarTree("${cachePath}/${cacheName}.tar.gz")
+                    into "${archive.dest}"
+                }
+            }
+        }
+    }
+}
+preBuild.dependsOn downloadAndExtractArchives
\ No newline at end of file