add east & sast

021c1132 · MissPenguin · 8a5566c9 · 021c1132 · 021c1132 · 021c1132
Commit 021c1132 authored Dec 09, 2020 by MissPenguin
20 changed files
--- a/MANIFEST.in
+++ b/MANIFEST.in
 include LICENSE.txt
 include README.md
-recursive-include ppocr/utils *.txt utility.py character.py check.py
+recursive-include ppocr/utils *.txt utility.py logging.py
-recursive-include ppocr/data/det *.py
+recursive-include ppocr/data/ *.py
 recursive-include ppocr/postprocess *.py
-recursive-include ppocr/postprocess/lanms *.*
+recursive-include tools/infer *.py
-recursive-include tools/infer *.py
\ No newline at end of file
--- a/configs/det/det_mv3_east.yml
+++ b/configs/det/det_mv3_east.yml
+Global:
+  use_gpu: true
+  epoch_num: 10000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/east_mv3/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/det_east/predicts_east.txt
+Architecture:
+  model_type: det
+  algorithm: EAST
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+  Neck:
+    name: EASTFPN
+    model_name: small
+  Head:
+    name: EASTHead
+    model_name: small
+Loss:
+  name: EASTLoss
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+PostProcess:
+  name: EASTPostProcess
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - EASTProcessTrain:
+          image_shape: [512, 512]
+          background_ratio: 0.125
+          min_crop_side_ratio: 0.1
+          min_text_size: 10
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 16
+    num_workers: 8
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          limit_side_len: 2400
+          limit_type: max
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
\ No newline at end of file
--- a/configs/det/det_r50_vd_east.yml
+++ b/configs/det/det_r50_vd_east.yml
+Global:
+  use_gpu: true
+  epoch_num: 10000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/east_r50_vd/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/det_east/predicts_east.txt
+Architecture:
+  model_type: det
+  algorithm: EAST
+  Transform:
+  Backbone:
+    name: ResNet
+    layers: 50
+  Neck:
+    name: EASTFPN
+    model_name: large
+  Head:
+    name: EASTHead
+    model_name: large
+Loss:
+  name: EASTLoss
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+PostProcess:
+  name: EASTPostProcess
+  score_thresh: 0.8
+  cover_thresh: 0.1
+  nms_thresh: 0.2
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
+    ratio_list: [1.0]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - EASTProcessTrain:
+          image_shape: [512, 512]
+          background_ratio: 0.125
+          min_crop_side_ratio: 0.1
+          min_text_size: 10
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'geo_map', 'training_mask'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 8
+    num_workers: 8
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          limit_side_len: 2400
+          limit_type: max
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
\ No newline at end of file
--- a/configs/det/det_r50_vd_sast_icdar15.yml
+++ b/configs/det/det_r50_vd_sast_icdar15.yml
+Global:
+  use_gpu: true
+  epoch_num: 5000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/sast_r50_vd_ic15/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/sast_r50_vd_ic15/predicts_sast.txt
+Architecture:
+  model_type: det
+  algorithm: SAST
+  Transform:
+  Backbone:
+    name: ResNet_SAST
+    layers: 50
+  Neck:
+    name: SASTFPN
+    with_cab: True
+  Head:
+    name: SASTHead
+Loss:
+  name: SASTLoss
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+PostProcess:
+  name: SASTPostProcess
+  score_thresh: 0.5
+  sample_pts_num: 2
+  nms_thresh: 0.2
+  expand_scale: 1.0
+  shrink_ratio_of_width: 0.3
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+Train:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_path: [./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt, ./train_data/total_text_icdar_14pt/train_label_json.txt]
+    data_ratio_list: [0.5, 0.5]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - SASTProcessTrain:
+          image_shape: [512, 512]
+          min_crop_side_ratio: 0.3
+          min_crop_size: 24
+          min_text_size: 4
+          max_text_size: 512
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 4
+    num_workers: 4
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/icdar2015/text_localization/
+    label_file_list:
+      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          resize_long: 1536
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
\ No newline at end of file
--- a/configs/det/det_r50_vd_sast_totaltext.yml
+++ b/configs/det/det_r50_vd_sast_totaltext.yml
+Global:
+  use_gpu: true
+  epoch_num: 5000
+  log_smooth_window: 20
+  print_batch_step: 2
+  save_model_dir: ./output/sast_r50_vd_tt/
+  save_epoch_step: 1000
+  # evaluation is run every 5000 iterations after the 4000th iteration
+  eval_batch_step: [4000, 5000]
+  # if pretrained_model is saved in static mode, load_static_weights must set to True
+  load_static_weights: True
+  cal_metric_during_train: False
+  pretrained_model: ./pretrain_models/ResNet50_vd_ssld_pretrained/
+  checkpoints: 
+  save_inference_dir:
+  use_visualdl: False
+  infer_img: 
+  save_res_path: ./output/sast_r50_vd_tt/predicts_sast.txt
+Architecture:
+  model_type: det
+  algorithm: SAST
+  Transform:
+  Backbone:
+    name: ResNet_SAST
+    layers: 50
+  Neck:
+    name: SASTFPN
+    with_cab: True
+  Head:
+    name: SASTHead
+Loss:
+  name: SASTLoss
+Optimizer:
+  name: Adam
+  beta1: 0.9
+  beta2: 0.999
+  lr:
+  #  name: Cosine
+    learning_rate: 0.001
+  #  warmup_epoch: 0
+  regularizer:
+    name: 'L2'
+    factor: 0
+PostProcess:
+  name: SASTPostProcess
+  score_thresh: 0.5
+  sample_pts_num: 6
+  nms_thresh: 0.2
+  expand_scale: 1.2
+  shrink_ratio_of_width: 0.2
+Metric:
+  name: DetMetric
+  main_indicator: hmean
+Train:
+  dataset:
+    name: SimpleDataSet
+    label_file_list: [./train_data/icdar2013/train_label_json.txt, ./train_data/icdar2015/train_label_json.txt, ./train_data/icdar17_mlt_latin/train_label_json.txt, ./train_data/coco_text_icdar_4pts/train_label_json.txt]
+    ratio_list: [0.1, 0.45, 0.3, 0.15]
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - SASTProcessTrain:
+          image_shape: [512, 512]
+          min_crop_side_ratio: 0.3
+          min_crop_size: 24
+          min_text_size: 4
+          max_text_size: 512
+      - KeepKeys:
+          keep_keys: ['image', 'score_map', 'border_map', 'training_mask', 'tvo_map', 'tco_map'] # dataloader will return list in this order
+  loader:
+    shuffle: True
+    drop_last: False
+    batch_size_per_card: 4
+    num_workers: 4
+Eval:
+  dataset:
+    name: SimpleDataSet
+    data_dir: ./train_data/
+    label_file_list: 
+        - ./train_data/total_text_icdar_14pt/test_label_json.txt
+    transforms:
+      - DecodeImage: # load image
+          img_mode: BGR
+          channel_first: False
+      - DetLabelEncode: # Class handling label
+      - DetResizeForTest:
+          resize_long: 768
+      - NormalizeImage:
+          scale: 1./255.
+          mean: [0.485, 0.456, 0.406]
+          std: [0.229, 0.224, 0.225]
+          order: 'hwc'
+      - ToCHWImage:
+      - KeepKeys:
+          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
+  loader:
+    shuffle: False
+    drop_last: False
+    batch_size_per_card: 1 # must be 1
+    num_workers: 2
\ No newline at end of file
--- a/configs/rec/multi_language/rec_en_number_lite_train.yml
+++ b/configs/rec/multi_language/rec_en_number_lite_train.yml
@@ -15,7 +15,7 @@ Global:
  use_visualdl: False
  infer_img:
  # for data or label process
-  character_dict_path: ppocr/utils/ic15_dict.txt
+  character_dict_path: ppocr/utils/dict/ic15_dict.txt
  character_type: ch
  max_text_length: 25
  infer_mode: False

--- a/configs/rec/multi_language/rec_french_lite_train.yml
+++ b/configs/rec/multi_language/rec_french_lite_train.yml
@@ -15,7 +15,7 @@ Global:
  use_visualdl: False
  infer_img:
  # for data or label process
-  character_dict_path: ppocr/utils/french_dict.txt
+  character_dict_path: ppocr/utils/dict/french_dict.txt
  character_type: french
  max_text_length: 25
  infer_mode: False

--- a/configs/rec/multi_language/rec_german_lite_train.yml
+++ b/configs/rec/multi_language/rec_german_lite_train.yml
@@ -15,7 +15,7 @@ Global:
  use_visualdl: False
  infer_img:
  # for data or label process
-  character_dict_path: ppocr/utils/german_dict.txt
+  character_dict_path: ppocr/utils/dict/german_dict.txt
  character_type: german
  max_text_length: 25
  infer_mode: False

--- a/configs/rec/multi_language/rec_japan_lite_train.yml
+++ b/configs/rec/multi_language/rec_japan_lite_train.yml
@@ -15,7 +15,7 @@ Global:
  use_visualdl: False
  infer_img:
  # for data or label process
-  character_dict_path: ppocr/utils/japan_dict.txt
+  character_dict_path: ppocr/utils/dict/japan_dict.txt
  character_type: japan
  max_text_length: 25
  infer_mode: False

--- a/configs/rec/multi_language/rec_korean_lite_train.yml
+++ b/configs/rec/multi_language/rec_korean_lite_train.yml
@@ -15,7 +15,7 @@ Global:
  use_visualdl: False
  infer_img:
  # for data or label process
-  character_dict_path: ppocr/utils/korean_dict.txt
+  character_dict_path: ppocr/utils/dict/korean_dict.txt
  character_type: korean
  max_text_length: 25
  infer_mode: False

--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -261,6 +261,61 @@ im_show.save('result.jpg')
 paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
 ```
+### 使用网络图片或者numpy数组作为输入
+1. 网络图片
+代码使用
+```python
+from paddleocr import PaddleOCR, draw_ocr
+# Paddleocr目前支持中英文、英文、法语、德语、韩语、日语，可以通过修改lang参数进行切换
+# 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。
+ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
+img_path = 'http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg'
+result = ocr.ocr(img_path, cls=True)
+for line in result:
+    print(line)
+# 显示结果
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+命令行模式
+```bash
+paddleocr --image_dir http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg --use_angle_cls=true
+```
+2. numpy数组
+仅通过代码使用时支持numpy数组作为输入
+```python
+from paddleocr import PaddleOCR, draw_ocr
+# Paddleocr目前支持中英文、英文、法语、德语、韩语、日语，可以通过修改lang参数进行切换
+# 参数依次为`ch`, `en`, `french`, `german`, `korean`, `japan`。
+ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
+img_path = 'PaddleOCR/doc/imgs/11.jpg'
+img = cv2.imread(img_path)
+# img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), 如果你自己训练的模型支持灰度图，可以将这句话的注释取消
+result = ocr.ocr(img_path, cls=True)
+for line in result:
+    print(line)
+# 显示结果
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
 ## 参数说明
 | 字段                    | 说明                                                                                                                                                                                                                 | 默认值                  |
@@ -285,6 +340,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
 | max_text_length         | 识别算法能识别的最大文字长度                                                                                                                                                                                         | 25                      |
 | rec_char_dict_path      | 识别模型字典路径，当rec_model_dir使用方式2传参时需要修改为自己的字典路径                                                                                                                                                | ./ppocr/utils/ppocr_keys_v1.txt                        |
 | use_space_char          | 是否识别空格                                                                                                                                                                                                         | TRUE                    |
+| drop_score          | 对输出按照分数(来自于识别模型)进行过滤，低于此分数的不返回                                                                                                                                                                                                         | 0.5                    |
 | use_angle_cls          | 是否加载分类模型                                                                                                                                                                                                         | FALSE                    |
 | cls_model_dir          | 分类模型所在文件夹。传参方式有两种，1. None: 自动下载内置模型到 `~/.paddleocr/cls`；2.自己转换好的inference模型路径，模型路径下必须包含model和params文件                                                                                 | None                    |
 | cls_image_shape          | 分类算法的输入图片尺寸                                                                           | "3, 48, 192"                    |
@@ -295,4 +351,4 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
 | lang                     | 模型语言类型,目前支持 中文(ch)和英文(en)                                                                                                                                                                                                  | ch                    |
 | det                     | 前向时使用启动检测                                                                                                                                                                                                   | TRUE                    |
 | rec                     | 前向时是否启动识别                                                                                                                                                                                                   | TRUE                    |
-| cls                     | 前向时是否启动分类                                                                                                                                                                                                 | FALSE                    |
+| cls                     | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类)                                                                                                                                                                                                | FALSE                    |
--- a/doc/doc_en/whl_en.md
+++ b/doc/doc_en/whl_en.md
@@ -271,6 +271,59 @@ im_show.save('result.jpg')
 paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_dir} --rec_model_dir {your_rec_model_dir} --rec_char_dict_path {your_rec_char_dict_path} --cls_model_dir {your_cls_model_dir} --use_angle_cls true --cls true
 ```
+### Use web images or numpy array as input
+1. Web image
+Use by code
+```python
+from paddleocr import PaddleOCR, draw_ocr
+ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
+img_path = 'http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg'
+result = ocr.ocr(img_path, cls=True)
+for line in result:
+    print(line)
+# show result
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
+Use by command line
+```bash
+paddleocr --image_dir http://n.sinaimg.cn/ent/transform/w630h933/20171222/o111-fypvuqf1838418.jpg --use_angle_cls=true
+```
+2. Numpy array
+Support numpy array as input only when used by code
+```python
+from paddleocr import PaddleOCR, draw_ocr
+ocr = PaddleOCR(use_angle_cls=True, lang="ch") # need to run only once to download and load model into memory
+img_path = 'PaddleOCR/doc/imgs/11.jpg'
+img = cv2.imread(img_path)
+# img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY), If your own training model supports grayscale images, you can uncomment this line
+result = ocr.ocr(img_path, cls=True)
+for line in result:
+    print(line)
+# show result
+from PIL import Image
+image = Image.open(img_path).convert('RGB')
+boxes = [line[0] for line in result]
+txts = [line[1][0] for line in result]
+scores = [line[1][1] for line in result]
+im_show = draw_ocr(image, boxes, txts, scores, font_path='/path/to/PaddleOCR/doc/simfang.ttf')
+im_show = Image.fromarray(im_show)
+im_show.save('result.jpg')
+```
 ## Parameter Description
 | Parameter                    | Description                                                                                                                                                                                                                 | Default value                  |
@@ -295,6 +348,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
 | max_text_length         | The maximum text length that the recognition algorithm can recognize                                                                                                                                                                                         | 25                      |
 | rec_char_dict_path      | the alphabet path which needs to be modified to your own path when `rec_model_Name` use mode 2                                                                                                                                              | ./ppocr/utils/ppocr_keys_v1.txt                        |
 | use_space_char          | Whether to recognize spaces                                                                                                                                                                                                         | TRUE                    |
+| drop_score          | Filter the output by score (from the recognition model), and those below this score will not be returned                                                                                                                                                                                                        | 0.5                    |
 | use_angle_cls          | Whether to load classification model                                                                                                                                                                                                       | FALSE                    |
 | cls_model_dir           | the classification inference model folder. There are two ways to transfer parameters, 1. None: Automatically download the built-in model to `~/.paddleocr/cls`; 2. The path of the inference model converted by yourself, the model and params files must be included in the model path | None |
 | cls_image_shape         | image shape of classification algorithm                                                                                                                                                                                            | "3,48,192"              |
@@ -305,4 +359,4 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --det_model_dir {your_det_model_
 | lang                     | The support language, now only Chinese(ch)、English(en)、French(french)、German(german)、Korean(korean)、Japanese(japan) are supported                                                                                                                                                                                                  | ch                    |
 | det                     | Enable detction when `ppocr.ocr` func exec                                                                                                                                                                                                   | TRUE                    |
 | rec                     | Enable recognition when `ppocr.ocr` func exec                                                                                                                                                                                                   | TRUE                    |
-| cls                     | Enable classification when `ppocr.ocr` func exec                                                                                                                                                                                                   | FALSE                    |
+| cls                     | Enable classification when `ppocr.ocr` func exec((Use use_angle_cls in command line mode to control whether to start classification in the forward direction)                                                                                                                                                                                                   | FALSE                    |
--- a/paddleocr.py
+++ b/paddleocr.py
@@ -26,17 +26,50 @@ import requests
 from tqdm import tqdm
 from tools.infer import predict_system
-from ppocr.utils.utility import initial_logger
+from ppocr.utils.logging import get_logger
-logger = initial_logger()
+logger = get_logger()
 from ppocr.utils.utility import check_and_read_gif, get_image_file_list
 __all__ = ['PaddleOCR']
-model_params = {
+model_urls = {
-    'det': 'https://paddleocr.bj.bcebos.com/ch_models/ch_det_mv3_db_infer.tar',
+    'det':
-    'rec':
+        'https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar',
-    'https://paddleocr.bj.bcebos.com/ch_models/ch_rec_mv3_crnn_enhance_infer.tar',
+    'rec': {
+        'ch': {
+            'url':
+                'https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar',
+            'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
+        },
+        'en': {
+            'url':
+                'https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar',
+            'dict_path': './ppocr/utils/ic15_dict.txt'
+        },
+        'french': {
+            'url':
+                'https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar',
+            'dict_path': './ppocr/utils/dict/french_dict.txt'
+        },
+        'german': {
+            'url':
+                'https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar',
+            'dict_path': './ppocr/utils/dict/german_dict.txt'
+        },
+        'korean': {
+            'url':
+                'https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar',
+            'dict_path': './ppocr/utils/dict/korean_dict.txt'
+        },
+        'japan': {
+            'url':
+                'https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar',
+            'dict_path': './ppocr/utils/dict/japan_dict.txt'
+        }
+    },
+    'cls':
+        'https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar'
 }
 SUPPORT_DET_MODEL = ['DB']
@@ -54,8 +87,8 @@ def download_with_progressbar(url, save_path):
            progress_bar.update(len(data))
            file.write(data)
    progress_bar.close()
-    if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
+    if total_size_in_bytes == 0 or progress_bar.n != total_size_in_bytes:
-        logger.error("ERROR, something went wrong")
+        logger.error("Something went wrong while downloading models")
        sys.exit(0)
@@ -63,7 +96,7 @@ def maybe_download(model_storage_directory, url):
    # using custom model
    if not os.path.exists(os.path.join(
            model_storage_directory, 'model')) or not os.path.exists(
-                os.path.join(model_storage_directory, 'params')):
+        os.path.join(model_storage_directory, 'params')):
        tmp_path = os.path.join(model_storage_directory, url.split('/')[-1])
        print('download {} to {}'.format(url, tmp_path))
        os.makedirs(model_storage_directory, exist_ok=True)
@@ -84,53 +117,102 @@ def maybe_download(model_storage_directory, url):
        os.remove(tmp_path)
-def parse_args():
+def parse_args(mMain=True, add_help=True):
    import argparse
    def str2bool(v):
        return v.lower() in ("true", "t", "1")
-    parser = argparse.ArgumentParser()
+    if mMain:
-    # params for prediction engine
+        parser = argparse.ArgumentParser(add_help=add_help)
-    parser.add_argument("--use_gpu", type=str2bool, default=True)
+        # params for prediction engine
-    parser.add_argument("--ir_optim", type=str2bool, default=True)
+        parser.add_argument("--use_gpu", type=str2bool, default=True)
-    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
+        parser.add_argument("--ir_optim", type=str2bool, default=True)
-    parser.add_argument("--gpu_mem", type=int, default=8000)
+        parser.add_argument("--use_tensorrt", type=str2bool, default=False)
+        parser.add_argument("--gpu_mem", type=int, default=8000)
-    # params for text detector
-    parser.add_argument("--image_dir", type=str)
+        # params for text detector
-    parser.add_argument("--det_algorithm", type=str, default='DB')
+        parser.add_argument("--image_dir", type=str)
-    parser.add_argument("--det_model_dir", type=str, default=None)
+        parser.add_argument("--det_algorithm", type=str, default='DB')
-    parser.add_argument("--det_max_side_len", type=float, default=960)
+        parser.add_argument("--det_model_dir", type=str, default=None)
+        parser.add_argument("--det_limit_side_len", type=float, default=960)
-    # DB parmas
+        parser.add_argument("--det_limit_type", type=str, default='max')
-    parser.add_argument("--det_db_thresh", type=float, default=0.3)
-    parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
+        # DB parmas
-    parser.add_argument("--det_db_unclip_ratio", type=float, default=2.0)
+        parser.add_argument("--det_db_thresh", type=float, default=0.3)
+        parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
-    # EAST parmas
+        parser.add_argument("--det_db_unclip_ratio", type=float, default=2.0)
-    parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
-    parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
+        # EAST parmas
-    parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)
+        parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
+        parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
-    # params for text recognizer
+        parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)
-    parser.add_argument("--rec_algorithm", type=str, default='CRNN')
-    parser.add_argument("--rec_model_dir", type=str, default=None)
+        # params for text recognizer
-    parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
+        parser.add_argument("--rec_algorithm", type=str, default='CRNN')
-    parser.add_argument("--rec_char_type", type=str, default='ch')
+        parser.add_argument("--rec_model_dir", type=str, default=None)
-    parser.add_argument("--rec_batch_num", type=int, default=30)
+        parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
-    parser.add_argument("--max_text_length", type=int, default=25)
+        parser.add_argument("--rec_char_type", type=str, default='ch')
-    parser.add_argument(
+        parser.add_argument("--rec_batch_num", type=int, default=30)
-        "--rec_char_dict_path",
+        parser.add_argument("--max_text_length", type=int, default=25)
-        type=str,
+        parser.add_argument("--rec_char_dict_path", type=str, default=None)
-        default="./ppocr/utils/ppocr_keys_v1.txt")
+        parser.add_argument("--use_space_char", type=bool, default=True)
-    parser.add_argument("--use_space_char", type=bool, default=True)
+        parser.add_argument("--drop_score", type=float, default=0.5)
-    parser.add_argument("--enable_mkldnn", type=bool, default=False)
+        # params for text classifier
-    parser.add_argument("--det", type=str2bool, default=True)
+        parser.add_argument("--cls_model_dir", type=str, default=None)
-    parser.add_argument("--rec", type=str2bool, default=True)
+        parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
-    parser.add_argument("--use_zero_copy_run", type=bool, default=False)
+        parser.add_argument("--label_list", type=list, default=['0', '180'])
-    return parser.parse_args()
+        parser.add_argument("--cls_batch_num", type=int, default=30)
+        parser.add_argument("--cls_thresh", type=float, default=0.9)
+        parser.add_argument("--enable_mkldnn", type=bool, default=False)
+        parser.add_argument("--use_zero_copy_run", type=bool, default=False)
+        parser.add_argument("--use_pdserving", type=str2bool, default=False)
+        parser.add_argument("--lang", type=str, default='ch')
+        parser.add_argument("--det", type=str2bool, default=True)
+        parser.add_argument("--rec", type=str2bool, default=True)
+        parser.add_argument("--use_angle_cls", type=str2bool, default=False)
+        return parser.parse_args()
+    else:
+        return argparse.Namespace(use_gpu=True,
+                                  ir_optim=True,
+                                  use_tensorrt=False,
+                                  gpu_mem=8000,
+                                  image_dir='',
+                                  det_algorithm='DB',
+                                  det_model_dir=None,
+                                  det_limit_side_len=960,
+                                  det_limit_type='max',
+                                  det_db_thresh=0.3,
+                                  det_db_box_thresh=0.5,
+                                  det_db_unclip_ratio=2.0,
+                                  det_east_score_thresh=0.8,
+                                  det_east_cover_thresh=0.1,
+                                  det_east_nms_thresh=0.2,
+                                  rec_algorithm='CRNN',
+                                  rec_model_dir=None,
+                                  rec_image_shape="3, 32, 320",
+                                  rec_char_type='ch',
+                                  rec_batch_num=30,
+                                  max_text_length=25,
+                                  rec_char_dict_path=None,
+                                  use_space_char=True,
+                                  drop_score=0.5,
+                                  cls_model_dir=None,
+                                  cls_image_shape="3, 48, 192",
+                                  label_list=['0', '180'],
+                                  cls_batch_num=30,
+                                  cls_thresh=0.9,
+                                  enable_mkldnn=False,
+                                  use_zero_copy_run=False,
+                                  use_pdserving=False,
+                                  lang='ch',
+                                  det=True,
+                                  rec=True,
+                                  use_angle_cls=False
+                                  )
 class PaddleOCR(predict_system.TextSystem):
@@ -140,18 +222,31 @@ class PaddleOCR(predict_system.TextSystem):
        args:
            **kwargs: other params show in paddleocr --help
        """
-        postprocess_params = parse_args()
+        postprocess_params = parse_args(mMain=False, add_help=False)
        postprocess_params.__dict__.update(**kwargs)
+        self.use_angle_cls = postprocess_params.use_angle_cls
+        lang = postprocess_params.lang
+        assert lang in model_urls[
+            'rec'], 'param lang must in {}, but got {}'.format(
+            model_urls['rec'].keys(), lang)
+        if postprocess_params.rec_char_dict_path is None:
+            postprocess_params.rec_char_dict_path = model_urls['rec'][lang][
+                'dict_path']
        # init model dir
        if postprocess_params.det_model_dir is None:
            postprocess_params.det_model_dir = os.path.join(BASE_DIR, 'det')
        if postprocess_params.rec_model_dir is None:
-            postprocess_params.rec_model_dir = os.path.join(BASE_DIR, 'rec')
+            postprocess_params.rec_model_dir = os.path.join(
+                BASE_DIR, 'rec/{}'.format(lang))
+        if postprocess_params.cls_model_dir is None:
+            postprocess_params.cls_model_dir = os.path.join(BASE_DIR, 'cls')
        print(postprocess_params)
        # download model
-        maybe_download(postprocess_params.det_model_dir, model_params['det'])
+        maybe_download(postprocess_params.det_model_dir, model_urls['det'])
-        maybe_download(postprocess_params.rec_model_dir, model_params['rec'])
+        maybe_download(postprocess_params.rec_model_dir,
+                       model_urls['rec'][lang]['url'])
+        maybe_download(postprocess_params.cls_model_dir, model_urls['cls'])
        if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL:
            logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
@@ -166,7 +261,7 @@ class PaddleOCR(predict_system.TextSystem):
        # init det_model and rec_model
        super().__init__(postprocess_params)
-    def ocr(self, img, det=True, rec=True):
+    def ocr(self, img, det=True, rec=True, cls=False):
        """
        ocr with paddleocr
        args：
@@ -175,7 +270,16 @@ class PaddleOCR(predict_system.TextSystem):
            rec: use text recognition or not, if false, only det will be exec. default is True
        """
        assert isinstance(img, (np.ndarray, list, str))
+        if isinstance(img, list) and det == True:
+            logger.error('When input a list of images, det must be false')
+            exit(0)
+        self.use_angle_cls = cls
        if isinstance(img, str):
+            # download net image
+            if img.startswith('http'):
+                download_with_progressbar(img, 'tmp.jpg')
+                img = 'tmp.jpg'
            image_file = img
            img, flag = check_and_read_gif(image_file)
            if not flag:
@@ -183,6 +287,8 @@ class PaddleOCR(predict_system.TextSystem):
            if img is None:
                logger.error("error in loading image:{}".format(image_file))
                return None
+        if isinstance(img, np.ndarray) and len(img.shape) == 2:
+            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        if det and rec:
            dt_boxes, rec_res = self.__call__(img)
            return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
@@ -194,20 +300,34 @@ class PaddleOCR(predict_system.TextSystem):
        else:
            if not isinstance(img, list):
                img = [img]
+            if self.use_angle_cls:
+                img, cls_res, elapse = self.text_classifier(img)
+                if not rec:
+                    return cls_res
            rec_res, elapse = self.text_recognizer(img)
            return rec_res
 def main():
-    # for com
+    # for cmd
-    args = parse_args()
+    args = parse_args(mMain=True)
-    image_file_list = get_image_file_list(args.image_dir)
+    image_dir = args.image_dir
+    if image_dir.startswith('http'):
+        download_with_progressbar(image_dir, 'tmp.jpg')
+        image_file_list = ['tmp.jpg']
+    else:
+        image_file_list = get_image_file_list(args.image_dir)
    if len(image_file_list) == 0:
        logger.error('no images find in {}'.format(args.image_dir))
        return
-    ocr_engine = PaddleOCR()
+    ocr_engine = PaddleOCR(**(args.__dict__))
    for img_path in image_file_list:
-        print(img_path)
+        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
-        result = ocr_engine.ocr(img_path, det=args.det, rec=args.rec)
+        result = ocr_engine.ocr(img_path,
-        for line in result:
+                                det=args.det,
-            print(line)
+                                rec=args.rec,
\ No newline at end of file
+                                cls=args.use_angle_cls)
+        if result is not None:
+            for line in result:
+                logger.info(line)
--- a/ppocr/data/imaug/__init__.py
+++ b/ppocr/data/imaug/__init__.py
@@ -26,6 +26,9 @@ from .randaugment import RandAugment
 from .operators import *
 from .label_ops import *
+from .east_process import *
+from .sast_process import *
 def transform(data, ops=None):
    """ transform """

--- a/ppocr/data/imaug/east_process.py
+++ b/ppocr/data/imaug/east_process.py
+#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+import math
+import cv2
+import numpy as np
+import json
+import sys
+import os
+__all__ = ['EASTProcessTrain']
+class EASTProcessTrain(object):
+    def __init__(self,
+                 image_shape = [512, 512],
+                 background_ratio = 0.125,
+                 min_crop_side_ratio = 0.1,
+                 min_text_size = 10,
+                 **kwargs):
+        self.input_size = image_shape[1]
+        self.random_scale = np.array([0.5, 1, 2.0, 3.0])
+        self.background_ratio = background_ratio
+        self.min_crop_side_ratio = min_crop_side_ratio
+        self.min_text_size = min_text_size
+    def preprocess(self, im):
+        input_size = self.input_size
+        im_shape = im.shape
+        im_size_min = np.min(im_shape[0:2])
+        im_size_max = np.max(im_shape[0:2])
+        im_scale = float(input_size) / float(im_size_max)
+        im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale)
+        img_mean = [0.485, 0.456, 0.406]
+        img_std = [0.229, 0.224, 0.225]
+        # im = im[:, :, ::-1].astype(np.float32)
+        im = im / 255
+        im -= img_mean
+        im /= img_std
+        new_h, new_w, _ = im.shape
+        im_padded = np.zeros((input_size, input_size, 3), dtype=np.float32)
+        im_padded[:new_h, :new_w, :] = im
+        im_padded = im_padded.transpose((2, 0, 1))
+        im_padded = im_padded[np.newaxis, :]
+        return im_padded, im_scale
+    def rotate_im_poly(self, im, text_polys):
+        """
+        rotate image with 90 / 180 / 270 degre
+        """
+        im_w, im_h = im.shape[1], im.shape[0]
+        dst_im = im.copy()
+        dst_polys = []
+        rand_degree_ratio = np.random.rand()
+        rand_degree_cnt = 1
+        if 0.333 < rand_degree_ratio < 0.666:
+            rand_degree_cnt = 2
+        elif rand_degree_ratio > 0.666:
+            rand_degree_cnt = 3
+        for i in range(rand_degree_cnt):
+            dst_im = np.rot90(dst_im)
+        rot_degree = -90 * rand_degree_cnt
+        rot_angle = rot_degree * math.pi / 180.0
+        n_poly = text_polys.shape[0]
+        cx, cy = 0.5 * im_w, 0.5 * im_h
+        ncx, ncy = 0.5 * dst_im.shape[1], 0.5 * dst_im.shape[0]
+        for i in range(n_poly):
+            wordBB = text_polys[i]
+            poly = []
+            for j in range(4):
+                sx, sy = wordBB[j][0], wordBB[j][1]
+                dx = math.cos(rot_angle) * (sx - cx)\
+                    - math.sin(rot_angle) * (sy - cy) + ncx
+                dy = math.sin(rot_angle) * (sx - cx)\
+                    + math.cos(rot_angle) * (sy - cy) + ncy
+                poly.append([dx, dy])
+            dst_polys.append(poly)
+        dst_polys = np.array(dst_polys, dtype=np.float32)
+        return dst_im, dst_polys
+    def polygon_area(self, poly):
+        """
+        compute area of a polygon
+        :param poly:
+        :return:
+        """
+        edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]),
+                (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]),
+                (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]),
+                (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])]
+        return np.sum(edge) / 2.
+    def check_and_validate_polys(self, polys, tags, img_height, img_width):
+        """
+        check so that the text poly is in the same direction,
+        and also filter some invalid polygons
+        :param polys:
+        :param tags:
+        :return:
+        """
+        h, w = img_height, img_width
+        if polys.shape[0] == 0:
+            return polys
+        polys[:, :, 0] = np.clip(polys[:, :, 0], 0, w - 1)
+        polys[:, :, 1] = np.clip(polys[:, :, 1], 0, h - 1)
+        validated_polys = []
+        validated_tags = []
+        for poly, tag in zip(polys, tags):
+            p_area = self.polygon_area(poly)
+            #invalid poly
+            if abs(p_area) < 1:
+                continue
+            if p_area > 0:
+                #'poly in wrong direction'
+                if not tag:
+                    tag = True  #reversed cases should be ignore
+                poly = poly[(0, 3, 2, 1), :]
+            validated_polys.append(poly)
+            validated_tags.append(tag)
+        return np.array(validated_polys), np.array(validated_tags)
+    def draw_img_polys(self, img, polys):
+        if len(img.shape) == 4:
+            img = np.squeeze(img, axis=0)
+        if img.shape[0] == 3:
+            img = img.transpose((1, 2, 0))
+            img[:, :, 2] += 123.68
+            img[:, :, 1] += 116.78
+            img[:, :, 0] += 103.94
+        cv2.imwrite("tmp.jpg", img)
+        img = cv2.imread("tmp.jpg")
+        for box in polys:
+            box = box.astype(np.int32).reshape((-1, 1, 2))
+            cv2.polylines(img, [box], True, color=(255, 255, 0), thickness=2)
+        import random
+        ino = random.randint(0, 100)
+        cv2.imwrite("tmp_%d.jpg" % ino, img)
+        return
+    def shrink_poly(self, poly, r):
+        """
+        fit a poly inside the origin poly, maybe bugs here...
+        used for generate the score map
+        :param poly: the text poly
+        :param r: r in the paper
+        :return: the shrinked poly
+        """
+        # shrink ratio
+        R = 0.3
+        # find the longer pair
+        dist0 = np.linalg.norm(poly[0] - poly[1])
+        dist1 = np.linalg.norm(poly[2] - poly[3])
+        dist2 = np.linalg.norm(poly[0] - poly[3])
+        dist3 = np.linalg.norm(poly[1] - poly[2])
+        if dist0 + dist1 > dist2 + dist3:
+            # first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
+            ## p0, p1
+            theta = np.arctan2((poly[1][1] - poly[0][1]),
+                               (poly[1][0] - poly[0][0]))
+            poly[0][0] += R * r[0] * np.cos(theta)
+            poly[0][1] += R * r[0] * np.sin(theta)
+            poly[1][0] -= R * r[1] * np.cos(theta)
+            poly[1][1] -= R * r[1] * np.sin(theta)
+            ## p2, p3
+            theta = np.arctan2((poly[2][1] - poly[3][1]),
+                               (poly[2][0] - poly[3][0]))
+            poly[3][0] += R * r[3] * np.cos(theta)
+            poly[3][1] += R * r[3] * np.sin(theta)
+            poly[2][0] -= R * r[2] * np.cos(theta)
+            poly[2][1] -= R * r[2] * np.sin(theta)
+            ## p0, p3
+            theta = np.arctan2((poly[3][0] - poly[0][0]),
+                               (poly[3][1] - poly[0][1]))
+            poly[0][0] += R * r[0] * np.sin(theta)
+            poly[0][1] += R * r[0] * np.cos(theta)
+            poly[3][0] -= R * r[3] * np.sin(theta)
+            poly[3][1] -= R * r[3] * np.cos(theta)
+            ## p1, p2
+            theta = np.arctan2((poly[2][0] - poly[1][0]),
+                               (poly[2][1] - poly[1][1]))
+            poly[1][0] += R * r[1] * np.sin(theta)
+            poly[1][1] += R * r[1] * np.cos(theta)
+            poly[2][0] -= R * r[2] * np.sin(theta)
+            poly[2][1] -= R * r[2] * np.cos(theta)
+        else:
+            ## p0, p3
+            # print poly
+            theta = np.arctan2((poly[3][0] - poly[0][0]),
+                               (poly[3][1] - poly[0][1]))
+            poly[0][0] += R * r[0] * np.sin(theta)
+            poly[0][1] += R * r[0] * np.cos(theta)
+            poly[3][0] -= R * r[3] * np.sin(theta)
+            poly[3][1] -= R * r[3] * np.cos(theta)
+            ## p1, p2
+            theta = np.arctan2((poly[2][0] - poly[1][0]),
+                               (poly[2][1] - poly[1][1]))
+            poly[1][0] += R * r[1] * np.sin(theta)
+            poly[1][1] += R * r[1] * np.cos(theta)
+            poly[2][0] -= R * r[2] * np.sin(theta)
+            poly[2][1] -= R * r[2] * np.cos(theta)
+            ## p0, p1
+            theta = np.arctan2((poly[1][1] - poly[0][1]),
+                               (poly[1][0] - poly[0][0]))
+            poly[0][0] += R * r[0] * np.cos(theta)
+            poly[0][1] += R * r[0] * np.sin(theta)
+            poly[1][0] -= R * r[1] * np.cos(theta)
+            poly[1][1] -= R * r[1] * np.sin(theta)
+            ## p2, p3
+            theta = np.arctan2((poly[2][1] - poly[3][1]),
+                               (poly[2][0] - poly[3][0]))
+            poly[3][0] += R * r[3] * np.cos(theta)
+            poly[3][1] += R * r[3] * np.sin(theta)
+            poly[2][0] -= R * r[2] * np.cos(theta)
+            poly[2][1] -= R * r[2] * np.sin(theta)
+        return poly
+    def generate_quad(self, im_size, polys, tags):
+        """
+        Generate quadrangle.
+        """
+        h, w = im_size
+        poly_mask = np.zeros((h, w), dtype=np.uint8)
+        score_map = np.zeros((h, w), dtype=np.uint8)
+        # (x1, y1, ..., x4, y4, short_edge_norm)
+        geo_map = np.zeros((h, w, 9), dtype=np.float32)
+        # mask used during traning, to ignore some hard areas
+        training_mask = np.ones((h, w), dtype=np.uint8)
+        for poly_idx, poly_tag in enumerate(zip(polys, tags)):
+            poly = poly_tag[0]
+            tag = poly_tag[1]
+            r = [None, None, None, None]
+            for i in range(4):
+                dist1 = np.linalg.norm(poly[i] - poly[(i + 1) % 4])
+                dist2 = np.linalg.norm(poly[i] - poly[(i - 1) % 4])
+                r[i] = min(dist1, dist2)
+            # score map
+            shrinked_poly = self.shrink_poly(
+                poly.copy(), r).astype(np.int32)[np.newaxis, :, :]
+            cv2.fillPoly(score_map, shrinked_poly, 1)
+            cv2.fillPoly(poly_mask, shrinked_poly, poly_idx + 1)
+            # if the poly is too small, then ignore it during training
+            poly_h = min(
+                np.linalg.norm(poly[0] - poly[3]),
+                np.linalg.norm(poly[1] - poly[2]))
+            poly_w = min(
+                np.linalg.norm(poly[0] - poly[1]),
+                np.linalg.norm(poly[2] - poly[3]))
+            if min(poly_h, poly_w) < self.min_text_size:
+                cv2.fillPoly(training_mask,
+                             poly.astype(np.int32)[np.newaxis, :, :], 0)
+            if tag:
+                cv2.fillPoly(training_mask,
+                             poly.astype(np.int32)[np.newaxis, :, :], 0)
+            xy_in_poly = np.argwhere(poly_mask == (poly_idx + 1))
+            # geo map.
+            y_in_poly = xy_in_poly[:, 0]
+            x_in_poly = xy_in_poly[:, 1]
+            poly[:, 0] = np.minimum(np.maximum(poly[:, 0], 0), w)
+            poly[:, 1] = np.minimum(np.maximum(poly[:, 1], 0), h)
+            for pno in range(4):
+                geo_channel_beg = pno * 2
+                geo_map[y_in_poly, x_in_poly, geo_channel_beg] =\
+                    x_in_poly - poly[pno, 0]
+                geo_map[y_in_poly, x_in_poly, geo_channel_beg+1] =\
+                    y_in_poly - poly[pno, 1]
+            geo_map[y_in_poly, x_in_poly, 8] = \
+                1.0 / max(min(poly_h, poly_w), 1.0)
+        return score_map, geo_map, training_mask
+    def crop_area(self,
+                  im,
+                  polys,
+                  tags,
+                  crop_background=False,
+                  max_tries=50):
+        """
+        make random crop from the input image
+        :param im:
+        :param polys:
+        :param tags:
+        :param crop_background:
+        :param max_tries:
+        :return:
+        """
+        h, w, _ = im.shape
+        pad_h = h // 10
+        pad_w = w // 10
+        h_array = np.zeros((h + pad_h * 2), dtype=np.int32)
+        w_array = np.zeros((w + pad_w * 2), dtype=np.int32)
+        for poly in polys:
+            poly = np.round(poly, decimals=0).astype(np.int32)
+            minx = np.min(poly[:, 0])
+            maxx = np.max(poly[:, 0])
+            w_array[minx + pad_w:maxx + pad_w] = 1
+            miny = np.min(poly[:, 1])
+            maxy = np.max(poly[:, 1])
+            h_array[miny + pad_h:maxy + pad_h] = 1
+        # ensure the cropped area not across a text
+        h_axis = np.where(h_array == 0)[0]
+        w_axis = np.where(w_array == 0)[0]
+        if len(h_axis) == 0 or len(w_axis) == 0:
+            return im, polys, tags
+        for i in range(max_tries):
+            xx = np.random.choice(w_axis, size=2)
+            xmin = np.min(xx) - pad_w
+            xmax = np.max(xx) - pad_w
+            xmin = np.clip(xmin, 0, w - 1)
+            xmax = np.clip(xmax, 0, w - 1)
+            yy = np.random.choice(h_axis, size=2)
+            ymin = np.min(yy) - pad_h
+            ymax = np.max(yy) - pad_h
+            ymin = np.clip(ymin, 0, h - 1)
+            ymax = np.clip(ymax, 0, h - 1)
+            if xmax - xmin < self.min_crop_side_ratio * w or \
+               ymax - ymin < self.min_crop_side_ratio * h:
+                # area too small
+                continue
+            if polys.shape[0] != 0:
+                poly_axis_in_area = (polys[:, :, 0] >= xmin)\
+                    & (polys[:, :, 0] <= xmax)\
+                    & (polys[:, :, 1] >= ymin)\
+                    & (polys[:, :, 1] <= ymax)
+                selected_polys = np.where(
+                    np.sum(poly_axis_in_area, axis=1) == 4)[0]
+            else:
+                selected_polys = []
+            if len(selected_polys) == 0:
+                # no text in this area
+                if crop_background:
+                    im = im[ymin:ymax + 1, xmin:xmax + 1, :]
+                    polys = []
+                    tags = []
+                    return im, polys, tags
+                else:
+                    continue
+            im = im[ymin:ymax + 1, xmin:xmax + 1, :]
+            polys = polys[selected_polys]
+            tags = tags[selected_polys]
+            polys[:, :, 0] -= xmin
+            polys[:, :, 1] -= ymin
+            return im, polys, tags
+        return im, polys, tags
+    def crop_background_infor(self, im, text_polys, text_tags):
+        im, text_polys, text_tags = self.crop_area(
+            im, text_polys, text_tags, crop_background=True)
+        if len(text_polys) > 0:
+            return None
+        # pad and resize image
+        input_size = self.input_size
+        im, ratio = self.preprocess(im)
+        score_map = np.zeros((input_size, input_size), dtype=np.float32)
+        geo_map = np.zeros((input_size, input_size, 9), dtype=np.float32)
+        training_mask = np.ones((input_size, input_size), dtype=np.float32)
+        return im, score_map, geo_map, training_mask
+    def crop_foreground_infor(self, im, text_polys, text_tags):
+        im, text_polys, text_tags = self.crop_area(
+            im, text_polys, text_tags, crop_background=False)
+        if text_polys.shape[0] == 0:
+            return None
+        #continue for all ignore case
+        if np.sum((text_tags * 1.0)) >= text_tags.size:
+            return None
+        # pad and resize image
+        input_size = self.input_size
+        im, ratio = self.preprocess(im)
+        text_polys[:, :, 0] *= ratio
+        text_polys[:, :, 1] *= ratio
+        _, _, new_h, new_w = im.shape
+        #         print(im.shape)
+        #         self.draw_img_polys(im, text_polys)
+        score_map, geo_map, training_mask = self.generate_quad(
+            (new_h, new_w), text_polys, text_tags)
+        return im, score_map, geo_map, training_mask
+    def __call__(self, data):
+        im = data['image']
+        text_polys = data['polys']
+        text_tags = data['ignore_tags']
+        if im is None:
+            return None
+        if text_polys.shape[0] == 0:
+            return None
+        #add rotate cases
+        if np.random.rand() < 0.5:
+            im, text_polys = self.rotate_im_poly(im, text_polys)
+        h, w, _ = im.shape
+        text_polys, text_tags = self.check_and_validate_polys(text_polys,
+                                                              text_tags, h, w)
+        if text_polys.shape[0] == 0:
+            return None
+        # random scale this image
+        rd_scale = np.random.choice(self.random_scale)
+        im = cv2.resize(im, dsize=None, fx=rd_scale, fy=rd_scale)
+        text_polys *= rd_scale
+        if np.random.rand() < self.background_ratio:
+            outs = self.crop_background_infor(im, text_polys, text_tags)
+        else:
+            outs = self.crop_foreground_infor(im, text_polys, text_tags)
+        if outs is None:
+            return None
+        im, score_map, geo_map, training_mask = outs
+        score_map = score_map[np.newaxis, ::4, ::4].astype(np.float32)
+        geo_map = np.swapaxes(geo_map, 1, 2)
+        geo_map = np.swapaxes(geo_map, 1, 0)
+        geo_map = geo_map[:, ::4, ::4].astype(np.float32)
+        training_mask = training_mask[np.newaxis, ::4, ::4]
+        training_mask = training_mask.astype(np.float32)
+        data['image'] = im[0]
+        data['score_map'] = score_map
+        data['geo_map'] = geo_map
+        data['training_mask'] = training_mask
+        # print(im.shape, score_map.shape, geo_map.shape, training_mask.shape)
+        return data
\ No newline at end of file
--- a/ppocr/data/imaug/label_ops.py
+++ b/ppocr/data/imaug/label_ops.py
@@ -52,6 +52,7 @@ class DetLabelEncode(object):
                txt_tags.append(True)
            else:
                txt_tags.append(False)
+        boxes = self.expand_points_num(boxes)
        boxes = np.array(boxes, dtype=np.float32)
        txt_tags = np.array(txt_tags, dtype=np.bool)
@@ -70,6 +71,17 @@ class DetLabelEncode(object):
        rect[3] = pts[np.argmax(diff)]
        return rect
+    def expand_points_num(self, boxes):
+        max_points_num = 0
+        for box in boxes:
+            if len(box) > max_points_num:
+                max_points_num = len(box)
+        ex_boxes = []
+        for box in boxes:
+            ex_box = box + [box[-1]] * (max_points_num - len(box))
+            ex_boxes.append(ex_box)
+        return ex_boxes
 class BaseRecLabelEncode(object):
    """ Convert between text-label and text-index """
@@ -79,7 +91,9 @@ class BaseRecLabelEncode(object):
                 character_dict_path=None,
                 character_type='ch',
                 use_space_char=False):
-        support_character_type = ['ch', 'en', 'en_sensitive']
+        support_character_type = [
+            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
+        ]
        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
            support_character_type, self.character_str)
@@ -87,7 +101,7 @@ class BaseRecLabelEncode(object):
        if character_type == "en":
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
-        elif character_type == "ch":
+        elif character_type in ["ch", "french", "german", "japan", "korean"]:
            self.character_str = ""
            assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
            with open(character_dict_path, "rb") as fin:

--- a/ppocr/data/imaug/operators.py
+++ b/ppocr/data/imaug/operators.py
@@ -120,26 +120,37 @@ class DetResizeForTest(object):
        if 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            self.limit_type = kwargs.get('limit_type', 'min')
+        if 'resize_long' in kwargs:
+            self.resize_type = 2
+            self.resize_long = kwargs.get('resize_long', 960)
        else:
            self.limit_side_len = 736
            self.limit_type = 'min'
    def __call__(self, data):
        img = data['image']
+        src_h, src_w, _ = img.shape
        if self.resize_type == 0:
-            img, shape = self.resize_image_type0(img)
+            # img, shape = self.resize_image_type0(img)
+            img, [ratio_h, ratio_w] = self.resize_image_type0(img)
+        elif self.resize_type == 2:
+            img, [ratio_h, ratio_w] = self.resize_image_type2(img)
        else:
-            img, shape = self.resize_image_type1(img)
+            # img, shape = self.resize_image_type1(img)
+            img, [ratio_h, ratio_w] = self.resize_image_type1(img)
        data['image'] = img
-        data['shape'] = shape
+        data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w])
        return data
    def resize_image_type1(self, img):
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
+        ratio_h = float(resize_h) / ori_h
+        ratio_w = float(resize_w) / ori_w
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
-        return img, np.array([ori_h, ori_w])
+        # return img, np.array([ori_h, ori_w])
+        return img, [ratio_h, ratio_w]
    def resize_image_type0(self, img):
        """
@@ -182,4 +193,31 @@ class DetResizeForTest(object):
        except:
            print(img.shape, resize_w, resize_h)
            sys.exit(0)
-        return img, np.array([h, w])
+        ratio_h = resize_h / float(h)
+        ratio_w = resize_w / float(w)
+        # return img, np.array([h, w])
+        return img, [ratio_h, ratio_w]
+    def resize_image_type2(self, img):
+        h, w, _ = img.shape
+        resize_w = w
+        resize_h = h
+        # Fix the longer side
+        if resize_h > resize_w:
+            ratio = float(self.resize_long) / resize_h
+        else:
+            ratio = float(self.resize_long) / resize_w
+        resize_h = int(resize_h * ratio)
+        resize_w = int(resize_w * ratio)
+        max_stride = 128
+        resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
+        resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
+        img = cv2.resize(img, (int(resize_w), int(resize_h)))
+        ratio_h = resize_h / float(h)
+        ratio_w = resize_w / float(w)
+        return img, [ratio_h, ratio_w]
--- a/ppocr/data/imaug/sast_process.py
+++ b/ppocr/data/imaug/sast_process.py
--- a/ppocr/losses/__init__.py
+++ b/ppocr/losses/__init__.py
@@ -18,6 +18,8 @@ import copy
 def build_loss(config):
    # det loss
    from .det_db_loss import DBLoss
+    from .det_east_loss import EASTLoss
+    from .det_sast_loss import SASTLoss
    # rec loss
    from .rec_ctc_loss import CTCLoss
@@ -25,7 +27,7 @@ def build_loss(config):
    # cls loss
    from .cls_loss import ClsLoss
-    support_dict = ['DBLoss', 'CTCLoss', 'ClsLoss']
+    support_dict = ['DBLoss', 'EASTLoss', 'SASTLoss', 'CTCLoss', 'ClsLoss']
    config = copy.deepcopy(config)
    module_name = config.pop('name')

--- a/ppocr/losses/det_east_loss.py
+++ b/ppocr/losses/det_east_loss.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+from paddle import nn
+from .det_basic_loss import DiceLoss
+class EASTLoss(nn.Layer):
+    """
+    """
+    def __init__(self,
+                 eps=1e-6,
+                 **kwargs):
+        super(EASTLoss, self).__init__()
+        self.dice_loss = DiceLoss(eps=eps)
+    def forward(self, predicts, labels):
+        l_score, l_geo, l_mask = labels[1:]
+        f_score = predicts['f_score']
+        f_geo = predicts['f_geo']
+        dice_loss = self.dice_loss(f_score, l_score, l_mask)
+        #smoooth_l1_loss
+        channels = 8
+        l_geo_split = paddle.split(
+            l_geo, num_or_sections=channels + 1, axis=1)
+        f_geo_split = paddle.split(f_geo, num_or_sections=channels, axis=1)
+        smooth_l1 = 0
+        for i in range(0, channels):
+            geo_diff = l_geo_split[i] - f_geo_split[i]
+            abs_geo_diff = paddle.abs(geo_diff)
+            smooth_l1_sign = paddle.less_than(abs_geo_diff, l_score)
+            smooth_l1_sign = paddle.cast(smooth_l1_sign, dtype='float32')
+            in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
+                (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
+            out_loss = l_geo_split[-1] / channels * in_loss * l_score
+            smooth_l1 += out_loss
+        smooth_l1_loss = paddle.mean(smooth_l1 * l_score)
+        dice_loss = dice_loss * 0.01
+        total_loss = dice_loss + smooth_l1_loss
+        losses = {"loss":total_loss, \
+                  "dice_loss":dice_loss,\
+                  "smooth_l1_loss":smooth_l1_loss}
+        return losses