Merge remote-tracking branch 'origin/dygraph' into dygraph

fcc70660 · Leif · 80aced81 · 013db618 · fcc70660 · fcc70660
Commit fcc70660 authored May 08, 2022 by Leif
20 changed files
--- a/PPOCRLabel/PPOCRLabel.py
+++ b/PPOCRLabel/PPOCRLabel.py
--- a/PPOCRLabel/libs/dataPartitionDialog.py
+++ b/PPOCRLabel/libs/dataPartitionDialog.py
+try:
+    from PyQt5.QtGui import *
+    from PyQt5.QtCore import *
+    from PyQt5.QtWidgets import *
+except ImportError:
+    from PyQt4.QtGui import *
+    from PyQt4.QtCore import *
+from libs.utils import newIcon
+import time
+import datetime
+import json
+import cv2
+import numpy as np
+# Short alias so the button-box flags used by the dialog (BB.Ok, BB.Cancel) stay readable.
+BB = QDialogButtonBox
+class DataPartitionDialog(QDialog):
+    """Application-modal dialog that asks for train/validation/test split percentages.
+
+    The dialog shows a red reminder to save annotations first, three integer
+    fields (validated to 0-100, pre-filled with 70/15/15) and Ok/Cancel buttons.
+    After it closes, callers read ``getStatus()`` to learn whether it was
+    confirmed and ``getDataPartition()`` to obtain the three percentages.
+    """
+
+    def __init__(self, parent=None):
+        """Build and show the dialog.
+
+        Args:
+            parent: optional owner object. Only its ``lang`` attribute is read
+                (``'ch'`` selects the Chinese reminder text); when it is
+                ``None`` or has no ``lang`` the English text is used.
+        """
+        # NOTE(review): the parent is not forwarded to QDialog, so Qt does not
+        # treat it as the owning widget; kept as in the original.
+        super().__init__()
+        # The attribute keeps its historical spelling ("parnet") because code
+        # outside this class may already read it.
+        self.parnet = parent
+        self.title = 'DATA PARTITION'
+        self.train_ratio = 70
+        self.val_ratio = 15
+        self.test_ratio = 15
+        self.initUI()
+
+    def initUI(self):
+        """Create the widgets, lay them out in a grid and show the dialog."""
+        self.setWindowTitle(self.title)
+        self.setWindowModality(Qt.ApplicationModal)
+        self.flag_accept = True
+        # getattr() with a default keeps the dialog usable when it is created
+        # without a parent (the constructor default), instead of raising
+        # AttributeError on ``None.lang``.
+        if getattr(self.parnet, 'lang', 'en') == 'ch':
+            msg = "导出JSON前请保存所有图像的标注且关闭EXCEL!"
+        else:
+            msg = "Please save all the annotations and close the EXCEL before exporting JSON!"
+        info_msg = QLabel(msg, self)
+        info_msg.setWordWrap(True)
+        info_msg.setStyleSheet("color: red")
+        info_msg.setFont(QFont('Arial', 12))
+        train_lbl = QLabel('Train split: ', self)
+        train_lbl.setFont(QFont('Arial', 15))
+        val_lbl = QLabel('Valid split: ', self)
+        val_lbl.setFont(QFont('Arial', 15))
+        test_lbl = QLabel('Test split: ', self)
+        test_lbl.setFont(QFont('Arial', 15))
+        self.train_input = QLineEdit(self)
+        self.train_input.setFont(QFont('Arial', 15))
+        self.val_input = QLineEdit(self)
+        self.val_input.setFont(QFont('Arial', 15))
+        self.test_input = QLineEdit(self)
+        self.test_input.setFont(QFont('Arial', 15))
+        self.train_input.setText(str(self.train_ratio))
+        self.val_input.setText(str(self.val_ratio))
+        self.test_input.setText(str(self.test_ratio))
+        # One validator is shared by the three fields; it restricts typing to
+        # integers in [0, 100] but still lets a field be left blank.
+        validator = QIntValidator(0, 100)
+        self.train_input.setValidator(validator)
+        self.val_input.setValidator(validator)
+        self.test_input.setValidator(validator)
+        gridlayout = QGridLayout()
+        # Row 0: reminder spanning both columns; rows 1-3: label + field.
+        gridlayout.addWidget(info_msg, 0, 0, 1, 2)
+        gridlayout.addWidget(train_lbl, 1, 0)
+        gridlayout.addWidget(val_lbl, 2, 0)
+        gridlayout.addWidget(test_lbl, 3, 0)
+        gridlayout.addWidget(self.train_input, 1, 1)
+        gridlayout.addWidget(self.val_input, 2, 1)
+        gridlayout.addWidget(self.test_input, 3, 1)
+        bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
+        bb.button(BB.Ok).setIcon(newIcon('done'))
+        bb.button(BB.Cancel).setIcon(newIcon('undo'))
+        bb.accepted.connect(self.validate)
+        bb.rejected.connect(self.cancel)
+        # Row 4: the Ok/Cancel button box spanning both columns.
+        gridlayout.addWidget(bb, 4, 0, 1, 2)
+        self.setLayout(gridlayout)
+        self.show()
+
+    def validate(self):
+        """Slot for Ok: record acceptance and close the dialog as accepted."""
+        self.flag_accept = True
+        self.accept()
+
+    def cancel(self):
+        """Slot for Cancel: record rejection and close the dialog as rejected."""
+        self.flag_accept = False
+        self.reject()
+
+    def getStatus(self):
+        """Return True if the dialog was confirmed with Ok, False otherwise."""
+        return self.flag_accept
+
+    @staticmethod
+    def _read_ratio(line_edit):
+        """Return the integer typed into ``line_edit``; a blank or unparsable field counts as 0."""
+        try:
+            return int(line_edit.text())
+        except ValueError:
+            # The validator accepts an empty field, and int('') would raise.
+            return 0
+
+    def getDataPartition(self):
+        """Read the three fields and return ``(train, val, test)`` as integers.
+
+        The values are also stored on ``train_ratio``, ``val_ratio`` and
+        ``test_ratio``. No check is made here that they add up to 100.
+        """
+        self.train_ratio = self._read_ratio(self.train_input)
+        self.val_ratio = self._read_ratio(self.val_input)
+        self.test_ratio = self._read_ratio(self.test_input)
+        return self.train_ratio, self.val_ratio, self.test_ratio
+
+    def closeEvent(self, event):
+        """Treat closing the window as a cancellation."""
+        self.flag_accept = False
+        self.reject()
--- a/PPOCRLabel/libs/utils.py
+++ b/PPOCRLabel/libs/utils.py
@@ -161,6 +161,77 @@ def get_rotate_crop_image(img, points):
        print(e)
+def boxPad(box, imgShape, pad: int) -> np.ndarray:
+    """
+    Pad a four-point box with [pad] pixels on each side and clip it to the image.
+
+    The four rows of ``box`` are moved outwards as: row 0 by (-pad, -pad),
+    row 1 by (+pad, -pad), row 2 by (+pad, +pad), row 3 by (-pad, +pad), i.e.
+    the points are presumably ordered top-left, top-right, bottom-right,
+    bottom-left in image coordinates (verify against callers).
+
+    Args:
+        box: sequence of at least four (x, y) points; it is copied, not modified.
+        imgShape: image shape whose first two entries are (height, width);
+            both ``(h, w)`` and ``(h, w, channels)`` are accepted.
+        pad: number of pixels to add on every side.
+
+    Returns:
+        An int32 array of the padded points, with x clipped to [0, width]
+        and y clipped to [0, height].
+    """
+    box = np.array(box, dtype=np.int32)
+    # Outward direction of each corner, in the row order described above.
+    directions = np.array([[-1, -1], [1, -1], [1, 1], [-1, 1]])
+    box[:4] = box[:4] + directions * pad
+    # Only the first two entries are needed, so grayscale (2-D) shapes work too.
+    h, w = imgShape[:2]
+    box[:, 0] = np.clip(box[:, 0], 0, w)
+    box[:, 1] = np.clip(box[:, 1], 0, h)
+    return box
+def OBB2HBB(obb) -> np.ndarray:
+    """
+    Convert Oriented Bounding Box to Horizontal Bounding Box.
+
+    Args:
+        obb: array-like of (x, y) points (an ndarray or a nested list).
+
+    Returns:
+        An int32 array ``[x_min, y_min, x_max, y_max]`` enclosing all points.
+    """
+    # asarray lets plain nested lists be column-indexed as well as ndarrays.
+    points = np.asarray(obb)
+    xs, ys = points[:, 0], points[:, 1]
+    hbb = np.zeros(4, dtype=np.int32)
+    hbb[0] = xs.min()
+    hbb[1] = ys.min()
+    hbb[2] = xs.max()
+    hbb[3] = ys.max()
+    return hbb
+def expand_list(merged, html_list):
+    '''
+    Mark one merged cell region inside a 2-D list of cell markers.
+
+    ``merged`` is ``(start_row, end_row, start_col, end_col)`` with exclusive
+    end indices. Every cell of the region is set to None, then the top-left
+    cell receives a span marker: '' when the region is a single cell,
+    otherwise " colspan=N" and/or " rowspan=M". The list is changed in place
+    and also returned.
+    '''
+    top, bottom, left, right = merged
+    n_rows = bottom - top
+    n_cols = right - left
+    # Blank out the whole region first; the anchor cell is rewritten below.
+    for r in range(top, bottom):
+        for c in range(left, right):
+            html_list[r][c] = None
+    marker = ''
+    if n_cols > 1:
+        marker = marker + " colspan={}".format(n_cols)
+    if n_rows > 1:
+        marker = marker + " rowspan={}".format(n_rows)
+    html_list[top][left] = marker
+    return html_list
+def _span_value(cell, attr):
+    '''
+    Return the text that follows "<attr>=" in a cell marker, up to the next space.
+    '''
+    # Cut at the first space so a following attribute is not swallowed:
+    # ' colspan=2 rowspan=3' yields '2' for colspan, not '2 rowspan=3'.
+    return cell.split(attr + '=', 1)[1].partition(' ')[0]
+def convert_token(html_list):
+    '''
+    Convert raw html to label format.
+
+    ``html_list`` is a list of rows, each a list of cell markers:
+    None (cell covered by a merged region, skipped), 'td' (plain cell), or a
+    string that may contain " colspan=N" and/or " rowspan=M".
+
+    Returns a flat token list starting with "<tbody>" and ending with
+    "</tbody>", with "<tr>"/"</tr>" around each row. A plain cell gives
+    "<td>", "</td>"; any other marker gives "<td", the quoted span
+    attributes (colspan first), ">", "</td>".
+    '''
+    token_list = ["<tbody>"]
+    # final html list:
+    for row in html_list:
+        token_list.append("<tr>")
+        for col in row:
+            if col is None:
+                continue
+            elif col == 'td':
+                token_list.extend(["<td>", "</td>"])
+            else:
+                token_list.append("<td")
+                if 'colspan' in col:
+                    token_list.append(" colspan=\"{}\"".format(_span_value(col, 'colspan')))
+                if 'rowspan' in col:
+                    token_list.append(" rowspan=\"{}\"".format(_span_value(col, 'rowspan')))
+                token_list.extend([">", "</td>"])
+        token_list.append("</tr>")
+    token_list.append("</tbody>")
+    return token_list
 def stepsInfo(lang='en'):
    if lang == 'ch':
        msg = "1. 安装与运行：使用上述命令安装与运行程序。\n" \

--- a/PPOCRLabel/resources/strings/strings-en.properties
+++ b/PPOCRLabel/resources/strings/strings-en.properties
@@ -84,7 +84,7 @@ mhelp=Help
 iconList=Icon List
 detectionBoxposition=Detection box position
 recognitionResult=Recognition result
-creatPolygon=Create Quadrilateral
+creatPolygon=Create PolygonBox
 rotateLeft=Left turn 90 degrees
 rotateRight=Right turn 90 degrees
 drawSquares=Draw Squares
@@ -110,3 +110,6 @@ lockBoxDetail=Lock selected box/Unlock all box
 keyListTitle=Key List
 keyDialogTip=Enter object label
 keyChange=Change Box Key
+TableRecognition=Table Recognition
+cellreRecognition=Cell Re-Recognition
+exportJSON=Export JSON (PubTabNet)
--- a/PPOCRLabel/resources/strings/strings-zh-CN.properties
+++ b/PPOCRLabel/resources/strings/strings-zh-CN.properties
@@ -84,7 +84,7 @@ mhelp=帮助
 iconList=缩略图
 detectionBoxposition=检测框位置
 recognitionResult=识别结果
-creatPolygon=四点标注
+creatPolygon=多边形标注
 drawSquares=正方形标注
 rotateLeft=图片左旋转90度
 rotateRight=图片右旋转90度
@@ -109,4 +109,7 @@ lockBox=锁定框/解除锁定框
 lockBoxDetail=若当前没有框处于锁定状态则锁定选中的框，若存在锁定框则解除所有锁定框的锁定状态
 keyListTitle=关键词列表
 keyDialogTip=请输入类型名称
 keyChange=更改Box关键字类别
\ No newline at end of file
+TableRecognition=表格识别
+cellreRecognition=单元格重识别
+exportJSON=导出表格JSON标注
\ No newline at end of file
--- a/README.md
+++ b/README.md
@@ -19,12 +19,9 @@ PaddleOCR aims to create multilingual, awesome, leading, and practical OCR tools
 **Recent updates**
- 2021.12.21 OCR open source online course starts. The lesson starts at 8:30 every night and lasts for ten days. Free registration: https://aistudio.baidu.com/aistudio/course/introduce/25207
+- 2021.12.21 released PaddleOCR v2.4, adding 1 text detection algorithm (PSENet), 3 text recognition algorithms (NRTR, SEED, SAR), 1 key information extraction algorithm (SDMGR, [tutorial](./ppstructure/docs/kie_en.md)) and 3 DocVQA algorithms (LayoutLM, LayoutLMv2, LayoutXLM, [tutorial](./ppstructure/vqa)).
- 2021.12.21 release PaddleOCR v2.4, release 1 text detection algorithm (PSENet), 3 text recognition algorithms (NRTR、SEED、SAR), 1 key information extraction algorithm (SDMGR, [tutorial](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.4/ppstructure/docs/kie.md)) and 3 DocVQA algorithms (LayoutLM, LayoutLMv2, LayoutXLM, [tutorial](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.4/ppstructure/vqa)).
+- 2021.9.7 release PaddleOCR v2.3, [PP-OCRv2](./doc/doc_en/ppocr_introduction_en.md#pp-ocrv2) is proposed. The inference speed of PP-OCRv2 is 220% higher than that of PP-OCR server in CPU device. The F-score of PP-OCRv2 is 7% higher than that of PP-OCR mobile.
- PaddleOCR R&D team would like to share the key points of PP-OCRv2, at 20:15 pm on September 8th, [Course Address](https://aistudio.baidu.com/aistudio/education/group/info/6758).
+- 2021.8.3 released PaddleOCR v2.2, add a new structured documents analysis toolkit, i.e., [PP-Structure](./ppstructure/README.md), support layout analysis and table recognition (One-key to export chart images to Excel files).
- 2021.9.7 release PaddleOCR v2.3, [PP-OCRv2](#PP-OCRv2) is proposed. The inference speed of PP-OCRv2 is 220% higher than that of PP-OCR server in CPU device. The F-score of PP-OCRv2 is 7% higher than that of PP-OCR mobile.
- 2021.8.3 released PaddleOCR v2.2, add a new structured documents analysis toolkit, i.e., [PP-Structure](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/ppstructure/README.md), support layout analysis and table recognition (One-key to export chart images to Excel files).
- 2021.4.8 release end-to-end text recognition algorithm [PGNet](https://www.aaai.org/AAAI21Papers/AAAI-2885.WangP.pdf) which is published in AAAI 2021. Find tutorial [here](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/pgnet_en.md)；release multi language recognition [models](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/multi_languages_en.md), support more than 80 languages recognition; especically, the performance of [English recognition model](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.1/doc/doc_en/models_list_en.md#English) is Optimized.
 - [more](./doc/doc_en/update_en.md)
@@ -81,7 +78,6 @@ PaddleOCR support a variety of cutting-edge algorithms related to OCR, and devel
 ## Tutorials
 - [Environment Preparation](./doc/doc_en/environment_en.md)
- [Quick Start](./doc/doc_en/quickstart_en.md)
 - [PP-OCR 🔥](./doc/doc_en/ppocr_introduction_en.md)
    - [Quick Start](./doc/doc_en/quickstart_en.md)
    - [Model Zoo](./doc/doc_en/models_en.md)

--- a/README_ch.md
+++ b/README_ch.md
@@ -27,10 +27,9 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
 ## 近期更新
- 2021.12.21《动手学OCR · 十讲》课程开讲，12月21日起每晚八点半线上授课！[免费报名地址](https://aistudio.baidu.com/aistudio/course/introduce/25207)。
+- 2021.12.21 发布PaddleOCR v2.4。OCR算法新增1种文本检测算法（PSENet），3种文本识别算法（NRTR、SEED、SAR）；文档结构化算法新增1种关键信息提取算法（SDMGR，[文档](./ppstructure/docs/kie.md)），3种DocVQA算法（LayoutLM、LayoutLMv2，LayoutXLM，[文档](./ppstructure/vqa)）。
- 2021.12.21 发布PaddleOCR v2.4。OCR算法新增1种文本检测算法（PSENet），3种文本识别算法（NRTR、SEED、SAR）；文档结构化算法新增1种关键信息提取算法（SDMGR，[文档](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.4/ppstructure/docs/kie.md)），3种DocVQA算法（LayoutLM、LayoutLMv2，LayoutXLM，[文档](https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.4/ppstructure/vqa)）。
+- 2021.9.7 发布PaddleOCR v2.3与[PP-OCRv2](./doc/doc_ch/ppocr_introduction.md#pp-ocrv2)，CPU推理速度相比于PP-OCR server提升220%；效果相比于PP-OCR mobile 提升7%。
- 2021.9.7 发布PaddleOCR v2.3与[PP-OCRv2](#PP-OCRv2)，CPU推理速度相比于PP-OCR server提升220%；效果相比于PP-OCR mobile 提升7%。
+- 2021.8.3 发布PaddleOCR v2.2，新增文档结构分析[PP-Structure](./ppstructure/README_ch.md)工具包，支持版面分析与表格识别（含Excel导出）。
- 2021.8.3 发布PaddleOCR v2.2，新增文档结构分析[PP-Structure](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.2/ppstructure/README_ch.md)工具包，支持版面分析与表格识别（含Excel导出）。
 > [更多](./doc/doc_ch/update.md)
@@ -83,7 +82,6 @@ PaddleOCR旨在打造一套丰富、领先、且实用的OCR工具库，助力
 ## 文档教程
 - [运行环境准备](./doc/doc_ch/environment.md)
- [快速开始（中英文/多语言/文档分析）](./doc/doc_ch/quickstart.md)
 - [PP-OCR文本检测识别🔥](./doc/doc_ch/ppocr_introduction.md)
    - [快速开始](./doc/doc_ch/quickstart.md)
    - [模型库](./doc/doc_ch/models_list.md)

--- a/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml
+++ b/configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml
@@ -129,7 +129,7 @@ Loss:
      key: head_out
      multi_head: True
  - DistillationSARLoss:
-      weight: 0.5
+      weight: 1.0
      model_name_list: ["Student", "Teacher"]
      key: head_out
      multi_head: True

--- a/deploy/cpp_infer/src/ocr_rec.cpp
+++ b/deploy/cpp_infer/src/ocr_rec.cpp
@@ -166,6 +166,10 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
    config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
  }
+  // get pass_builder object
+  auto pass_builder = config.pass_builder();
+  // delete "matmul_transpose_reshape_fuse_pass"
+  pass_builder->DeletePass("matmul_transpose_reshape_fuse_pass");
  config.SwitchUseFeedFetchOps(false);
  // true for multiple input
  config.SwitchSpecifyInputNames(true);

--- a/deploy/pdserving/config.yml
+++ b/deploy/pdserving/config.yml
@@ -36,8 +36,8 @@ op:
            #det模型路径
            model_config: ./ppocr_det_v3_serving
-            #Fetch结果列表，以client_config中fetch_var的alias_name为准
+            #Fetch结果列表，以client_config中fetch_var的alias_name为准，不设置默认取全部输出变量
-            fetch_list: ["sigmoid_0.tmp_0"]
+            #fetch_list: ["sigmoid_0.tmp_0"]
            #计算硬件ID，当devices为""或不写时为CPU预测；当devices为"0", "0,1,2"时为GPU预测，表示使用的GPU卡
            devices: "0"
@@ -62,8 +62,8 @@ op:
            #rec模型路径
            model_config: ./ppocr_rec_v3_serving
-            #Fetch结果列表，以client_config中fetch_var的alias_name为准
+            #Fetch结果列表，以client_config中fetch_var的alias_name为准, 不设置默认取全部输出变量
-            fetch_list: ["softmax_5.tmp_0"]
+            #fetch_list: 
            #计算硬件ID，当devices为""或不写时为CPU预测；当devices为"0", "0,1,2"时为GPU预测，表示使用的GPU卡
            devices: "0"

--- a/deploy/pdserving/ocr_reader.py
+++ b/deploy/pdserving/ocr_reader.py
@@ -393,7 +393,7 @@ class OCRReader(object):
        return norm_img_batch[0]
    def postprocess(self, outputs, with_score=False):
-        preds = outputs["softmax_5.tmp_0"]
+        preds = list(outputs.values())[0]
        try:
            preds = preds.numpy()
        except:
@@ -404,8 +404,11 @@ class OCRReader(object):
            preds_idx, preds_prob, is_remove_duplicate=True)
        return text
-from argparse import ArgumentParser,RawDescriptionHelpFormatter
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
 import yaml
 class ArgsParser(ArgumentParser):
    def __init__(self):
        super(ArgsParser, self).__init__(
@@ -441,16 +444,16 @@ class ArgsParser(ArgumentParser):
            s = s.strip()
            k, v = s.split('=')
            v = self._parse_helper(v)
-            print(k,v, type(v))
+            print(k, v, type(v))
            cur = config
            parent = cur
            for kk in k.split("."):
                if kk not in cur:
-                     cur[kk] = {}
+                    cur[kk] = {}
-                     parent = cur
+                    parent = cur
-                     cur = cur[kk]
+                    cur = cur[kk]
                else:
-                     parent = cur
+                    parent = cur
-                     cur = cur[kk]
+                    cur = cur[kk]
            parent[k.split(".")[-1]] = v
        return config
\ No newline at end of file
--- a/deploy/pdserving/web_service.py
+++ b/deploy/pdserving/web_service.py
@@ -56,7 +56,7 @@ class DetOp(Op):
        return {"x": det_img[np.newaxis, :].copy()}, False, None, ""
    def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
-        det_out = fetch_dict["sigmoid_0.tmp_0"]
+        det_out = list(fetch_dict.values())[0]
        ratio_list = [
            float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
        ]

--- a/deploy/pdserving/web_service_det.py
+++ b/deploy/pdserving/web_service_det.py
@@ -55,7 +55,7 @@ class DetOp(Op):
        return {"x": det_img[np.newaxis, :].copy()}, False, None, ""
    def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
-        det_out = fetch_dict["sigmoid_0.tmp_0"]
+        det_out = list(fetch_dict.values())[0]
        ratio_list = [
            float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
        ]

--- a/doc/doc_ch/PP-OCRv3_introduction.md
+++ b/doc/doc_ch/PP-OCRv3_introduction.md
--- a/doc/doc_ch/models_list.md
+++ b/doc/doc_ch/models_list.md
--- a/doc/doc_ch/ppocr_introduction.md
+++ b/doc/doc_ch/ppocr_introduction.md
@@ -38,8 +38,9 @@ PP-OCRv2在PP-OCR的基础上，进一步在5个方面重点优化，检测模
 #### PP-OCRv3
-PP-OCRv3在PP-OCRv2的基础上进一步升级。检测模型仍然基于DB算法，优化策略采用了带残差注意力机制的FPN结构RSEFPN、增大感受野的PAN结构LKPAN、基于DML训练的更优的教师模型；识别模型将base模型从CRNN替换成了IJCAI 2022论文[SVTR](https://arxiv.org/abs/2205.00159)，并采用SVTR轻量化、带指导训练CTC、数据增广策略RecConAug、自监督训练的更好的预训练模型、无标签数据的使用进行模型加速和效果提升。更多细节请参考PP-OCRv3[技术报告](./PP-OCRv3_introduction.md)。
+PP-OCRv3在PP-OCRv2的基础上，针对检测模型和识别模型，进行了共计9个方面的升级：
+- PP-OCRv3检测模型对PP-OCRv2中的CML协同互学习文本检测蒸馏策略进行了升级，分别针对教师模型和学生模型进行进一步效果优化。其中，在对教师模型优化时，提出了大感受野的PAN结构LK-PAN和引入了DML蒸馏策略；在对学生模型优化时，提出了残差注意力机制的FPN结构RSE-FPN。
+- PP-OCRv3的识别模块是基于文本识别算法[SVTR](https://arxiv.org/abs/2205.00159)优化。SVTR不再采用RNN结构，通过引入Transformers结构更加有效地挖掘文本行图像的上下文信息，从而提升文本识别能力。PP-OCRv3通过轻量级文本识别网络SVTR_LCNet、Attention损失指导CTC损失训练策略、挖掘文字上下文信息的数据增广策略TextConAug、TextRotNet自监督预训练模型、UDML联合互学习策略、UIM无标注数据挖掘方案，6个方面进行模型加速和效果提升。
 PP-OCRv3系统pipeline如下：
@@ -47,6 +48,9 @@ PP-OCRv3系统pipeline如下：
    <img src="../ppocrv3_framework.png" width="800">
 </div>
+更多细节请参考PP-OCRv3[技术报告](./PP-OCRv3_introduction.md)。
 <a name="2"></a>
 ## 2. 特性

--- a/doc/doc_ch/quickstart.md
+++ b/doc/doc_ch/quickstart.md
@@ -59,15 +59,13 @@ cd /path/to/ppocr_img
 如果不使用提供的测试图片，可以将下方`--image_dir`参数替换为相应的测试图片路径。
-**注意** whl包默认使用`PP-OCRv3`模型，识别模型使用的输入shape为`3,48,320`, 因此如果使用识别功能，需要添加参数`--rec_image_shape 3,48,320`，如果不使用默认的`PP-OCRv3`模型，则无需设置该参数。
 <a name="211"></a>
 #### 2.1.1 中英文模型
 * 检测+方向分类器+识别全流程：`--use_angle_cls true`设置使用方向分类器识别180度旋转文字，`--use_gpu false`设置不使用GPU
  ```bash
-  paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true --use_gpu false --rec_image_shape 3,48,320
+  paddleocr --image_dir ./imgs/11.jpg --use_angle_cls true --use_gpu false
  ```
  结果是一个list，每个item包含了文本框，文字和识别置信度
@@ -94,7 +92,7 @@ cd /path/to/ppocr_img
 - 单独使用识别：设置`--det`为`false`
  ```bash
-  paddleocr --image_dir ./imgs_words/ch/word_1.jpg --det false --rec_image_shape 3,48,320
+  paddleocr --image_dir ./imgs_words/ch/word_1.jpg --det false
  ```
  结果是一个list，每个item只包含识别结果和识别置信度
@@ -104,16 +102,16 @@ cd /path/to/ppocr_img
  ```
-如需使用2.0模型，请指定参数`--version PP-OCR`，paddleocr默认使用PP-OCRv3模型(`--versioin PP-OCRv3`)。更多whl包使用可参考[whl包文档](./whl.md)
+如需使用2.0模型，请指定参数`--ocr_version PP-OCR`，paddleocr默认使用PP-OCRv3模型(`--ocr_version PP-OCRv3`)。更多whl包使用可参考[whl包文档](./whl.md)
 <a name="212"></a>
 #### 2.1.2 多语言模型
-Paddleocr目前支持80个语种，可以通过修改`--lang`参数进行切换，对于英文模型，指定`--lang=en`, PP-OCRv3目前只支持中文和英文模型，其他多语言模型会陆续更新。
+PaddleOCR目前支持80个语种，可以通过修改`--lang`参数进行切换，对于英文模型，指定`--lang=en`。
 ``` bash
-paddleocr --image_dir ./imgs_en/254.jpg --lang=en --rec_image_shape 3,48,320
+paddleocr --image_dir ./imgs_en/254.jpg --lang=en
 ```
 <div align="center">

--- a/doc/doc_ch/update.md
+++ b/doc/doc_ch/update.md
 # 更新
+- 2022.5.7 添加对[Weights & Biases](https://docs.wandb.ai/)训练日志记录工具的支持。
 - 2021.12.21 《OCR十讲》课程开讲，12月21日起每晚八点半线上授课！ 【免费】报名地址：https://aistudio.baidu.com/aistudio/course/introduce/25207
 - 2021.12.21 发布PaddleOCR v2.4。OCR算法新增1种文本检测算法（PSENet），3种文本识别算法（NRTR、SEED、SAR）；文档结构化算法新增1种关键信息提取算法（SDMGR），3种DocVQA算法（LayoutLM、LayoutLMv2，LayoutXLM）。
 - 2021.9.7 发布PaddleOCR v2.3，发布[PP-OCRv2](#PP-OCRv2)，CPU推理速度相比于PP-OCR server提升220%；效果相比于PP-OCR mobile 提升7%。

--- a/doc/doc_ch/whl.md
+++ b/doc/doc_ch/whl.md
@@ -199,12 +199,10 @@ for line in result:
 paddleocr -h
 ```
-**注意** whl包默认使用`PP-OCRv3`模型，识别模型使用的输入shape为`3,48,320`, 因此如果使用识别功能，需要添加参数`--rec_image_shape 3,48,320`，如果不使用默认的`PP-OCRv3`模型，则无需设置该参数。
 * 检测+方向分类器+识别全流程
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true --rec_image_shape 3,48,320
+paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true
 ```
 结果是一个list，每个item包含了文本框，文字和识别置信度
@@ -217,7 +215,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --use_angle_cls true --rec_image
 * 检测+识别
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec_image_shape 3,48,320
+paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg
 ```
 结果是一个list，每个item包含了文本框，文字和识别置信度
@@ -230,7 +228,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec_image_shape 3,48,320
 * 方向分类器+识别
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false --rec_image_shape 3,48,320
+paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --use_angle_cls true --det false
 ```
 结果是一个list，每个item只包含识别结果和识别置信度
@@ -256,7 +254,7 @@ paddleocr --image_dir PaddleOCR/doc/imgs/11.jpg --rec false
 * 单独执行识别
 ```bash
-paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false --rec_image_shape 3,48,320
+paddleocr --image_dir PaddleOCR/doc/imgs_words/ch/word_1.jpg --det false
 ```
 结果是一个list，每个item只包含识别结果和识别置信度
@@ -416,4 +414,4 @@ im_show.save('result.jpg')
 | cls                     | 前向时是否启动分类 (命令行模式下使用use_angle_cls控制前向是否启动分类)                                                                                                                                                                                                | FALSE                    |
 | show_log                     | 是否打印logger信息                                                                                                                                               | FALSE                    |
 | type                     | 执行ocr或者表格结构化, 值可选['ocr','structure']                                                                                                                                                                                             | ocr                    |
-| ocr_version                     | OCR模型版本，可选PP-OCRv3, PP-OCRv2, PP-OCR。PP-OCRv3 目前仅支持中、英文的检测和识别模型，方向分类器模型；PP-OCRv2 目前仅支持中文的检测和识别模型；PP-OCR支持中文的检测，识别，多语种识别，方向分类器等模型                                                                                                                                        | PP-OCRv3                   |
+| ocr_version                     | OCR模型版本，可选PP-OCRv3, PP-OCRv2, PP-OCR。PP-OCRv3 支持中、英文的检测、识别、多语种识别，方向分类器等模型；PP-OCRv2 目前仅支持中文的检测和识别模型；PP-OCR支持中文的检测，识别，多语种识别，方向分类器等模型                                                                                                                                        | PP-OCRv3                   |
--- a/doc/doc_en/config_en.md
+++ b/doc/doc_en/config_en.md
@@ -36,6 +36,7 @@ Take rec_chinese_lite_train_v2.0.yml as an example
 |      pretrained_model    |    Set the path of the pre-trained model      |  ./pretrain_models/CRNN/best_accuracy  |  \          |
 |      checkpoints         |    set model parameter path            |       None        |   Used to load parameters after interruption to continue training|
 |      use_visualdl  |    Set whether to enable visualdl for visual log display |          False        |    [Tutorial](https://www.paddlepaddle.org.cn/paddle/visualdl) |
+|      use_wandb     |    Set whether to enable W&B for visual log display      | False | [Documentation](https://docs.wandb.ai/) |
 |      infer_img            |    Set inference image path or folder path     |       ./infer_img | \||
 |      character_dict_path |    Set dictionary path            |  ./ppocr/utils/ppocr_keys_v1.txt  | If the character_dict_path is None, model can only recognize number and lower letters |
 |      max_text_length     |    Set the maximum length of text        |       25          |                \                 |
@@ -66,7 +67,7 @@ In PaddleOCR, the network is divided into four stages: Transform, Backbone, Neck
 | :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
 |      model_type        |         Network Type          |  rec  |  Currently support`rec`,`det`,`cls`  |
 |      algorithm           |    Model name  |       CRNN         |               See [algorithm_overview](./algorithm_overview_en.md) for the support list             |
-|      **Transform**           |    Set the transformation method  |       -       |               Currently only recognition algorithms are supported, see [ppocr/modeling/transforms](../../ppocr/modeling/transforms) for details            |
+|      **Transform**           |    Set the transformation method  |       -       |               Currently only recognition algorithms are supported, see [ppocr/modeling/transforms](../../ppocr/modeling/transforms) for details            |
 |        name    |      Transformation class name   |         TPS       | Currently supports `TPS` |
 |        num_fiducial      |   Number of TPS control points        |       20      |  Ten on the top and bottom       |
 |        loc_lr      |    Localization network learning rate        |       0.1      |  \      |
@@ -130,6 +131,17 @@ In PaddleOCR, the network is divided into four stages: Transform, Backbone, Neck
 |      drop_last        |        Whether to discard the last incomplete mini-batch because the number of samples in the data set cannot be divisible by batch_size        |  True | \  |
 |      num_workers        |        The number of sub-processes used to load data, if it is 0, the sub-process is not started, and the data is loaded in the main process       |  8 | \  |
+### Weights & Biases ([W&B](../../ppocr/utils/loggers/wandb_logger.py))
+|         Parameter             |            Use            |      Defaults        |            Note             |
+| :---------------------: |  :---------------------:   | :--------------:  |   :--------------------:   |
+|          project              |     Project to which the run is to be logged | uncategorized | \ |
+|          name                 |     Alias/Name of the run | Randomly generated by wandb | \ |
+|          id                   |     ID of the run    | Randomly generated by wandb     | \ |
+|          entity               | User or team to which the run is being logged         | The logged in user | \ |
+|          save_dir             | Local directory in which all the models and other data are saved | wandb | \ |
+|          config               | Model configuration | None | \ |
 <a name="3-multilingual-config-file-generation"></a>
 ## 3. Multilingual Config File Generation
@@ -233,4 +245,4 @@ For more supported languages, please refer to : [Multi-language model](https://g
 The multi-language model training method is the same as the Chinese model. The training data set is 100w synthetic data. A small amount of fonts and test data can be downloaded using the following two methods.
 * [Baidu Netdisk](https://pan.baidu.com/s/1bS_u207Rm7YbY33wOECKDA),Extraction code:frgi.
 * [Google drive](https://drive.google.com/file/d/18cSWX7wXSy4G0tbKJ0d9PuIaiwRLHpjA/view)
\ No newline at end of file