Merge remote-tracking branch 'origin/release/2.5' into release2.5

c9561e90 · Leif · 0667daa3 · d8a8ca81 · c9561e90 · c9561e90
Commit c9561e90 authored May 24, 2022 by Leif
17 changed files
--- a/doc/doc_en/models_list_en.md
+++ b/doc/doc_en/models_list_en.md
@@ -20,7 +20,7 @@ The downloadable models provided by PaddleOCR include `inference model`, `traine

 |model type|model format|description|
 |--- | --- | --- |
-|inference model|inference.pdmodel、inference.pdiparams|Used for inference based on Paddle inference engine，[detail](./inference_en.md)|
+|inference model|inference.pdmodel、inference.pdiparams|Used for inference based on Paddle inference engine，[detail](./inference_ppocr_en.md)|
 |trained model, pre-trained model|\*.pdparams、\*.pdopt、\*.states |The checkpoints model saved in the training process, which stores the parameters of the model, mostly used for model evaluation and continuous training.|
 |nb model|\*.nb| Model optimized by Paddle-Lite, which is suitable for mobile-side deployment scenarios (Paddle-Lite is needed for nb model deployment). |

@@ -37,7 +37,7 @@ Relationship of the above models is as follows.

 |model name|description|config|model size|download|
 | --- | --- | --- | --- | --- |
-|ch_PP-OCRv3_det_slim| [New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection |[ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml)| 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/ch/ch_PP-OCRv3_det_slim_distill_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|
+|ch_PP-OCRv3_det_slim| [New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection |[ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml)| 1.1M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_distill_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_slim_infer.nb)|
 |ch_PP-OCRv3_det| [New] Original lightweight model, supporting Chinese, English, multilingual text detection |[ch_PP-OCRv3_det_cml.yml](../../configs/det/ch_PP-OCRv3/ch_PP-OCRv3_det_cml.yml)| 3.8M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_distill_train.tar)|
 |ch_PP-OCRv2_det_slim| [New] slim quantization with distillation lightweight model, supporting Chinese, English, multilingual text detection|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml)| 3M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_slim_quant_infer.tar)|
 |ch_PP-OCRv2_det| [New] Original lightweight model, supporting Chinese, English, multilingual text detection|[ch_PP-OCRv2_det_cml.yml](../../configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml)|3M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_distill_train.tar)|
@@ -75,7 +75,7 @@ Relationship of the above models is as follows.

 |model name|description|config|model size|download|
 | --- | --- | --- | --- | --- |
-|ch_PP-OCRv3_rec_slim | [New] Slim qunatization with distillation lightweight model, supporting Chinese, English text recognition |[ch_PP-OCRv3_rec_distillation.yml](../../configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml)| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/ch/ch_PP-OCRv3_rec_slim_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb) |
+|ch_PP-OCRv3_rec_slim | [New] Slim quantization with distillation lightweight model, supporting Chinese, English text recognition |[ch_PP-OCRv3_rec_distillation.yml](../../configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml)| 4.9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_slim_infer.nb) |
 |ch_PP-OCRv3_rec| [New] Original lightweight model, supporting Chinese, English, multilingual text recognition |[ch_PP-OCRv3_rec_distillation.yml](../../configs/rec/PP-OCRv3/ch_PP-OCRv3_rec_distillation.yml)| 12.4M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_train.tar) |
 |ch_PP-OCRv2_rec_slim| Slim qunatization with distillation lightweight model, supporting Chinese, English text recognition|[ch_PP-OCRv2_rec.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec.yml)| 9M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_slim_quant_train.tar) |
 |ch_PP-OCRv2_rec| Original lightweight model, supporting Chinese, English, multilingual text recognition |[ch_PP-OCRv2_rec_distillation.yml](../../configs/rec/ch_PP-OCRv2/ch_PP-OCRv2_rec_distillation.yml)|8.5M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_train.tar) |
@@ -91,7 +91,7 @@ Relationship of the above models is as follows.

 |model name|description|config|model size|download|
 | --- | --- | --- | --- | --- |
-|en_PP-OCRv3_rec_slim | [New] Slim qunatization with distillation lightweight model, supporting english, English text recognition |[en_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml)| 3.2M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_infer.nb) |
+|en_PP-OCRv3_rec_slim | [New] Slim quantization with distillation lightweight model, supporting English text recognition |[en_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml)| 3.2M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_train.tar) / [nb model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_slim_infer.nb) |
 |en_PP-OCRv3_rec| [New] Original lightweight model, supporting english, English, multilingual text recognition |[en_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/en_PP-OCRv3_rec.yml)| 9.6M |[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_train.tar) |
 |en_number_mobile_slim_v2.0_rec|Slim pruned and quantized lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)| 2.7M | [inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/en_number_mobile_v2.0_rec_slim_train.tar) |
 |en_number_mobile_v2.0_rec|Original lightweight model, supporting English and number recognition|[rec_en_number_lite_train.yml](../../configs/rec/multi_language/rec_en_number_lite_train.yml)|2.6M|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_train.tar) |
@@ -108,7 +108,7 @@ Relationship of the above models is as follows.
 | ka_PP-OCRv3_rec | ppocr/utils/dict/ka_dict.txt | Lightweight model for Kannada recognition |[ka_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/multi_language/ka_PP-OCRv3_rec.yml)|9.9M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ka_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ka_PP-OCRv3_rec_train.tar) |
 | ta_PP-OCRv3_rec | ppocr/utils/dict/ta_dict.txt |Lightweight model for Tamil recognition|[ta_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/multi_language/ta_PP-OCRv3_rec.yml)|9.6M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ta_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ta_PP-OCRv3_rec_train.tar) |
 | latin_PP-OCRv3_rec |  ppocr/utils/dict/latin_dict.txt | Lightweight model for latin recognition |  [latin_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/multi_language/latin_PP-OCRv3_rec.yml) |9.7M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_train.tar) |
-| arabic_PP-OCRv3_rec | ppocr/utils/dict/arabic_dict.txt | Lightweight model for arabic recognition  | [arabic_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/multi_language/rec_arabic_lite_train.yml) |9.6M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_train.tar) |
+| arabic_PP-OCRv3_rec | ppocr/utils/dict/arabic_dict.txt | Lightweight model for arabic recognition  | [arabic_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/multi_language/arabic_PP-OCRv3_rec.yml) |9.6M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_train.tar) |
 | cyrillic_PP-OCRv3_rec | ppocr/utils/dict/cyrillic_dict.txt | Lightweight model for cyrillic recognition  | [cyrillic_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/multi_language/cyrillic_PP-OCRv3_rec.yml) |9.6M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_train.tar) |
 | devanagari_PP-OCRv3_rec | ppocr/utils/dict/devanagari_dict.txt | Lightweight model for devanagari recognition | [devanagari_PP-OCRv3_rec.yml](../../configs/rec/PP-OCRv3/multi_language/devanagari_PP-OCRv3_rec.yml) |9.9M|[inference model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_infer.tar) / [trained model](https://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_train.tar) |


--- a/doc/doc_en/multi_languages_en.md
+++ b/doc/doc_en/multi_languages_en.md
@@ -187,10 +187,10 @@ In addition to installing the whl package for quick forecasting,
 PPOCR also provides a variety of forecasting deployment methods.
 If necessary, you can read related documents:

- [Python Inference](./inference_en.md)
- [C++ Inference](../../deploy/cpp_infer/readme_en.md)
+- [Python Inference](./inference_ppocr_en.md)
+- [C++ Inference](../../deploy/cpp_infer/readme.md)
 - [Serving](../../deploy/hubserving/readme_en.md)
- [Mobile](https://github.com/PaddlePaddle/PaddleOCR/blob/develop/deploy/lite/readme_en.md)
+- [Mobile](../../deploy/lite/readme.md)
 - [Benchmark](./benchmark_en.md)



--- a/doc/doc_en/ppocr_introduction_en.md
+++ b/doc/doc_en/ppocr_introduction_en.md
@@ -38,7 +38,7 @@ On the basis of PP-OCR, PP-OCRv2 is further optimized in five aspects. The detec

 PP-OCRv3 upgraded the detection model and recognition model in 9 aspects based on PP-OCRv2:
 - PP-OCRv3 detector upgrades the CML(Collaborative Mutual Learning) text detection strategy proposed in PP-OCRv2, and further optimizes the effect of teacher model and student model respectively. In the optimization of teacher model, a pan module with large receptive field named LK-PAN is proposed and the DML distillation strategy is adopted; In the optimization of student model, a FPN module with residual attention mechanism named RSE-FPN is proposed.
- PP-OCRv3 recognizer is optimized based on text recognition algorithm [SVTR](https://arxiv.org/abs/2205.00159). SVTR no longer adopts RNN by introducing transformers structure, which can mine the context information of text line image more effectively, so as to improve the ability of text recognition. PP-OCRv3 adopts lightweight text recognition network SVTR_LCNet, guided training of CTC loss by attention loss, data augmentation strategy TextConAug, better pre-trained model by self-supervised TextRotNet, UDML(Unified Deep Mutual Learning), and UIM (Unlabeled Images Mining) to accelerate the model and improve the effect.
+- PP-OCRv3 recognizer is optimized based on text recognition algorithm [SVTR](https://arxiv.org/abs/2205.00159). SVTR no longer adopts RNN by introducing transformers structure, which can mine the context information of text line image more effectively, so as to improve the ability of text recognition. PP-OCRv3 adopts lightweight text recognition network SVTR_LCNet, guided training of CTC by attention, data augmentation strategy TextConAug, better pre-trained model by self-supervised TextRotNet, UDML(Unified Deep Mutual Learning), and UIM (Unlabeled Images Mining) to accelerate the model and improve the effect.

 PP-OCRv3 pipeline is as follows:


--- a/doc/doc_en/update_en.md
+++ b/doc/doc_en/update_en.md
 # RECENT UPDATES
 - 2022.5.9 release PaddleOCR v2.5, including:
-    - [PP-OCRv3](./doc/doc_en/ppocr_introduction_en.md#pp-ocrv3): With comparable speed, the effect of Chinese scene is further improved by 5% compared with PP-OCRv2, the effect of English scene is improved by 11%, and the average recognition accuracy of 80 language multilingual models is improved by more than 5%.
-    - [PPOCRLabelv2](./PPOCRLabel): Add the annotation function for table recognition task, key information extraction task and irregular text image.
-    - Interactive e-book [*"Dive into OCR"*](./doc/doc_en/ocr_book_en.md), covers the cutting-edge theory and code practice of OCR full stack technology.
+    - [PP-OCRv3](./ppocr_introduction_en.md#pp-ocrv3): With comparable speed, the effect of Chinese scene is further improved by 5% compared with PP-OCRv2, the effect of English scene is improved by 11%, and the average recognition accuracy of 80 language multilingual models is improved by more than 5%.
+    - [PPOCRLabelv2](../../PPOCRLabel): Add the annotation function for table recognition task, key information extraction task and irregular text image.
+    - Interactive e-book [*"Dive into OCR"*](./ocr_book_en.md), covers the cutting-edge theory and code practice of OCR full stack technology.
 - 2022.5.7 Add support for metric and model logging during training to [Weights & Biases](https://docs.wandb.ai/).
 - 2021.12.21 OCR open source online course starts. The lesson starts at 8:30 every night and lasts for ten days. Free registration: https://aistudio.baidu.com/aistudio/course/introduce/25207
 - 2021.12.21 release PaddleOCR v2.4, release 1 text detection algorithm (PSENet), 3 text recognition algorithms (NRTR、SEED、SAR), 1 key information extraction algorithm (SDMGR) and 3 DocVQA algorithms (LayoutLM、LayoutLMv2，LayoutXLM).

--- a/ppocr/data/lmdb_dataset.py
+++ b/ppocr/data/lmdb_dataset.py
@@ -37,6 +37,8 @@ class LMDBDataSet(Dataset):
        if self.do_shuffle:
            np.random.shuffle(self.data_idx_order_list)
        self.ops = create_operators(dataset_config['transforms'], global_config)
+        self.ext_op_transform_idx = dataset_config.get("ext_op_transform_idx",
+                                                       2)

        ratio_list = dataset_config.get("ratio_list", [1.0])
        self.need_reset = True in [x < 1 for x in ratio_list]
@@ -88,6 +90,29 @@ class LMDBDataSet(Dataset):
        if imgori is None:
            return None
        return imgori
+    
+    def get_ext_data(self):
+        ext_data_num = 0
+        for op in self.ops:
+            if hasattr(op, 'ext_data_num'):
+                ext_data_num = getattr(op, 'ext_data_num')
+                break
+        load_data_ops = self.ops[:self.ext_op_transform_idx]
+        ext_data = []
+        
+        while len(ext_data) < ext_data_num:
+            lmdb_idx, file_idx = self.data_idx_order_list[np.random.randint(self.__len__())]
+            lmdb_idx = int(lmdb_idx)
+            file_idx = int(file_idx)
+            sample_info = self.get_lmdb_sample_info(self.lmdb_sets[lmdb_idx]['txn'],
+                                                file_idx)
+            if sample_info is None:
+                continue
+            img, label = sample_info
+            data = {'image': img, 'label': label}
+            outs = transform(data, load_data_ops)
+            ext_data.append(data)
+        return ext_data

    def get_lmdb_sample_info(self, txn, index):
        label_key = 'label-%09d'.encode() % index
@@ -109,6 +134,7 @@ class LMDBDataSet(Dataset):
            return self.__getitem__(np.random.randint(self.__len__()))
        img, label = sample_info
        data = {'image': img, 'label': label}
+        data['ext_data'] = self.get_ext_data()
        outs = transform(data, self.ops)
        if outs is None:
            return self.__getitem__(np.random.randint(self.__len__()))

--- a/ppocr/modeling/architectures/__init__.py
+++ b/ppocr/modeling/architectures/__init__.py
@@ -15,10 +15,13 @@
 import copy
 import importlib

+from paddle.jit import to_static
+from paddle.static import InputSpec
+
 from .base_model import BaseModel
 from .distillation_model import DistillationModel

-__all__ = ['build_model']
+__all__ = ["build_model", "apply_to_static"]


 def build_model(config):
@@ -30,3 +33,36 @@ def build_model(config):
        mod = importlib.import_module(__name__)
        arch = getattr(mod, name)(config)
    return arch
+
+
+def apply_to_static(model, config, logger):
+    if config["Global"].get("to_static", False) is not True:
+        return model
+    assert "image_shape" in config[
+        "Global"], "image_shape must be assigned for static training mode..."
+    supported_list = ["DB", "SVTR"]
+    if config["Architecture"]["algorithm"] in ["Distillation"]:
+        algo = list(config["Architecture"]["Models"].values())[0]["algorithm"]
+    else:
+        algo = config["Architecture"]["algorithm"]
+    assert algo in supported_list, f"algorithms that supports static training must in in {supported_list} but got {algo}"
+
+    specs = [
+        InputSpec(
+            [None] + config["Global"]["image_shape"], dtype='float32')
+    ]
+
+    if algo == "SVTR":
+        specs.append([
+            InputSpec(
+                [None, config["Global"]["max_text_length"]],
+                dtype='int64'), InputSpec(
+                    [None, config["Global"]["max_text_length"]], dtype='int64'),
+            InputSpec(
+                [None], dtype='int64'), InputSpec(
+                    [None], dtype='float64')
+        ])
+
+    model = to_static(model, input_spec=specs)
+    logger.info("Successfully to apply @to_static with specs: {}".format(specs))
+    return model
--- a/ppocr/modeling/heads/rec_sar_head.py
+++ b/ppocr/modeling/heads/rec_sar_head.py
@@ -83,7 +83,7 @@ class SAREncoder(nn.Layer):

    def forward(self, feat, img_metas=None):
        if img_metas is not None:
-            assert len(img_metas[0]) == feat.shape[0]
+            assert len(img_metas[0]) == paddle.shape(feat)[0]

        valid_ratios = None
        if img_metas is not None and self.mask:
@@ -98,9 +98,10 @@ class SAREncoder(nn.Layer):

        if valid_ratios is not None:
            valid_hf = []
-            T = holistic_feat.shape[1]
-            for i in range(len(valid_ratios)):
-                valid_step = min(T, math.ceil(T * valid_ratios[i])) - 1
+            T = paddle.shape(holistic_feat)[1]
+            for i in range(paddle.shape(valid_ratios)[0]):
+                valid_step = paddle.minimum(
+                    T, paddle.ceil(valid_ratios[i] * T).astype('int32')) - 1
                valid_hf.append(holistic_feat[i, valid_step, :])
            valid_hf = paddle.stack(valid_hf, axis=0)
        else:
@@ -247,13 +248,14 @@ class ParallelSARDecoder(BaseDecoder):
        # bsz * (seq_len + 1) * h * w * attn_size
        attn_weight = self.conv1x1_2(attn_weight)
        # bsz * (seq_len + 1) * h * w * 1
-        bsz, T, h, w, c = attn_weight.shape
+        bsz, T, h, w, c = paddle.shape(attn_weight)
        assert c == 1

        if valid_ratios is not None:
            # cal mask of attention weight
-            for i in range(len(valid_ratios)):
-                valid_width = min(w, math.ceil(w * valid_ratios[i]))
+            for i in range(paddle.shape(valid_ratios)[0]):
+                valid_width = paddle.minimum(
+                    w, paddle.ceil(valid_ratios[i] * w).astype("int32"))
                if valid_width < w:
                    attn_weight[i, :, :, valid_width:, :] = float('-inf')

@@ -288,7 +290,7 @@ class ParallelSARDecoder(BaseDecoder):
        img_metas: [label, valid_ratio]
        '''
        if img_metas is not None:
-            assert len(img_metas[0]) == feat.shape[0]
+            assert paddle.shape(img_metas[0])[0] == paddle.shape(feat)[0]

        valid_ratios = None
        if img_metas is not None and self.mask:
@@ -302,7 +304,6 @@ class ParallelSARDecoder(BaseDecoder):
        # bsz * (seq_len + 1) * C
        out_dec = self._2d_attention(
            in_dec, feat, out_enc, valid_ratios=valid_ratios)
-        # bsz * (seq_len + 1) * num_classes

        return out_dec[:, 1:, :]  # bsz * seq_len * num_classes

@@ -395,7 +396,6 @@ class SARHead(nn.Layer):

        if self.training:
            label = targets[0]  # label
-            label = paddle.to_tensor(label, dtype='int64')
            final_out = self.decoder(
                feat, holistic_feat, label, img_metas=targets)
        else:

--- a/ppstructure/vqa/README_ch.md
+++ b/ppstructure/vqa/README_ch.md
@@ -52,7 +52,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进

 ### 3.1 SER

-![](../../doc/vqa/result_ser/zh_val_0_ser.jpg) | ![](../../doc/vqa/result_ser/zh_val_42_ser.jpg)
+![](../docs/vqa/result_ser/zh_val_0_ser.jpg) | ![](../docs/vqa/result_ser/zh_val_42_ser.jpg)
 ---|---

 图中不同颜色的框表示不同的类别，对于XFUND数据集，有`QUESTION`, `ANSWER`, `HEADER` 3种类别
@@ -65,7 +65,7 @@ PP-Structure 里的 DOC-VQA算法基于PaddleNLP自然语言处理算法库进

 ### 3.2 RE

-![](../../doc/vqa/result_re/zh_val_21_re.jpg) | ![](../../doc/vqa/result_re/zh_val_40_re.jpg)
+![](../docs/vqa/result_re/zh_val_21_re.jpg) | ![](../docs/vqa/result_re/zh_val_40_re.jpg)
 ---|---



--- a/test_tipc/docs/jeston_test_train_inference_python.md
+++ b/test_tipc/docs/jeston_test_train_inference_python.md
@@ -115,4 +115,4 @@ ValueError: The results of python_infer_gpu_usetrt_True_precision_fp32_batchsize
 ## 3. 更多教程
 本文档为功能测试用，更丰富的训练预测使用教程请参考：  
 [模型训练](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/training.md)  
-[基于Python预测引擎推理](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/inference.md)
+[基于Python预测引擎推理](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/inference_ppocr.md)
--- a/test_tipc/docs/mac_test_train_inference_python.md
+++ b/test_tipc/docs/mac_test_train_inference_python.md
@@ -152,4 +152,4 @@ ValueError: The results of python_infer_cpu_usemkldnn_False_threads_1_batchsize_
 ## 3. 更多教程
 本文档为功能测试用，更丰富的训练预测使用教程请参考：  
 [模型训练](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/training.md)  
-[基于Python预测引擎推理](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/inference.md)
+[基于Python预测引擎推理](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/inference_ppocr.md)
--- a/test_tipc/docs/test_train_inference_python.md
+++ b/test_tipc/docs/test_train_inference_python.md
@@ -153,4 +153,4 @@ python3.7 test_tipc/compare_results.py --gt_file=./test_tipc/results/python_*.tx
 ## 3. 更多教程
 本文档为功能测试用，更丰富的训练预测使用教程请参考：  
 [模型训练](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/training.md)  
-[基于Python预测引擎推理](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/inference.md)
+[基于Python预测引擎推理](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/inference_ppocr.md)
--- a/test_tipc/docs/win_test_train_inference_python.md
+++ b/test_tipc/docs/win_test_train_inference_python.md
@@ -156,4 +156,4 @@ ValueError: The results of python_infer_cpu_usemkldnn_False_threads_1_batchsize_
 ## 3. 更多教程
 本文档为功能测试用，更丰富的训练预测使用教程请参考：  
 [模型训练](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/training.md)  
-[基于Python预测引擎推理](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/inference.md)
+[基于Python预测引擎推理](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/inference_ppocr.md)
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -17,7 +17,7 @@ import sys

 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
-sys.path.append(os.path.abspath(os.path.join(__dir__, "..")))
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, "..")))

 import argparse


--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -34,6 +34,7 @@ def init_args():
    parser = argparse.ArgumentParser()
    # params for prediction engine
    parser.add_argument("--use_gpu", type=str2bool, default=True)
+    parser.add_argument("--use_xpu", type=str2bool, default=False)
    parser.add_argument("--ir_optim", type=str2bool, default=True)
    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
    parser.add_argument("--min_subgraph_size", type=int, default=15)
@@ -285,6 +286,8 @@ def create_predictor(args, mode, logger):
                config.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)

+        elif args.use_xpu:
+            config.enable_xpu(10 * 1024 * 1024)
        else:
            config.disable_gpu()
            if hasattr(args, "cpu_threads"):

--- a/tools/infer_rec.py
+++ b/tools/infer_rec.py
@@ -157,7 +157,7 @@ def main():

            if info is not None:
                logger.info("\t result: {}".format(info))
-                fout.write(file + "\t" + info)
+                fout.write(file + "\t" + info + "\n")
    logger.info("success!")



--- a/tools/program.py
+++ b/tools/program.py
@@ -112,20 +112,25 @@ def merge_config(config, opts):
    return config


-def check_gpu(use_gpu):
+def check_device(use_gpu, use_xpu=False):
    """
    Log error and exit when set use_gpu=true in paddlepaddle
    cpu version.
    """
-    err = "Config use_gpu cannot be set as true while you are " \
-          "using paddlepaddle cpu version ! \nPlease try: \n" \
-          "\t1. Install paddlepaddle-gpu to run model on GPU \n" \
-          "\t2. Set use_gpu as false in config file to run " \
+    err = "Config {} cannot be set as true while your paddle " \
+          "is not compiled with {} ! \nPlease try: \n" \
+          "\t1. Install paddlepaddle to run model on {} \n" \
+          "\t2. Set {} as false in config file to run " \
          "model on CPU"

    try:
+        if use_gpu and use_xpu:
+            print("use_xpu and use_gpu can not both be ture.")
        if use_gpu and not paddle.is_compiled_with_cuda():
-            print(err)
+            print(err.format("use_gpu", "cuda", "gpu", "use_gpu"))
+            sys.exit(1)
+        if use_xpu and not paddle.device.is_compiled_with_xpu():
+            print(err.format("use_xpu", "xpu", "xpu", "use_xpu"))
            sys.exit(1)
    except Exception as e:
        pass
@@ -301,6 +306,7 @@ def train(config,
            stats['lr'] = lr
            train_stats.update(stats)

+
            if log_writer is not None and dist.get_rank() == 0:
                log_writer.log_metrics(metrics=train_stats.get(), prefix="TRAIN", step=global_step)

@@ -547,7 +553,7 @@ def preprocess(is_train=False):

    # check if set use_gpu=True in paddlepaddle cpu version
    use_gpu = config['Global']['use_gpu']
-    check_gpu(use_gpu)
+    use_xpu = config['Global'].get('use_xpu', False)

    # check if set use_xpu=True in paddlepaddle cpu/gpu version
    use_xpu = False
@@ -562,11 +568,13 @@ def preprocess(is_train=False):
        'SEED', 'SDMGR', 'LayoutXLM', 'LayoutLM', 'PREN', 'FCE', 'SVTR'
    ]

-    device = 'cpu'
-    if use_gpu:
-        device = 'gpu:{}'.format(dist.ParallelEnv().dev_id)
    if use_xpu:
-        device = 'xpu'
+        device = 'xpu:{0}'.format(os.getenv('FLAGS_selected_xpus', 0))
+    else:
+        device = 'gpu:{}'.format(dist.ParallelEnv()
+                                 .dev_id) if use_gpu else 'cpu'
+    check_device(use_gpu, use_xpu)
+
    device = paddle.set_device(device)

    config['Global']['distributed'] = dist.get_world_size() != 1

--- a/tools/train.py
+++ b/tools/train.py
@@ -35,6 +35,7 @@ from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
 from ppocr.utils.save_load import load_model
 from ppocr.utils.utility import set_seed
+from ppocr.modeling.architectures import apply_to_static
 import tools.program as program

 dist.get_world_size()
@@ -121,6 +122,8 @@ def main(config, device, logger, vdl_writer):
    if config['Global']['distributed']:
        model = paddle.DataParallel(model)

+    model = apply_to_static(model, config, logger)
+
    # build loss
    loss_class = build_loss(config['Loss'])