Unverified Commit 5e4d1891 authored by Double_V, committed by GitHub

Merge branch 'dygraph' into sdmgr

parents 0cc1b5dc e323c8bd
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import json
import cv2
import numpy as np
from copy import deepcopy
import paddle
# relative reference
from utils import parse_args, get_image_file_list, draw_ser_results, get_bio_label_maps
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification
def pad_sentences(tokenizer,
encoded_inputs,
max_seq_len=512,
pad_to_max_seq_len=True,
return_attention_mask=True,
return_token_type_ids=True,
return_overflowing_tokens=False,
return_special_tokens_mask=False):
    # Pad the sequence length up to the next multiple of max_seq_len so the
    # flat inputs can later be reshaped into fixed-size pages.
max_seq_len = (
len(encoded_inputs["input_ids"]) // max_seq_len + 1) * max_seq_len
needs_to_be_padded = pad_to_max_seq_len and \
max_seq_len and len(encoded_inputs["input_ids"]) < max_seq_len
if needs_to_be_padded:
difference = max_seq_len - len(encoded_inputs["input_ids"])
if tokenizer.padding_side == 'right':
if return_attention_mask:
encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
"input_ids"]) + [0] * difference
if return_token_type_ids:
encoded_inputs["token_type_ids"] = (
encoded_inputs["token_type_ids"] +
[tokenizer.pad_token_type_id] * difference)
if return_special_tokens_mask:
encoded_inputs["special_tokens_mask"] = encoded_inputs[
"special_tokens_mask"] + [1] * difference
encoded_inputs["input_ids"] = encoded_inputs[
"input_ids"] + [tokenizer.pad_token_id] * difference
encoded_inputs["bbox"] = encoded_inputs["bbox"] + [[0, 0, 0, 0]
] * difference
else:
            assert False, f"padding_side of tokenizer only supports [\"right\"] but got {tokenizer.padding_side}"
else:
if return_attention_mask:
encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
"input_ids"])
return encoded_inputs
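# Illustrative sketch (values made up): pad_sentences always pads up to the
# *next* multiple of max_seq_len, so a 600-token sequence grows to 1024, and
# even an exact 512-token sequence gains one extra all-padding page:
#
#   enc = {"input_ids": [5] * 600, "token_type_ids": [0] * 600,
#          "bbox": [[0, 0, 10, 10]] * 600, "attention_mask": [1] * 600}
#   enc = pad_sentences(tokenizer, enc, max_seq_len=512)
#   assert len(enc["input_ids"]) == 1024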
def split_page(encoded_inputs, max_seq_len=512):
    """
    Reshape the flat padded inputs into pages of max_seq_len tokens each;
    truncation is typically used during training instead of page splitting.
    """
for key in encoded_inputs:
encoded_inputs[key] = paddle.to_tensor(encoded_inputs[key])
if encoded_inputs[key].ndim <= 1: # for input_ids, att_mask and so on
encoded_inputs[key] = encoded_inputs[key].reshape([-1, max_seq_len])
else: # for bbox
encoded_inputs[key] = encoded_inputs[key].reshape(
[-1, max_seq_len, 4])
return encoded_inputs
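# Continuing the sketch above, split_page reshapes each padded field into a
# batch of fixed-size pages (bbox keeps its trailing coordinate dimension):
#
#   enc = split_page(enc, max_seq_len=512)
#   # enc["input_ids"].shape -> [2, 512]
#   # enc["bbox"].shape      -> [2, 512, 4]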
def preprocess(
tokenizer,
ori_img,
ocr_info,
img_size=(224, 224),
pad_token_label_id=-100,
max_seq_len=512,
add_special_ids=False,
return_attention_mask=True, ):
ocr_info = deepcopy(ocr_info)
height = ori_img.shape[0]
width = ori_img.shape[1]
    img = cv2.resize(ori_img,
                     img_size).transpose([2, 0, 1]).astype(np.float32)
segment_offset_id = []
words_list = []
bbox_list = []
input_ids_list = []
token_type_ids_list = []
for info in ocr_info:
        # normalize bbox (x1, y1, x2, y2) to the 0-1000 scale LayoutXLM expects
bbox = info["bbox"]
bbox[0] = int(bbox[0] * 1000.0 / width)
bbox[2] = int(bbox[2] * 1000.0 / width)
bbox[1] = int(bbox[1] * 1000.0 / height)
bbox[3] = int(bbox[3] * 1000.0 / height)
text = info["text"]
encode_res = tokenizer.encode(
text, pad_to_max_seq_len=False, return_attention_mask=True)
if not add_special_ids:
            # TODO: use tokenizer.all_special_ids to remove the special tokens
encode_res["input_ids"] = encode_res["input_ids"][1:-1]
encode_res["token_type_ids"] = encode_res["token_type_ids"][1:-1]
encode_res["attention_mask"] = encode_res["attention_mask"][1:-1]
input_ids_list.extend(encode_res["input_ids"])
token_type_ids_list.extend(encode_res["token_type_ids"])
bbox_list.extend([bbox] * len(encode_res["input_ids"]))
words_list.append(text)
segment_offset_id.append(len(input_ids_list))
encoded_inputs = {
"input_ids": input_ids_list,
"token_type_ids": token_type_ids_list,
"bbox": bbox_list,
"attention_mask": [1] * len(input_ids_list),
}
encoded_inputs = pad_sentences(
tokenizer,
encoded_inputs,
max_seq_len=max_seq_len,
return_attention_mask=return_attention_mask)
    encoded_inputs = split_page(encoded_inputs, max_seq_len)
fake_bs = encoded_inputs["input_ids"].shape[0]
encoded_inputs["image"] = paddle.to_tensor(img).unsqueeze(0).expand(
[fake_bs] + list(img.shape))
encoded_inputs["segment_offset_id"] = segment_offset_id
return encoded_inputs
def postprocess(attention_mask, preds, label_map_path):
if isinstance(preds, paddle.Tensor):
preds = preds.numpy()
preds = np.argmax(preds, axis=2)
_, label_map = get_bio_label_maps(label_map_path)
preds_list = [[] for _ in range(preds.shape[0])]
# keep batch info
for i in range(preds.shape[0]):
for j in range(preds.shape[1]):
if attention_mask[i][j] == 1:
preds_list[i].append(label_map[preds[i][j]])
return preds_list
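# For example, with an id-to-label map like {0: "O", 1: "B-QUESTION", ...},
# postprocess turns a [num_pages, 512, num_labels] logits tensor into one
# list of label strings per page, dropping positions where attention_mask
# is 0 (i.e. the padding added by pad_sentences).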
def merge_preds_list_with_ocr_info(label_map_path, ocr_info, segment_offset_id,
preds_list):
# must ensure the preds_list is generated from the same image
preds = [p for pred in preds_list for p in pred]
label2id_map, _ = get_bio_label_maps(label_map_path)
for key in label2id_map:
if key.startswith("I-"):
label2id_map[key] = label2id_map["B" + key[1:]]
id2label_map = dict()
for key in label2id_map:
val = label2id_map[key]
        if key == "O":
            id2label_map[val] = key
        elif key.startswith("B-") or key.startswith("I-"):
            id2label_map[val] = key[2:]
        else:
            id2label_map[val] = key
for idx in range(len(segment_offset_id)):
if idx == 0:
start_id = 0
else:
start_id = segment_offset_id[idx - 1]
end_id = segment_offset_id[idx]
curr_pred = preds[start_id:end_id]
curr_pred = [label2id_map[p] for p in curr_pred]
if len(curr_pred) <= 0:
pred_id = 0
else:
counts = np.bincount(curr_pred)
pred_id = np.argmax(counts)
ocr_info[idx]["pred_id"] = int(pred_id)
ocr_info[idx]["pred"] = id2label_map[pred_id]
return ocr_info
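# Majority-vote sketch: if one segment's tokens are predicted as
# ["B-QUESTION", "I-QUESTION", "O"], the I- -> B- merge above maps the first
# two labels to the same id, np.bincount/argmax picks that id (2 votes vs. 1),
# and the whole segment is tagged "QUESTION" via id2label_map.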
@paddle.no_grad()
def infer(args):
os.makedirs(args.output_dir, exist_ok=True)
    # init tokenizer and model
tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
# model = LayoutXLMModel.from_pretrained(args.model_name_or_path)
model = LayoutXLMForTokenClassification.from_pretrained(
args.model_name_or_path)
model.eval()
# load ocr results json
ocr_results = dict()
with open(args.ocr_json_path, "r") as fin:
lines = fin.readlines()
for line in lines:
img_name, json_info = line.split("\t")
ocr_results[os.path.basename(img_name)] = json.loads(json_info)
# get infer img list
infer_imgs = get_image_file_list(args.infer_imgs)
# loop for infer
with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout:
for idx, img_path in enumerate(infer_imgs):
print("process: [{}/{}]".format(idx, len(infer_imgs), img_path))
img = cv2.imread(img_path)
ocr_info = ocr_results[os.path.basename(img_path)]["ocr_info"]
inputs = preprocess(
tokenizer=tokenizer,
ori_img=img,
ocr_info=ocr_info,
max_seq_len=args.max_seq_length)
outputs = model(
input_ids=inputs["input_ids"],
bbox=inputs["bbox"],
image=inputs["image"],
token_type_ids=inputs["token_type_ids"],
attention_mask=inputs["attention_mask"])
preds = outputs[0]
preds = postprocess(inputs["attention_mask"], preds,
args.label_map_path)
ocr_info = merge_preds_list_with_ocr_info(
args.label_map_path, ocr_info, inputs["segment_offset_id"],
preds)
fout.write(img_path + "\t" + json.dumps(
{
"ocr_info": ocr_info,
}, ensure_ascii=False) + "\n")
img_res = draw_ser_results(img, ocr_info)
cv2.imwrite(
os.path.join(args.output_dir, os.path.basename(img_path)),
img_res)
return
if __name__ == "__main__":
args = parse_args()
infer(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import json
import cv2
import numpy as np
from copy import deepcopy
from PIL import Image
import paddle
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification
# relative reference
from .utils import parse_args, get_image_file_list, draw_ser_results, get_bio_label_maps
from .utils import pad_sentences, split_page, preprocess, postprocess, merge_preds_list_with_ocr_info
def trans_poly_to_bbox(poly):
x1 = np.min([p[0] for p in poly])
x2 = np.max([p[0] for p in poly])
y1 = np.min([p[1] for p in poly])
y2 = np.max([p[1] for p in poly])
return [x1, y1, x2, y2]
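# e.g. trans_poly_to_bbox([[10, 20], [90, 18], [92, 60], [8, 62]])
# returns the axis-aligned box [8, 18, 92, 62].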
def parse_ocr_info_for_ser(ocr_result):
ocr_info = []
for res in ocr_result:
ocr_info.append({
"text": res[1][0],
"bbox": trans_poly_to_bbox(res[0]),
"poly": res[0],
})
return ocr_info
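# parse_ocr_info_for_ser assumes each OCR result entry is [poly, (text, score)],
# e.g. [[[10, 20], [90, 18], [92, 60], [8, 62]], ("Name:", 0.98)], which
# becomes {"text": "Name:", "bbox": [8, 18, 92, 62], "poly": [...]}.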
class SerPredictor(object):
def __init__(self, args):
self.max_seq_length = args.max_seq_length
# init ser token and model
self.tokenizer = LayoutXLMTokenizer.from_pretrained(
args.model_name_or_path)
self.model = LayoutXLMForTokenClassification.from_pretrained(
args.model_name_or_path)
self.model.eval()
# init ocr_engine
from paddleocr import PaddleOCR
self.ocr_engine = PaddleOCR(
rec_model_dir=args.rec_model_dir,
det_model_dir=args.det_model_dir,
use_angle_cls=False,
show_log=False)
# init dict
label2id_map, self.id2label_map = get_bio_label_maps(
args.label_map_path)
self.label2id_map_for_draw = dict()
for key in label2id_map:
if key.startswith("I-"):
self.label2id_map_for_draw[key] = label2id_map["B" + key[1:]]
else:
self.label2id_map_for_draw[key] = label2id_map[key]
def __call__(self, img):
ocr_result = self.ocr_engine.ocr(img, cls=False)
ocr_info = parse_ocr_info_for_ser(ocr_result)
inputs = preprocess(
tokenizer=self.tokenizer,
ori_img=img,
ocr_info=ocr_info,
max_seq_len=self.max_seq_length)
outputs = self.model(
input_ids=inputs["input_ids"],
bbox=inputs["bbox"],
image=inputs["image"],
token_type_ids=inputs["token_type_ids"],
attention_mask=inputs["attention_mask"])
preds = outputs[0]
preds = postprocess(inputs["attention_mask"], preds, self.id2label_map)
ocr_info = merge_preds_list_with_ocr_info(
ocr_info, inputs["segment_offset_id"], preds,
self.label2id_map_for_draw)
return ocr_info, inputs
if __name__ == "__main__":
args = parse_args()
os.makedirs(args.output_dir, exist_ok=True)
# get infer img list
infer_imgs = get_image_file_list(args.infer_imgs)
# loop for infer
ser_engine = SerPredictor(args)
with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout:
for idx, img_path in enumerate(infer_imgs):
print("process: [{}/{}], {}".format(idx, len(infer_imgs), img_path))
img = cv2.imread(img_path)
result, _ = ser_engine(img)
fout.write(img_path + "\t" + json.dumps(
{
"ser_resule": result,
}, ensure_ascii=False) + "\n")
img_res = draw_ser_results(img, result)
cv2.imwrite(
os.path.join(args.output_dir,
os.path.splitext(os.path.basename(img_path))[0] +
"_ser.jpg"), img_res)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import json
import cv2
import numpy as np
from copy import deepcopy
from PIL import Image
import paddle
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForRelationExtraction
# relative reference
from utils import parse_args, get_image_file_list, draw_re_results
from infer_ser_e2e import SerPredictor
def make_input(ser_input, ser_result, max_seq_len=512):
entities_labels = {'HEADER': 0, 'QUESTION': 1, 'ANSWER': 2}
entities = ser_input['entities'][0]
assert len(entities) == len(ser_result)
# entities
start = []
end = []
label = []
entity_idx_dict = {}
for i, (res, entity) in enumerate(zip(ser_result, entities)):
if res['pred'] == 'O':
continue
entity_idx_dict[len(start)] = i
start.append(entity['start'])
end.append(entity['end'])
label.append(entities_labels[res['pred']])
entities = dict(start=start, end=end, label=label)
# relations
head = []
tail = []
for i in range(len(entities["label"])):
for j in range(len(entities["label"])):
if entities["label"][i] == 1 and entities["label"][j] == 2:
head.append(i)
tail.append(j)
relations = dict(head=head, tail=tail)
batch_size = ser_input["input_ids"].shape[0]
entities_batch = []
relations_batch = []
for b in range(batch_size):
entities_batch.append(entities)
relations_batch.append(relations)
ser_input['entities'] = entities_batch
ser_input['relations'] = relations_batch
ser_input.pop('segment_offset_id')
return ser_input, entity_idx_dict
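# Candidate-generation sketch: with entities_labels above, every entity
# predicted QUESTION (label 1) is paired with every entity predicted ANSWER
# (label 2) as a (head, tail) candidate for the RE head to score, and
# entity_idx_dict maps each kept entity's index back to its position in
# ser_result (entities predicted "O" are skipped).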
class SerReSystem(object):
def __init__(self, args):
self.ser_engine = SerPredictor(args)
self.tokenizer = LayoutXLMTokenizer.from_pretrained(
args.re_model_name_or_path)
self.model = LayoutXLMForRelationExtraction.from_pretrained(
args.re_model_name_or_path)
self.model.eval()
def __call__(self, img):
ser_result, ser_inputs = self.ser_engine(img)
re_input, entity_idx_dict = make_input(ser_inputs, ser_result)
re_result = self.model(**re_input)
pred_relations = re_result['pred_relations'][0]
        # convert the predicted relations back to pairs of OCR info entries
result = []
used_tail_id = []
for relation in pred_relations:
if relation['tail_id'] in used_tail_id:
continue
used_tail_id.append(relation['tail_id'])
ocr_info_head = ser_result[entity_idx_dict[relation['head_id']]]
ocr_info_tail = ser_result[entity_idx_dict[relation['tail_id']]]
result.append((ocr_info_head, ocr_info_tail))
return result
if __name__ == "__main__":
args = parse_args()
os.makedirs(args.output_dir, exist_ok=True)
# get infer img list
infer_imgs = get_image_file_list(args.infer_imgs)
# loop for infer
ser_re_engine = SerReSystem(args)
with open(os.path.join(args.output_dir, "infer_results.txt"), "w") as fout:
for idx, img_path in enumerate(infer_imgs):
print("process: [{}/{}], {}".format(idx, len(infer_imgs), img_path))
img = cv2.imread(img_path)
result = ser_re_engine(img)
fout.write(img_path + "\t" + json.dumps(
{
"result": result,
}, ensure_ascii=False) + "\n")
img_res = draw_re_results(img, result)
cv2.imwrite(
os.path.join(args.output_dir,
os.path.splitext(os.path.basename(img_path))[0] +
"_re.jpg"), img_res)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import numpy as np
import logging
logger = logging.getLogger(__name__)
PREFIX_CHECKPOINT_DIR = "checkpoint"
_re_checkpoint = re.compile(r"^" + PREFIX_CHECKPOINT_DIR + r"\-(\d+)$")
def get_last_checkpoint(folder):
content = os.listdir(folder)
checkpoints = [
path for path in content
if _re_checkpoint.search(path) is not None and os.path.isdir(
os.path.join(folder, path))
]
if len(checkpoints) == 0:
return
return os.path.join(
folder,
max(checkpoints,
key=lambda x: int(_re_checkpoint.search(x).groups()[0])))
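# e.g. if folder contains checkpoint-100/, checkpoint-500/ and
# checkpoint-2000/, get_last_checkpoint(folder) returns
# "<folder>/checkpoint-2000": the step number is compared numerically,
# not lexicographically.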
def re_score(pred_relations, gt_relations, mode="strict"):
    """Evaluate RE predictions.

    Args:
        pred_relations (list): list of lists of predicted relations
            (several relations per sentence)
        gt_relations (list): list of lists of ground-truth relations, where
            rel = {"head": (start_idx (inclusive), end_idx (exclusive)),
                   "tail": (start_idx (inclusive), end_idx (exclusive)),
                   "head_type": ent_type,
                   "tail_type": ent_type,
                   "type": rel_type}
        mode (str): "strict" (spans and entity types must match) or
            "boundaries" (only spans must match)
    """
assert mode in ["strict", "boundaries"]
    relation_types = [1]  # type 0 means "no relation"; only type 1 is scored
scores = {
rel: {
"tp": 0,
"fp": 0,
"fn": 0
}
for rel in relation_types + ["ALL"]
}
# Count GT relations and Predicted relations
n_sents = len(gt_relations)
    n_rels = sum(len(sent) for sent in gt_relations)
    n_found = sum(len(sent) for sent in pred_relations)
# Count TP, FP and FN per type
for pred_sent, gt_sent in zip(pred_relations, gt_relations):
for rel_type in relation_types:
# strict mode takes argument types into account
if mode == "strict":
pred_rels = {(rel["head"], rel["head_type"], rel["tail"],
rel["tail_type"])
for rel in pred_sent if rel["type"] == rel_type}
gt_rels = {(rel["head"], rel["head_type"], rel["tail"],
rel["tail_type"])
for rel in gt_sent if rel["type"] == rel_type}
# boundaries mode only takes argument spans into account
elif mode == "boundaries":
pred_rels = {(rel["head"], rel["tail"])
for rel in pred_sent if rel["type"] == rel_type}
gt_rels = {(rel["head"], rel["tail"])
for rel in gt_sent if rel["type"] == rel_type}
scores[rel_type]["tp"] += len(pred_rels & gt_rels)
scores[rel_type]["fp"] += len(pred_rels - gt_rels)
scores[rel_type]["fn"] += len(gt_rels - pred_rels)
# Compute per entity Precision / Recall / F1
for rel_type in scores.keys():
if scores[rel_type]["tp"]:
scores[rel_type]["p"] = scores[rel_type]["tp"] / (
scores[rel_type]["fp"] + scores[rel_type]["tp"])
scores[rel_type]["r"] = scores[rel_type]["tp"] / (
scores[rel_type]["fn"] + scores[rel_type]["tp"])
else:
scores[rel_type]["p"], scores[rel_type]["r"] = 0, 0
if not scores[rel_type]["p"] + scores[rel_type]["r"] == 0:
scores[rel_type]["f1"] = (
2 * scores[rel_type]["p"] * scores[rel_type]["r"] /
(scores[rel_type]["p"] + scores[rel_type]["r"]))
else:
scores[rel_type]["f1"] = 0
# Compute micro F1 Scores
tp = sum([scores[rel_type]["tp"] for rel_type in relation_types])
fp = sum([scores[rel_type]["fp"] for rel_type in relation_types])
fn = sum([scores[rel_type]["fn"] for rel_type in relation_types])
if tp:
precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * precision * recall / (precision + recall)
else:
precision, recall, f1 = 0, 0, 0
scores["ALL"]["p"] = precision
scores["ALL"]["r"] = recall
scores["ALL"]["f1"] = f1
scores["ALL"]["tp"] = tp
scores["ALL"]["fp"] = fp
scores["ALL"]["fn"] = fn
# Compute Macro F1 Scores
scores["ALL"]["Macro_f1"] = np.mean(
[scores[ent_type]["f1"] for ent_type in relation_types])
scores["ALL"]["Macro_p"] = np.mean(
[scores[ent_type]["p"] for ent_type in relation_types])
scores["ALL"]["Macro_r"] = np.mean(
[scores[ent_type]["r"] for ent_type in relation_types])
# logger.info(f"RE Evaluation in *** {mode.upper()} *** mode")
# logger.info(
# "processed {} sentences with {} relations; found: {} relations; correct: {}.".format(
# n_sents, n_rels, n_found, tp
# )
# )
# logger.info(
# "\tALL\t TP: {};\tFP: {};\tFN: {}".format(scores["ALL"]["tp"], scores["ALL"]["fp"], scores["ALL"]["fn"])
# )
# logger.info("\t\t(m avg): precision: {:.2f};\trecall: {:.2f};\tf1: {:.2f} (micro)".format(precision, recall, f1))
# logger.info(
# "\t\t(M avg): precision: {:.2f};\trecall: {:.2f};\tf1: {:.2f} (Macro)\n".format(
# scores["ALL"]["Macro_p"], scores["ALL"]["Macro_r"], scores["ALL"]["Macro_f1"]
# )
# )
# for rel_type in relation_types:
# logger.info(
# "\t{}: \tTP: {};\tFP: {};\tFN: {};\tprecision: {:.2f};\trecall: {:.2f};\tf1: {:.2f};\t{}".format(
# rel_type,
# scores[rel_type]["tp"],
# scores[rel_type]["fp"],
# scores[rel_type]["fn"],
# scores[rel_type]["p"],
# scores[rel_type]["r"],
# scores[rel_type]["f1"],
# scores[rel_type]["tp"] + scores[rel_type]["fp"],
# )
# )
return scores
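# Worked example for the micro scores: one sentence with 2 ground-truth
# relations and 3 predictions, 2 of which match, gives tp=2, fp=1, fn=0,
# hence precision = 2/3, recall = 1.0 and
# f1 = 2 * (2/3) * 1.0 / (2/3 + 1.0) = 0.8.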
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
import random
import numpy as np
import paddle
from paddlenlp.transformers import LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForRelationExtraction
from xfun import XFUNDataset
from utils import parse_args, get_bio_label_maps, print_arguments
from data_collator import DataCollator
from metric import re_score
from ppocr.utils.logging import get_logger
def set_seed(seed):
random.seed(seed)
np.random.seed(seed)
paddle.seed(seed)
def cal_metric(re_preds, re_labels, entities):
gt_relations = []
for b in range(len(re_labels)):
rel_sent = []
for head, tail in zip(re_labels[b]["head"], re_labels[b]["tail"]):
rel = {}
rel["head_id"] = head
rel["head"] = (entities[b]["start"][rel["head_id"]],
entities[b]["end"][rel["head_id"]])
rel["head_type"] = entities[b]["label"][rel["head_id"]]
rel["tail_id"] = tail
rel["tail"] = (entities[b]["start"][rel["tail_id"]],
entities[b]["end"][rel["tail_id"]])
rel["tail_type"] = entities[b]["label"][rel["tail_id"]]
rel["type"] = 1
rel_sent.append(rel)
gt_relations.append(rel_sent)
re_metrics = re_score(re_preds, gt_relations, mode="boundaries")
return re_metrics
def evaluate(model, eval_dataloader, logger, prefix=""):
# Eval!
logger.info("***** Running evaluation {} *****".format(prefix))
logger.info(" Num examples = {}".format(len(eval_dataloader.dataset)))
re_preds = []
re_labels = []
entities = []
eval_loss = 0.0
model.eval()
for idx, batch in enumerate(eval_dataloader):
with paddle.no_grad():
outputs = model(**batch)
loss = outputs['loss'].mean().item()
if paddle.distributed.get_rank() == 0:
logger.info("[Eval] process: {}/{}, loss: {:.5f}".format(
idx, len(eval_dataloader), loss))
eval_loss += loss
re_preds.extend(outputs['pred_relations'])
re_labels.extend(batch['relations'])
entities.extend(batch['entities'])
re_metrics = cal_metric(re_preds, re_labels, entities)
re_metrics = {
"precision": re_metrics["ALL"]["p"],
"recall": re_metrics["ALL"]["r"],
"f1": re_metrics["ALL"]["f1"],
}
model.train()
return re_metrics
def train(args):
logger = get_logger(log_file=os.path.join(args.output_dir, "train.log"))
print_arguments(args, logger)
# Added here for reproducibility (even between python 2 and 3)
set_seed(args.seed)
label2id_map, id2label_map = get_bio_label_maps(args.label_map_path)
pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index
# dist mode
if paddle.distributed.get_world_size() > 1:
paddle.distributed.init_parallel_env()
tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
model = LayoutXLMModel.from_pretrained(args.model_name_or_path)
model = LayoutXLMForRelationExtraction(model, dropout=None)
# dist mode
if paddle.distributed.get_world_size() > 1:
model = paddle.distributed.DataParallel(model)
train_dataset = XFUNDataset(
tokenizer,
data_dir=args.train_data_dir,
label_path=args.train_label_path,
label2id_map=label2id_map,
img_size=(224, 224),
max_seq_len=args.max_seq_length,
pad_token_label_id=pad_token_label_id,
contains_re=True,
add_special_ids=False,
return_attention_mask=True,
load_mode='all')
eval_dataset = XFUNDataset(
tokenizer,
data_dir=args.eval_data_dir,
label_path=args.eval_label_path,
label2id_map=label2id_map,
img_size=(224, 224),
max_seq_len=args.max_seq_length,
pad_token_label_id=pad_token_label_id,
contains_re=True,
add_special_ids=False,
return_attention_mask=True,
load_mode='all')
train_sampler = paddle.io.DistributedBatchSampler(
train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True)
args.train_batch_size = args.per_gpu_train_batch_size * \
max(1, paddle.distributed.get_world_size())
train_dataloader = paddle.io.DataLoader(
train_dataset,
batch_sampler=train_sampler,
num_workers=8,
use_shared_memory=True,
collate_fn=DataCollator())
eval_dataloader = paddle.io.DataLoader(
eval_dataset,
batch_size=args.per_gpu_eval_batch_size,
num_workers=8,
shuffle=False,
collate_fn=DataCollator())
t_total = len(train_dataloader) * args.num_train_epochs
    # build a linear-decay LR schedule (PolynomialDecay, power=1.0) with optional warmup
lr_scheduler = paddle.optimizer.lr.PolynomialDecay(
learning_rate=args.learning_rate,
decay_steps=t_total,
end_lr=0.0,
power=1.0)
if args.warmup_steps > 0:
lr_scheduler = paddle.optimizer.lr.LinearWarmup(
lr_scheduler,
args.warmup_steps,
start_lr=0,
end_lr=args.learning_rate, )
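    # Schedule sketch: PolynomialDecay with power=1.0 decays the learning rate
    # linearly from args.learning_rate to 0 over t_total steps, and LinearWarmup
    # first ramps it from 0 to args.learning_rate over warmup_steps. Note that
    # the optimizer below is created with the constant args.learning_rate and
    # lr_scheduler.step() is commented out in the loop, so the schedule is
    # effectively unused in this script.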
grad_clip = paddle.nn.ClipGradByNorm(clip_norm=10)
optimizer = paddle.optimizer.Adam(
learning_rate=args.learning_rate,
parameters=model.parameters(),
epsilon=args.adam_epsilon,
grad_clip=grad_clip,
weight_decay=args.weight_decay)
# Train!
logger.info("***** Running training *****")
logger.info(" Num examples = {}".format(len(train_dataset)))
logger.info(" Num Epochs = {}".format(args.num_train_epochs))
logger.info(" Instantaneous batch size per GPU = {}".format(
args.per_gpu_train_batch_size))
logger.info(
" Total train batch size (w. parallel, distributed & accumulation) = {}".
format(args.train_batch_size * paddle.distributed.get_world_size()))
logger.info(" Total optimization steps = {}".format(t_total))
global_step = 0
model.clear_gradients()
train_dataloader_len = len(train_dataloader)
    best_metric = {'f1': 0}
model.train()
for epoch in range(int(args.num_train_epochs)):
for step, batch in enumerate(train_dataloader):
outputs = model(**batch)
            # ppnlp RE models return a dict with 'loss' and 'pred_relations'
loss = outputs['loss']
loss = loss.mean()
logger.info(
"epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {}, lr: {}".
format(epoch, args.num_train_epochs, step, train_dataloader_len,
global_step, np.mean(loss.numpy()), optimizer.get_lr()))
loss.backward()
optimizer.step()
optimizer.clear_grad()
# lr_scheduler.step() # Update learning rate schedule
global_step += 1
if (paddle.distributed.get_rank() == 0 and args.eval_steps > 0 and
global_step % args.eval_steps == 0):
# Log metrics
if (paddle.distributed.get_rank() == 0 and args.
evaluate_during_training): # Only evaluate when single GPU otherwise metrics may not average well
results = evaluate(model, eval_dataloader, logger)
                    if results['f1'] > best_metric['f1']:
                        best_metric = results
output_dir = os.path.join(args.output_dir,
"checkpoint-best")
os.makedirs(output_dir, exist_ok=True)
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
paddle.save(args,
os.path.join(output_dir,
"training_args.bin"))
logger.info("Saving model checkpoint to {}".format(
output_dir))
logger.info("eval results: {}".format(results))
logger.info("best_metirc: {}".format(best_metirc))
if (paddle.distributed.get_rank() == 0 and args.save_steps > 0 and
global_step % args.save_steps == 0):
# Save model checkpoint
output_dir = os.path.join(args.output_dir, "checkpoint-latest")
os.makedirs(output_dir, exist_ok=True)
if paddle.distributed.get_rank() == 0:
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
paddle.save(args,
os.path.join(output_dir, "training_args.bin"))
logger.info("Saving model checkpoint to {}".format(
output_dir))
logger.info("best_metirc: {}".format(best_metirc))
if __name__ == "__main__":
args = parse_args()
os.makedirs(args.output_dir, exist_ok=True)
train(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
import random
import copy
import logging
import argparse
import paddle
import numpy as np
from seqeval.metrics import classification_report, f1_score, precision_score, recall_score
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification
from xfun import XFUNDataset
from utils import parse_args
from utils import get_bio_label_maps
from utils import print_arguments
from ppocr.utils.logging import get_logger
def set_seed(args):
random.seed(args.seed)
np.random.seed(args.seed)
paddle.seed(args.seed)
def train(args):
os.makedirs(args.output_dir, exist_ok=True)
logger = get_logger(log_file=os.path.join(args.output_dir, "train.log"))
print_arguments(args, logger)
label2id_map, id2label_map = get_bio_label_maps(args.label_map_path)
pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index
# dist mode
if paddle.distributed.get_world_size() > 1:
paddle.distributed.init_parallel_env()
tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
base_model = LayoutXLMModel.from_pretrained(args.model_name_or_path)
model = LayoutXLMForTokenClassification(
base_model, num_classes=len(label2id_map), dropout=None)
# dist mode
if paddle.distributed.get_world_size() > 1:
model = paddle.DataParallel(model)
train_dataset = XFUNDataset(
tokenizer,
data_dir=args.train_data_dir,
label_path=args.train_label_path,
label2id_map=label2id_map,
img_size=(224, 224),
pad_token_label_id=pad_token_label_id,
contains_re=False,
add_special_ids=False,
return_attention_mask=True,
load_mode='all')
train_sampler = paddle.io.DistributedBatchSampler(
train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True)
args.train_batch_size = args.per_gpu_train_batch_size * max(
1, paddle.distributed.get_world_size())
train_dataloader = paddle.io.DataLoader(
train_dataset,
batch_sampler=train_sampler,
num_workers=0,
use_shared_memory=True,
collate_fn=None, )
t_total = len(train_dataloader) * args.num_train_epochs
    # build a linear-decay LR schedule (PolynomialDecay, power=1.0) with optional warmup
lr_scheduler = paddle.optimizer.lr.PolynomialDecay(
learning_rate=args.learning_rate,
decay_steps=t_total,
end_lr=0.0,
power=1.0)
if args.warmup_steps > 0:
lr_scheduler = paddle.optimizer.lr.LinearWarmup(
lr_scheduler,
args.warmup_steps,
start_lr=0,
end_lr=args.learning_rate, )
optimizer = paddle.optimizer.AdamW(
learning_rate=lr_scheduler,
parameters=model.parameters(),
epsilon=args.adam_epsilon,
weight_decay=args.weight_decay)
# Train!
logger.info("***** Running training *****")
logger.info(" Num examples = %d", len(train_dataset))
logger.info(" Num Epochs = %d", args.num_train_epochs)
logger.info(" Instantaneous batch size per GPU = %d",
args.per_gpu_train_batch_size)
logger.info(
" Total train batch size (w. parallel, distributed) = %d",
args.train_batch_size * paddle.distributed.get_world_size(), )
logger.info(" Total optimization steps = %d", t_total)
global_step = 0
tr_loss = 0.0
set_seed(args)
best_metrics = None
for epoch_id in range(args.num_train_epochs):
for step, batch in enumerate(train_dataloader):
model.train()
outputs = model(**batch)
# model outputs are always tuple in ppnlp (see doc)
loss = outputs[0]
loss = loss.mean()
logger.info(
"epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {}, lr: {}".
format(epoch_id, args.num_train_epochs, step,
len(train_dataloader), global_step,
loss.numpy()[0], lr_scheduler.get_lr()))
loss.backward()
tr_loss += loss.item()
optimizer.step()
lr_scheduler.step() # Update learning rate schedule
optimizer.clear_grad()
global_step += 1
if (paddle.distributed.get_rank() == 0 and args.eval_steps > 0 and
global_step % args.eval_steps == 0):
# Log metrics
# Only evaluate when single GPU otherwise metrics may not average well
if paddle.distributed.get_rank(
) == 0 and args.evaluate_during_training:
results, _ = evaluate(args, model, tokenizer, label2id_map,
id2label_map, pad_token_label_id,
logger)
if best_metrics is None or results["f1"] >= best_metrics[
"f1"]:
best_metrics = copy.deepcopy(results)
output_dir = os.path.join(args.output_dir, "best_model")
os.makedirs(output_dir, exist_ok=True)
if paddle.distributed.get_rank() == 0:
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
paddle.save(
args,
os.path.join(output_dir, "training_args.bin"))
logger.info("Saving model checkpoint to %s",
output_dir)
logger.info("[epoch {}/{}][iter: {}/{}] results: {}".format(
epoch_id, args.num_train_epochs, step,
len(train_dataloader), results))
if best_metrics is not None:
logger.info("best metrics: {}".format(best_metrics))
if paddle.distributed.get_rank(
) == 0 and args.save_steps > 0 and global_step % args.save_steps == 0:
# Save model checkpoint
output_dir = os.path.join(args.output_dir,
"checkpoint-{}".format(global_step))
os.makedirs(output_dir, exist_ok=True)
if paddle.distributed.get_rank() == 0:
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
paddle.save(args,
os.path.join(output_dir, "training_args.bin"))
logger.info("Saving model checkpoint to %s", output_dir)
return global_step, tr_loss / global_step
def evaluate(args,
model,
tokenizer,
label2id_map,
id2label_map,
pad_token_label_id,
logger,
prefix=""):
eval_dataset = XFUNDataset(
tokenizer,
data_dir=args.eval_data_dir,
label_path=args.eval_label_path,
label2id_map=label2id_map,
img_size=(224, 224),
pad_token_label_id=pad_token_label_id,
contains_re=False,
add_special_ids=False,
return_attention_mask=True,
load_mode='all')
args.eval_batch_size = args.per_gpu_eval_batch_size * max(
1, paddle.distributed.get_world_size())
eval_dataloader = paddle.io.DataLoader(
eval_dataset,
batch_size=args.eval_batch_size,
num_workers=0,
use_shared_memory=True,
collate_fn=None, )
# Eval!
logger.info("***** Running evaluation %s *****", prefix)
logger.info(" Num examples = %d", len(eval_dataset))
logger.info(" Batch size = %d", args.eval_batch_size)
eval_loss = 0.0
nb_eval_steps = 0
preds = None
out_label_ids = None
model.eval()
for idx, batch in enumerate(eval_dataloader):
with paddle.no_grad():
outputs = model(**batch)
tmp_eval_loss, logits = outputs[:2]
tmp_eval_loss = tmp_eval_loss.mean()
if paddle.distributed.get_rank() == 0:
logger.info("[Eval]process: {}/{}, loss: {:.5f}".format(
idx, len(eval_dataloader), tmp_eval_loss.numpy()[0]))
eval_loss += tmp_eval_loss.item()
nb_eval_steps += 1
if preds is None:
preds = logits.numpy()
out_label_ids = batch["labels"].numpy()
else:
preds = np.append(preds, logits.numpy(), axis=0)
out_label_ids = np.append(
out_label_ids, batch["labels"].numpy(), axis=0)
eval_loss = eval_loss / nb_eval_steps
preds = np.argmax(preds, axis=2)
# label_map = {i: label.upper() for i, label in enumerate(labels)}
out_label_list = [[] for _ in range(out_label_ids.shape[0])]
preds_list = [[] for _ in range(out_label_ids.shape[0])]
for i in range(out_label_ids.shape[0]):
for j in range(out_label_ids.shape[1]):
if out_label_ids[i, j] != pad_token_label_id:
out_label_list[i].append(id2label_map[out_label_ids[i][j]])
preds_list[i].append(id2label_map[preds[i][j]])
results = {
"loss": eval_loss,
"precision": precision_score(out_label_list, preds_list),
"recall": recall_score(out_label_list, preds_list),
"f1": f1_score(out_label_list, preds_list),
}
with open(os.path.join(args.output_dir, "test_gt.txt"), "w") as fout:
for lbl in out_label_list:
for l in lbl:
fout.write(l + "\t")
fout.write("\n")
with open(os.path.join(args.output_dir, "test_pred.txt"), "w") as fout:
for lbl in preds_list:
for l in lbl:
fout.write(l + "\t")
fout.write("\n")
report = classification_report(out_label_list, preds_list)
logger.info("\n" + report)
logger.info("***** Eval results %s *****", prefix)
for key in sorted(results.keys()):
logger.info(" %s = %s", key, str(results[key]))
return results, preds_list
if __name__ == "__main__":
args = parse_args()
train(args)
===========================ch_ppocr_mobile_v2.0===========================
===========================ch_PP-OCRv2===========================
model_name:ch_PP-OCRv2
python:python3.7
infer_model:./inference/ch_PP-OCRv2_det_infer/
infer_export:null
infer_quant:True
infer_quant:False
inference:tools/infer/predict_system.py
--use_gpu:False
--enable_mkldnn:False
--use_gpu:False|True
--enable_mkldnn:False|True
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False
--precision:int8
--use_tensorrt:False|True
--precision:fp32
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
--rec_model_dir:./inference/ch_PP-OCRv2_rec_infer/
......
@@ -12,9 +12,9 @@ train_model_name:latest
train_infer_img_dir:./train_data/icdar2015/text_localization/ch4_test_images/
null:null
##
trainer:norm_train|pact_train
trainer:norm_train
norm_train:tools/train.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
pact_train:deploy/slim/quantization/quant.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
pact_train:null
fpgm_train:null
distill_train:null
null:null
@@ -26,9 +26,9 @@ null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
Global.checkpoints:
norm_export:tools/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
quant_export:null
fpgm_export:
distill_export:null
export1:null
......
===========================kl_quant_params===========================
model_name:PPOCRv2_ocr_det_kl
python:python3.7
Global.pretrained_model:null
Global.save_inference_dir:null
infer_model:./inference/ch_PP-OCRv2_det_infer/
infer_export:deploy/slim/quantization/quant_kl.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
infer_quant:True
inference:tools/infer/predict_det.py
--use_gpu:False
--enable_mkldnn:False
--use_gpu:False|True
--enable_mkldnn:True
--cpu_threads:1|6
--rec_batch_num:1
--use_tensorrt:False
--use_tensorrt:False|True
--precision:int8
--det_model_dir:
--image_dir:./inference/ch_det_data_50/all-sum-510/
......
===========================train_params===========================
model_name:PPOCRv2_ocr_det
model_name:ch_PPOCRv2_det_PACT
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
@@ -26,7 +26,7 @@ null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
Global.checkpoints:
norm_export:null
quant_export:deploy/slim/quantization/export_model.py -c configs/det/ch_PP-OCRv2/ch_PP-OCRv2_det_cml.yml -o
fpgm_export:
......
@@ -6,7 +6,7 @@ Global.use_gpu:True|True
Global.auto_cast:fp32
Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
Train.loader.batch_size_per_card:lite_train_lite_infer=16|whole_train_whole_infer=128
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./inference/rec_inference
@@ -26,7 +26,7 @@ null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
Global.checkpoints:
norm_export:tools/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
quant_export:
fpgm_export:
@@ -34,7 +34,7 @@ distill_export:null
export1:null
export2:null
inference_dir:Student
infer_model:./inference/ch_PP-OCRv2_rec_infer/
infer_model:./inference/ch_PP-OCRv2_rec_infer
infer_export:null
infer_quant:False
inference:tools/infer/predict_rec.py
@@ -45,7 +45,7 @@ inference:tools/infer/predict_rec.py
--use_tensorrt:False|True
--precision:fp32|fp16|int8
--rec_model_dir:
--image_dir:/inference/rec_inference
--image_dir:./inference/rec_inference
null:null
--benchmark:True
null:null
......
===========================kl_quant_params===========================
model_name:PPOCRv2_ocr_rec_kl
python:python3.7
Global.pretrained_model:null
Global.save_inference_dir:null
infer_model:./inference/ch_PP-OCRv2_rec_infer/
infer_export:deploy/slim/quantization/quant_kl.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
infer_quant:True
inference:tools/infer/predict_rec.py
--use_gpu:False
--enable_mkldnn:False
--use_gpu:False|True
--enable_mkldnn:False|True
--cpu_threads:1|6
--rec_batch_num:1|6
--use_tensorrt:False
--use_tensorrt:True
--precision:int8
--rec_model_dir:
--image_dir:./inference/rec_inference
......
@@ -6,15 +6,15 @@ Global.use_gpu:True|True
Global.auto_cast:fp32
Global.epoch_num:lite_train_lite_infer=3|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
Train.loader.batch_size_per_card:lite_train_lite_infer=16|whole_train_whole_infer=128
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./inference/rec_inference
null:null
##
trainer:pact_train
norm_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
pact_train:null
norm_train:null
pact_train:deploy/slim/quantization/quant.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
fpgm_train:null
distill_train:null
null:null
@@ -26,15 +26,15 @@ null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
quant_export:
fpgm_export:
Global.checkpoints:
norm_export:null
quant_export:deploy/slim/quantization/export_model.py -c test_tipc/configs/ch_PP-OCRv2_rec/ch_PP-OCRv2_rec_distillation.yml -o
fpgm_export: null
distill_export:null
export1:null
export2:null
inference_dir:Student
infer_model:./inference/ch_PP-OCRv2_rec_infer/
infer_model:./inference/ch_PP-OCRv2_rec_slim_quant_infer
infer_export:null
infer_quant:True
inference:tools/infer/predict_rec.py
@@ -45,7 +45,7 @@ inference:tools/infer/predict_rec.py
--use_tensorrt:False|True
--precision:fp32|fp16|int8
--rec_model_dir:
--image_dir:/inference/rec_inference
--image_dir:./inference/rec_inference
null:null
--benchmark:True
null:null
......
@@ -4,7 +4,7 @@ python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:null
Global.epoch_num:lite_train_lite_infer=1|whole_train_whole_infer=300
Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=300
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=2|whole_train_whole_infer=4
Global.pretrained_model:null
@@ -15,7 +15,7 @@ null:null
trainer:fpgm_train
norm_train:null
pact_train:null
fpgm_train:deploy/slim/prune/sensitivity_anal.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
fpgm_train:deploy/slim/prune/sensitivity_anal.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
distill_train:null
null:null
null:null
@@ -29,7 +29,7 @@ Global.save_inference_dir:./output/
Global.pretrained_model:
norm_export:null
quant_export:null
fpgm_export:deploy/slim/prune/export_prune_model.py -c test_tipc/configs/ppocr_det_mobile/det_mv3_db.yml -o
fpgm_export:deploy/slim/prune/export_prune_model.py -c configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml -o
distill_export:null
export1:null
export2:null
......