Merge remote-tracking branch 'origin/dygraph' into dygraph

41a1b292 · Leif · 9471054e · 3d30899b · 9471054e · 9471054e
Commit 41a1b292 authored Jan 20, 2022 by Leif
20 changed files
--- a/ppstructure/vqa/train_re.py
+++ b/ppstructure/vqa/train_re.py
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-
-__dir__ = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(__dir__)
-sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
-
-import random
-import time
-import numpy as np
-import paddle
-
-from paddlenlp.transformers import LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForRelationExtraction
-
-from xfun import XFUNDataset
-from vqa_utils import parse_args, get_bio_label_maps, print_arguments, set_seed
-from data_collator import DataCollator
-from eval_re import evaluate
-
-from ppocr.utils.logging import get_logger
-
-
-def train(args):
-    logger = get_logger(log_file=os.path.join(args.output_dir, "train.log"))
-    rank = paddle.distributed.get_rank()
-    distributed = paddle.distributed.get_world_size() > 1
-
-    print_arguments(args, logger)
-
-    # Added here for reproducibility (even between python 2 and 3)
-    set_seed(args.seed)
-
-    label2id_map, id2label_map = get_bio_label_maps(args.label_map_path)
-    pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index
-
-    # dist mode
-    if distributed:
-        paddle.distributed.init_parallel_env()
-
-    tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
-    if not args.resume:
-        model = LayoutXLMModel.from_pretrained(args.model_name_or_path)
-        model = LayoutXLMForRelationExtraction(model, dropout=None)
-        logger.info('train from scratch')
-    else:
-        logger.info('resume from {}'.format(args.model_name_or_path))
-        model = LayoutXLMForRelationExtraction.from_pretrained(
-            args.model_name_or_path)
-
-    # dist mode
-    if distributed:
-        model = paddle.DataParallel(model)
-
-    train_dataset = XFUNDataset(
-        tokenizer,
-        data_dir=args.train_data_dir,
-        label_path=args.train_label_path,
-        label2id_map=label2id_map,
-        img_size=(224, 224),
-        max_seq_len=args.max_seq_length,
-        pad_token_label_id=pad_token_label_id,
-        contains_re=True,
-        add_special_ids=False,
-        return_attention_mask=True,
-        load_mode='all')
-
-    eval_dataset = XFUNDataset(
-        tokenizer,
-        data_dir=args.eval_data_dir,
-        label_path=args.eval_label_path,
-        label2id_map=label2id_map,
-        img_size=(224, 224),
-        max_seq_len=args.max_seq_length,
-        pad_token_label_id=pad_token_label_id,
-        contains_re=True,
-        add_special_ids=False,
-        return_attention_mask=True,
-        load_mode='all')
-
-    train_sampler = paddle.io.DistributedBatchSampler(
-        train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True)
-
-    train_dataloader = paddle.io.DataLoader(
-        train_dataset,
-        batch_sampler=train_sampler,
-        num_workers=args.num_workers,
-        use_shared_memory=True,
-        collate_fn=DataCollator())
-
-    eval_dataloader = paddle.io.DataLoader(
-        eval_dataset,
-        batch_size=args.per_gpu_eval_batch_size,
-        num_workers=args.num_workers,
-        shuffle=False,
-        collate_fn=DataCollator())
-
-    t_total = len(train_dataloader) * args.num_train_epochs
-
-    # build linear decay with warmup lr sch
-    lr_scheduler = paddle.optimizer.lr.PolynomialDecay(
-        learning_rate=args.learning_rate,
-        decay_steps=t_total,
-        end_lr=0.0,
-        power=1.0)
-    if args.warmup_steps > 0:
-        lr_scheduler = paddle.optimizer.lr.LinearWarmup(
-            lr_scheduler,
-            args.warmup_steps,
-            start_lr=0,
-            end_lr=args.learning_rate, )
-    grad_clip = paddle.nn.ClipGradByNorm(clip_norm=10)
-    optimizer = paddle.optimizer.Adam(
-        learning_rate=args.learning_rate,
-        parameters=model.parameters(),
-        epsilon=args.adam_epsilon,
-        grad_clip=grad_clip,
-        weight_decay=args.weight_decay)
-
-    # Train!
-    logger.info("***** Running training *****")
-    logger.info("  Num examples = {}".format(len(train_dataset)))
-    logger.info("  Num Epochs = {}".format(args.num_train_epochs))
-    logger.info("  Instantaneous batch size per GPU = {}".format(
-        args.per_gpu_train_batch_size))
-    logger.info(
-        "  Total train batch size (w. parallel, distributed & accumulation) = {}".
-        format(args.per_gpu_train_batch_size *
-               paddle.distributed.get_world_size()))
-    logger.info("  Total optimization steps = {}".format(t_total))
-
-    global_step = 0
-    model.clear_gradients()
-    train_dataloader_len = len(train_dataloader)
-    best_metirc = {'f1': 0}
-    model.train()
-
-    train_reader_cost = 0.0
-    train_run_cost = 0.0
-    total_samples = 0
-    reader_start = time.time()
-
-    print_step = 1
-
-    for epoch in range(int(args.num_train_epochs)):
-        for step, batch in enumerate(train_dataloader):
-            train_reader_cost += time.time() - reader_start
-            train_start = time.time()
-            outputs = model(**batch)
-            train_run_cost += time.time() - train_start
-            # model outputs are always tuple in ppnlp (see doc)
-            loss = outputs['loss']
-            loss = loss.mean()
-
-            loss.backward()
-            optimizer.step()
-            optimizer.clear_grad()
-            # lr_scheduler.step()  # Update learning rate schedule
-
-            global_step += 1
-            total_samples += batch['image'].shape[0]
-
-            if rank == 0 and step % print_step == 0:
-                logger.info(
-                    "epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {:.6f}, lr: {:.6f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec".
-                    format(epoch, args.num_train_epochs, step,
-                           train_dataloader_len, global_step,
-                           np.mean(loss.numpy()),
-                           optimizer.get_lr(), train_reader_cost / print_step, (
-                               train_reader_cost + train_run_cost) / print_step,
-                           total_samples / print_step, total_samples / (
-                               train_reader_cost + train_run_cost)))
-
-                train_reader_cost = 0.0
-                train_run_cost = 0.0
-                total_samples = 0
-
-            if rank == 0 and args.eval_steps > 0 and global_step % args.eval_steps == 0 and args.evaluate_during_training:
-                # Log metrics
-                # Only evaluate when single GPU otherwise metrics may not average well
-                results = evaluate(model, eval_dataloader, logger)
-                if results['f1'] >= best_metirc['f1']:
-                    best_metirc = results
-                    output_dir = os.path.join(args.output_dir, "best_model")
-                    os.makedirs(output_dir, exist_ok=True)
-                    if distributed:
-                        model._layers.save_pretrained(output_dir)
-                    else:
-                        model.save_pretrained(output_dir)
-                    tokenizer.save_pretrained(output_dir)
-                    paddle.save(args,
-                                os.path.join(output_dir, "training_args.bin"))
-                    logger.info("Saving model checkpoint to {}".format(
-                        output_dir))
-                logger.info("eval results: {}".format(results))
-                logger.info("best_metirc: {}".format(best_metirc))
-            reader_start = time.time()
-
-        if rank == 0:
-            # Save model checkpoint
-            output_dir = os.path.join(args.output_dir, "latest_model")
-            os.makedirs(output_dir, exist_ok=True)
-            if distributed:
-                model._layers.save_pretrained(output_dir)
-            else:
-                model.save_pretrained(output_dir)
-            tokenizer.save_pretrained(output_dir)
-            paddle.save(args, os.path.join(output_dir, "training_args.bin"))
-            logger.info("Saving model checkpoint to {}".format(output_dir))
-    logger.info("best_metirc: {}".format(best_metirc))
-
-
-if __name__ == "__main__":
-    args = parse_args()
-    os.makedirs(args.output_dir, exist_ok=True)
-    train(args)
--- a/ppstructure/vqa/train_ser.py
+++ b/ppstructure/vqa/train_ser.py
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-
-__dir__ = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(__dir__)
-sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
-
-import random
-import time
-import copy
-import logging
-
-import argparse
-import paddle
-import numpy as np
-from seqeval.metrics import classification_report, f1_score, precision_score, recall_score
-from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification
-from paddlenlp.transformers import LayoutLMModel, LayoutLMTokenizer, LayoutLMForTokenClassification
-
-from xfun import XFUNDataset
-from vqa_utils import parse_args, get_bio_label_maps, print_arguments, set_seed
-from eval_ser import evaluate
-from losses import SERLoss
-from ppocr.utils.logging import get_logger
-
-MODELS = {
-    'LayoutXLM':
-    (LayoutXLMTokenizer, LayoutXLMModel, LayoutXLMForTokenClassification),
-    'LayoutLM':
-    (LayoutLMTokenizer, LayoutLMModel, LayoutLMForTokenClassification)
-}
-
-
-def train(args):
-    os.makedirs(args.output_dir, exist_ok=True)
-    rank = paddle.distributed.get_rank()
-    distributed = paddle.distributed.get_world_size() > 1
-
-    logger = get_logger(log_file=os.path.join(args.output_dir, "train.log"))
-    print_arguments(args, logger)
-
-    label2id_map, id2label_map = get_bio_label_maps(args.label_map_path)
-    loss_class = SERLoss(len(label2id_map))
-
-    pad_token_label_id = loss_class.ignore_index
-
-    # dist mode
-    if distributed:
-        paddle.distributed.init_parallel_env()
-
-    tokenizer_class, base_model_class, model_class = MODELS[args.ser_model_type]
-    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
-    if not args.resume:
-        base_model = base_model_class.from_pretrained(args.model_name_or_path)
-        model = model_class(
-            base_model, num_classes=len(label2id_map), dropout=None)
-        logger.info('train from scratch')
-    else:
-        logger.info('resume from {}'.format(args.model_name_or_path))
-        model = model_class.from_pretrained(args.model_name_or_path)
-
-    # dist mode
-    if distributed:
-        model = paddle.DataParallel(model)
-
-    train_dataset = XFUNDataset(
-        tokenizer,
-        data_dir=args.train_data_dir,
-        label_path=args.train_label_path,
-        label2id_map=label2id_map,
-        img_size=(224, 224),
-        pad_token_label_id=pad_token_label_id,
-        contains_re=False,
-        add_special_ids=False,
-        return_attention_mask=True,
-        load_mode='all')
-    eval_dataset = XFUNDataset(
-        tokenizer,
-        data_dir=args.eval_data_dir,
-        label_path=args.eval_label_path,
-        label2id_map=label2id_map,
-        img_size=(224, 224),
-        pad_token_label_id=pad_token_label_id,
-        contains_re=False,
-        add_special_ids=False,
-        return_attention_mask=True,
-        load_mode='all')
-
-    train_sampler = paddle.io.DistributedBatchSampler(
-        train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True)
-
-    train_dataloader = paddle.io.DataLoader(
-        train_dataset,
-        batch_sampler=train_sampler,
-        num_workers=args.num_workers,
-        use_shared_memory=True,
-        collate_fn=None, )
-
-    eval_dataloader = paddle.io.DataLoader(
-        eval_dataset,
-        batch_size=args.per_gpu_eval_batch_size,
-        num_workers=args.num_workers,
-        use_shared_memory=True,
-        collate_fn=None, )
-
-    t_total = len(train_dataloader) * args.num_train_epochs
-
-    # build linear decay with warmup lr sch
-    lr_scheduler = paddle.optimizer.lr.PolynomialDecay(
-        learning_rate=args.learning_rate,
-        decay_steps=t_total,
-        end_lr=0.0,
-        power=1.0)
-    if args.warmup_steps > 0:
-        lr_scheduler = paddle.optimizer.lr.LinearWarmup(
-            lr_scheduler,
-            args.warmup_steps,
-            start_lr=0,
-            end_lr=args.learning_rate, )
-
-    optimizer = paddle.optimizer.AdamW(
-        learning_rate=lr_scheduler,
-        parameters=model.parameters(),
-        epsilon=args.adam_epsilon,
-        weight_decay=args.weight_decay)
-
-    # Train!
-    logger.info("***** Running training *****")
-    logger.info("  Num examples = %d", len(train_dataset))
-    logger.info("  Num Epochs = %d", args.num_train_epochs)
-    logger.info("  Instantaneous batch size per GPU = %d",
-                args.per_gpu_train_batch_size)
-    logger.info(
-        "  Total train batch size (w. parallel, distributed) = %d",
-        args.per_gpu_train_batch_size * paddle.distributed.get_world_size(), )
-    logger.info("  Total optimization steps = %d", t_total)
-
-    global_step = 0
-    tr_loss = 0.0
-    set_seed(args.seed)
-    best_metrics = None
-
-    train_reader_cost = 0.0
-    train_run_cost = 0.0
-    total_samples = 0
-    reader_start = time.time()
-
-    print_step = 1
-    model.train()
-    for epoch_id in range(args.num_train_epochs):
-        for step, batch in enumerate(train_dataloader):
-            train_reader_cost += time.time() - reader_start
-
-            if args.ser_model_type == 'LayoutLM':
-                if 'image' in batch:
-                    batch.pop('image')
-            labels = batch.pop('labels')
-
-            train_start = time.time()
-            outputs = model(**batch)
-            train_run_cost += time.time() - train_start
-            if args.ser_model_type == 'LayoutXLM':
-                outputs = outputs[0]
-            loss = loss_class(labels, outputs, batch['attention_mask'])
-
-            # model outputs are always tuple in ppnlp (see doc)
-            loss = loss.mean()
-            loss.backward()
-            tr_loss += loss.item()
-            optimizer.step()
-            lr_scheduler.step()  # Update learning rate schedule
-            optimizer.clear_grad()
-            global_step += 1
-            total_samples += batch['input_ids'].shape[0]
-
-            if rank == 0 and step % print_step == 0:
-                logger.info(
-                    "epoch: [{}/{}], iter: [{}/{}], global_step:{}, train loss: {:.6f}, lr: {:.6f}, avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec".
-                    format(epoch_id, args.num_train_epochs, step,
-                           len(train_dataloader), global_step,
-                           loss.numpy()[0],
-                           lr_scheduler.get_lr(), train_reader_cost /
-                           print_step, (train_reader_cost + train_run_cost) /
-                           print_step, total_samples / print_step, total_samples
-                           / (train_reader_cost + train_run_cost)))
-
-                train_reader_cost = 0.0
-                train_run_cost = 0.0
-                total_samples = 0
-
-            if rank == 0 and args.eval_steps > 0 and global_step % args.eval_steps == 0 and args.evaluate_during_training:
-                # Log metrics
-                # Only evaluate when single GPU otherwise metrics may not average well
-                results, _ = evaluate(args, model, tokenizer, loss_class,
-                                      eval_dataloader, label2id_map,
-                                      id2label_map, pad_token_label_id, logger)
-
-                if best_metrics is None or results["f1"] >= best_metrics["f1"]:
-                    best_metrics = copy.deepcopy(results)
-                    output_dir = os.path.join(args.output_dir, "best_model")
-                    os.makedirs(output_dir, exist_ok=True)
-                    if distributed:
-                        model._layers.save_pretrained(output_dir)
-                    else:
-                        model.save_pretrained(output_dir)
-                    tokenizer.save_pretrained(output_dir)
-                    paddle.save(args,
-                                os.path.join(output_dir, "training_args.bin"))
-                    logger.info("Saving model checkpoint to {}".format(
-                        output_dir))
-
-                logger.info("[epoch {}/{}][iter: {}/{}] results: {}".format(
-                    epoch_id, args.num_train_epochs, step,
-                    len(train_dataloader), results))
-                if best_metrics is not None:
-                    logger.info("best metrics: {}".format(best_metrics))
-            reader_start = time.time()
-        if rank == 0:
-            # Save model checkpoint
-            output_dir = os.path.join(args.output_dir, "latest_model")
-            os.makedirs(output_dir, exist_ok=True)
-            if distributed:
-                model._layers.save_pretrained(output_dir)
-            else:
-                model.save_pretrained(output_dir)
-            tokenizer.save_pretrained(output_dir)
-            paddle.save(args, os.path.join(output_dir, "training_args.bin"))
-            logger.info("Saving model checkpoint to {}".format(output_dir))
-    return global_step, tr_loss / global_step
-
-
-if __name__ == "__main__":
-    args = parse_args()
-    train(args)
--- a/ppstructure/vqa/vqa_utils.py
+++ b/ppstructure/vqa/vqa_utils.py
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import argparse
-import cv2
-import random
-import numpy as np
-import imghdr
-from copy import deepcopy
-
-import paddle
-
-from PIL import Image, ImageDraw, ImageFont
-
-
-def set_seed(seed):
-    random.seed(seed)
-    np.random.seed(seed)
-    paddle.seed(seed)
-
-
-def get_bio_label_maps(label_map_path):
-    with open(label_map_path, "r", encoding='utf-8') as fin:
-        lines = fin.readlines()
-    lines = [line.strip() for line in lines]
-    if "O" not in lines:
-        lines.insert(0, "O")
-    labels = []
-    for line in lines:
-        if line == "O":
-            labels.append("O")
-        else:
-            labels.append("B-" + line)
-            labels.append("I-" + line)
-    label2id_map = {label: idx for idx, label in enumerate(labels)}
-    id2label_map = {idx: label for idx, label in enumerate(labels)}
-    return label2id_map, id2label_map
-
-
-def get_image_file_list(img_file):
-    imgs_lists = []
-    if img_file is None or not os.path.exists(img_file):
-        raise Exception("not found any img file in {}".format(img_file))
-
-    img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'GIF'}
-    if os.path.isfile(img_file) and imghdr.what(img_file) in img_end:
-        imgs_lists.append(img_file)
-    elif os.path.isdir(img_file):
-        for single_file in os.listdir(img_file):
-            file_path = os.path.join(img_file, single_file)
-            if os.path.isfile(file_path) and imghdr.what(file_path) in img_end:
-                imgs_lists.append(file_path)
-    if len(imgs_lists) == 0:
-        raise Exception("not found any img file in {}".format(img_file))
-    imgs_lists = sorted(imgs_lists)
-    return imgs_lists
-
-
-def draw_ser_results(image,
-                     ocr_results,
-                     font_path="../../doc/fonts/simfang.ttf",
-                     font_size=18):
-    np.random.seed(2021)
-    color = (np.random.permutation(range(255)),
-             np.random.permutation(range(255)),
-             np.random.permutation(range(255)))
-    color_map = {
-        idx: (color[0][idx], color[1][idx], color[2][idx])
-        for idx in range(1, 255)
-    }
-    if isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    img_new = image.copy()
-    draw = ImageDraw.Draw(img_new)
-
-    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
-    for ocr_info in ocr_results:
-        if ocr_info["pred_id"] not in color_map:
-            continue
-        color = color_map[ocr_info["pred_id"]]
-        text = "{}: {}".format(ocr_info["pred"], ocr_info["text"])
-
-        draw_box_txt(ocr_info["bbox"], text, draw, font, font_size, color)
-
-    img_new = Image.blend(image, img_new, 0.5)
-    return np.array(img_new)
-
-
-def draw_box_txt(bbox, text, draw, font, font_size, color):
-    # draw ocr results outline
-    bbox = ((bbox[0], bbox[1]), (bbox[2], bbox[3]))
-    draw.rectangle(bbox, fill=color)
-
-    # draw ocr results
-    start_y = max(0, bbox[0][1] - font_size)
-    tw = font.getsize(text)[0]
-    draw.rectangle(
-        [(bbox[0][0] + 1, start_y), (bbox[0][0] + tw + 1, start_y + font_size)],
-        fill=(0, 0, 255))
-    draw.text((bbox[0][0] + 1, start_y), text, fill=(255, 255, 255), font=font)
-
-
-def draw_re_results(image,
-                    result,
-                    font_path="../../doc/fonts/simfang.ttf",
-                    font_size=18):
-    np.random.seed(0)
-    if isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    img_new = image.copy()
-    draw = ImageDraw.Draw(img_new)
-
-    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
-    color_head = (0, 0, 255)
-    color_tail = (255, 0, 0)
-    color_line = (0, 255, 0)
-
-    for ocr_info_head, ocr_info_tail in result:
-        draw_box_txt(ocr_info_head["bbox"], ocr_info_head["text"], draw, font,
-                     font_size, color_head)
-        draw_box_txt(ocr_info_tail["bbox"], ocr_info_tail["text"], draw, font,
-                     font_size, color_tail)
-
-        center_head = (
-            (ocr_info_head['bbox'][0] + ocr_info_head['bbox'][2]) // 2,
-            (ocr_info_head['bbox'][1] + ocr_info_head['bbox'][3]) // 2)
-        center_tail = (
-            (ocr_info_tail['bbox'][0] + ocr_info_tail['bbox'][2]) // 2,
-            (ocr_info_tail['bbox'][1] + ocr_info_tail['bbox'][3]) // 2)
-
-        draw.line([center_head, center_tail], fill=color_line, width=5)
-
-    img_new = Image.blend(image, img_new, 0.5)
-    return np.array(img_new)
-
-
-# pad sentences
-def pad_sentences(tokenizer,
-                  encoded_inputs,
-                  max_seq_len=512,
-                  pad_to_max_seq_len=True,
-                  return_attention_mask=True,
-                  return_token_type_ids=True,
-                  return_overflowing_tokens=False,
-                  return_special_tokens_mask=False):
-    # Padding with larger size, reshape is carried out
-    max_seq_len = (
-        len(encoded_inputs["input_ids"]) // max_seq_len + 1) * max_seq_len
-
-    needs_to_be_padded = pad_to_max_seq_len and \
-        max_seq_len and len(encoded_inputs["input_ids"]) < max_seq_len
-
-    if needs_to_be_padded:
-        difference = max_seq_len - len(encoded_inputs["input_ids"])
-        if tokenizer.padding_side == 'right':
-            if return_attention_mask:
-                encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
-                    "input_ids"]) + [0] * difference
-            if return_token_type_ids:
-                encoded_inputs["token_type_ids"] = (
-                    encoded_inputs["token_type_ids"] +
-                    [tokenizer.pad_token_type_id] * difference)
-            if return_special_tokens_mask:
-                encoded_inputs["special_tokens_mask"] = encoded_inputs[
-                    "special_tokens_mask"] + [1] * difference
-            encoded_inputs["input_ids"] = encoded_inputs[
-                "input_ids"] + [tokenizer.pad_token_id] * difference
-            encoded_inputs["bbox"] = encoded_inputs["bbox"] + [[0, 0, 0, 0]
-                                                               ] * difference
-    else:
-        if return_attention_mask:
-            encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
-                "input_ids"])
-
-    return encoded_inputs
-
-
-def split_page(encoded_inputs, max_seq_len=512):
-    """
-    truncate is often used in training process
-    """
-    for key in encoded_inputs:
-        if key == 'entities':
-            encoded_inputs[key] = [encoded_inputs[key]]
-            continue
-        encoded_inputs[key] = paddle.to_tensor(encoded_inputs[key])
-        if encoded_inputs[key].ndim <= 1:  # for input_ids, att_mask and so on
-            encoded_inputs[key] = encoded_inputs[key].reshape([-1, max_seq_len])
-        else:  # for bbox
-            encoded_inputs[key] = encoded_inputs[key].reshape(
-                [-1, max_seq_len, 4])
-    return encoded_inputs
-
-
-def preprocess(
-        tokenizer,
-        ori_img,
-        ocr_info,
-        img_size=(224, 224),
-        pad_token_label_id=-100,
-        max_seq_len=512,
-        add_special_ids=False,
-        return_attention_mask=True, ):
-    ocr_info = deepcopy(ocr_info)
-    height = ori_img.shape[0]
-    width = ori_img.shape[1]
-
-    img = cv2.resize(ori_img, img_size).transpose([2, 0, 1]).astype(np.float32)
-
-    segment_offset_id = []
-    words_list = []
-    bbox_list = []
-    input_ids_list = []
-    token_type_ids_list = []
-    entities = []
-
-    for info in ocr_info:
-        # x1, y1, x2, y2
-        bbox = info["bbox"]
-        bbox[0] = int(bbox[0] * 1000.0 / width)
-        bbox[2] = int(bbox[2] * 1000.0 / width)
-        bbox[1] = int(bbox[1] * 1000.0 / height)
-        bbox[3] = int(bbox[3] * 1000.0 / height)
-
-        text = info["text"]
-        encode_res = tokenizer.encode(
-            text, pad_to_max_seq_len=False, return_attention_mask=True)
-
-        if not add_special_ids:
-            # TODO: use tok.all_special_ids to remove
-            encode_res["input_ids"] = encode_res["input_ids"][1:-1]
-            encode_res["token_type_ids"] = encode_res["token_type_ids"][1:-1]
-            encode_res["attention_mask"] = encode_res["attention_mask"][1:-1]
-
-        # for re
-        entities.append({
-            "start": len(input_ids_list),
-            "end": len(input_ids_list) + len(encode_res["input_ids"]),
-            "label": "O",
-        })
-
-        input_ids_list.extend(encode_res["input_ids"])
-        token_type_ids_list.extend(encode_res["token_type_ids"])
-        bbox_list.extend([bbox] * len(encode_res["input_ids"]))
-        words_list.append(text)
-        segment_offset_id.append(len(input_ids_list))
-
-    encoded_inputs = {
-        "input_ids": input_ids_list,
-        "token_type_ids": token_type_ids_list,
-        "bbox": bbox_list,
-        "attention_mask": [1] * len(input_ids_list),
-        "entities": entities
-    }
-
-    encoded_inputs = pad_sentences(
-        tokenizer,
-        encoded_inputs,
-        max_seq_len=max_seq_len,
-        return_attention_mask=return_attention_mask)
-
-    encoded_inputs = split_page(encoded_inputs)
-
-    fake_bs = encoded_inputs["input_ids"].shape[0]
-
-    encoded_inputs["image"] = paddle.to_tensor(img).unsqueeze(0).expand(
-        [fake_bs] + list(img.shape))
-
-    encoded_inputs["segment_offset_id"] = segment_offset_id
-
-    return encoded_inputs
-
-
-def postprocess(attention_mask, preds, id2label_map):
-    if isinstance(preds, paddle.Tensor):
-        preds = preds.numpy()
-    preds = np.argmax(preds, axis=2)
-
-    preds_list = [[] for _ in range(preds.shape[0])]
-
-    # keep batch info
-    for i in range(preds.shape[0]):
-        for j in range(preds.shape[1]):
-            if attention_mask[i][j] == 1:
-                preds_list[i].append(id2label_map[preds[i][j]])
-
-    return preds_list
-
-
-def merge_preds_list_with_ocr_info(ocr_info, segment_offset_id, preds_list,
-                                   label2id_map_for_draw):
-    # must ensure the preds_list is generated from the same image
-    preds = [p for pred in preds_list for p in pred]
-
-    id2label_map = dict()
-    for key in label2id_map_for_draw:
-        val = label2id_map_for_draw[key]
-        if key == "O":
-            id2label_map[val] = key
-        if key.startswith("B-") or key.startswith("I-"):
-            id2label_map[val] = key[2:]
-        else:
-            id2label_map[val] = key
-
-    for idx in range(len(segment_offset_id)):
-        if idx == 0:
-            start_id = 0
-        else:
-            start_id = segment_offset_id[idx - 1]
-
-        end_id = segment_offset_id[idx]
-
-        curr_pred = preds[start_id:end_id]
-        curr_pred = [label2id_map_for_draw[p] for p in curr_pred]
-
-        if len(curr_pred) <= 0:
-            pred_id = 0
-        else:
-            counts = np.bincount(curr_pred)
-            pred_id = np.argmax(counts)
-        ocr_info[idx]["pred_id"] = int(pred_id)
-        ocr_info[idx]["pred"] = id2label_map[int(pred_id)]
-    return ocr_info
-
-
-def print_arguments(args, logger=None):
-    print_func = logger.info if logger is not None else print
-    """print arguments"""
-    print_func('-----------  Configuration Arguments -----------')
-    for arg, value in sorted(vars(args).items()):
-        print_func('%s: %s' % (arg, value))
-    print_func('------------------------------------------------')
-
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    # Required parameters
-    # yapf: disable
-    parser.add_argument("--model_name_or_path",
-                        default=None, type=str, required=True,)
-    parser.add_argument("--ser_model_type",
-                        default='LayoutXLM', type=str)
-    parser.add_argument("--re_model_name_or_path",
-                        default=None, type=str, required=False,)
-    parser.add_argument("--train_data_dir", default=None,
-                        type=str, required=False,)
-    parser.add_argument("--train_label_path", default=None,
-                        type=str, required=False,)
-    parser.add_argument("--eval_data_dir", default=None,
-                        type=str, required=False,)
-    parser.add_argument("--eval_label_path", default=None,
-                        type=str, required=False,)
-    parser.add_argument("--output_dir", default=None, type=str, required=True,)
-    parser.add_argument("--max_seq_length", default=512, type=int,)
-    parser.add_argument("--evaluate_during_training", action="store_true",)
-    parser.add_argument("--num_workers", default=8, type=int,)
-    parser.add_argument("--per_gpu_train_batch_size", default=8,
-                        type=int, help="Batch size per GPU/CPU for training.",)
-    parser.add_argument("--per_gpu_eval_batch_size", default=8,
-                        type=int, help="Batch size per GPU/CPU for eval.",)
-    parser.add_argument("--learning_rate", default=5e-5,
-                        type=float, help="The initial learning rate for Adam.",)
-    parser.add_argument("--weight_decay", default=0.0,
-                        type=float, help="Weight decay if we apply some.",)
-    parser.add_argument("--adam_epsilon", default=1e-8,
-                        type=float, help="Epsilon for Adam optimizer.",)
-    parser.add_argument("--max_grad_norm", default=1.0,
-                        type=float, help="Max gradient norm.",)
-    parser.add_argument("--num_train_epochs", default=3, type=int,
-                        help="Total number of training epochs to perform.",)
-    parser.add_argument("--warmup_steps", default=0, type=int,
-                        help="Linear warmup over warmup_steps.",)
-    parser.add_argument("--eval_steps", type=int, default=10,
-                        help="eval every X updates steps.",)
-    parser.add_argument("--seed", type=int, default=2048,
-                        help="random seed for initialization",)
-
-    parser.add_argument("--rec_model_dir", default=None, type=str, )
-    parser.add_argument("--det_model_dir", default=None, type=str, )
-    parser.add_argument(
-        "--label_map_path", default="./labels/labels_ser.txt", type=str, required=False, )
-    parser.add_argument("--infer_imgs", default=None, type=str, required=False)
-    parser.add_argument("--resume", action='store_true')
-    parser.add_argument("--ocr_json_path", default=None,
-                        type=str, required=False, help="ocr prediction results")
-    # yapf: enable
-    args = parser.parse_args()
-    return args
--- a/ppstructure/vqa/xfun.py
+++ b/ppstructure/vqa/xfun.py
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import json
-import os
-import cv2
-import numpy as np
-import paddle
-import copy
-from paddle.io import Dataset
-
-__all__ = ["XFUNDataset"]
-
-
-class XFUNDataset(Dataset):
-    """
-    Example:
-        print("=====begin to build dataset=====")
-        from paddlenlp.transformers import LayoutXLMTokenizer
-        tokenizer = LayoutXLMTokenizer.from_pretrained("/paddle/models/transformers/layoutxlm-base-paddle/")
-        tok_res = tokenizer.tokenize("Maribyrnong")
-        # res = tokenizer.convert_ids_to_tokens(val_data["input_ids"][0])
-        dataset = XfunDatasetForSer(
-            tokenizer,
-            data_dir="./zh.val/",
-            label_path="zh.val/xfun_normalize_val.json",
-            img_size=(224,224))
-        print(len(dataset))
-
-        data = dataset[0]
-        print(data.keys())
-        print("input_ids: ", data["input_ids"])
-        print("labels: ", data["labels"])
-        print("token_type_ids: ", data["token_type_ids"])
-        print("words_list: ", data["words_list"])
-        print("image shape: ", data["image"].shape)
-    """
-
-    def __init__(self,
-                 tokenizer,
-                 data_dir,
-                 label_path,
-                 contains_re=False,
-                 label2id_map=None,
-                 img_size=(224, 224),
-                 pad_token_label_id=None,
-                 add_special_ids=False,
-                 return_attention_mask=True,
-                 load_mode='all',
-                 max_seq_len=512):
-        super().__init__()
-        self.tokenizer = tokenizer
-        self.data_dir = data_dir
-        self.label_path = label_path
-        self.contains_re = contains_re
-        self.label2id_map = label2id_map
-        self.img_size = img_size
-        self.pad_token_label_id = pad_token_label_id
-        self.add_special_ids = add_special_ids
-        self.return_attention_mask = return_attention_mask
-        self.load_mode = load_mode
-        self.max_seq_len = max_seq_len
-
-        if self.pad_token_label_id is None:
-            self.pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index
-
-        self.all_lines = self.read_all_lines()
-
-        self.entities_labels = {'HEADER': 0, 'QUESTION': 1, 'ANSWER': 2}
-        self.return_keys = {
-            'bbox': {
-                'type': 'np',
-                'dtype': 'int64'
-            },
-            'input_ids': {
-                'type': 'np',
-                'dtype': 'int64'
-            },
-            'labels': {
-                'type': 'np',
-                'dtype': 'int64'
-            },
-            'attention_mask': {
-                'type': 'np',
-                'dtype': 'int64'
-            },
-            'image': {
-                'type': 'np',
-                'dtype': 'float32'
-            },
-            'token_type_ids': {
-                'type': 'np',
-                'dtype': 'int64'
-            },
-            'entities': {
-                'type': 'dict'
-            },
-            'relations': {
-                'type': 'dict'
-            }
-        }
-
-        if load_mode == "all":
-            self.encoded_inputs_all = self._parse_label_file_all()
-
-    def pad_sentences(self,
-                      encoded_inputs,
-                      max_seq_len=512,
-                      pad_to_max_seq_len=True,
-                      return_attention_mask=True,
-                      return_token_type_ids=True,
-                      truncation_strategy="longest_first",
-                      return_overflowing_tokens=False,
-                      return_special_tokens_mask=False):
-        # Padding
-        needs_to_be_padded = pad_to_max_seq_len and \
-            max_seq_len and len(encoded_inputs["input_ids"]) < max_seq_len
-
-        if needs_to_be_padded:
-            difference = max_seq_len - len(encoded_inputs["input_ids"])
-            if self.tokenizer.padding_side == 'right':
-                if return_attention_mask:
-                    encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
-                        "input_ids"]) + [0] * difference
-                if return_token_type_ids:
-                    encoded_inputs["token_type_ids"] = (
-                        encoded_inputs["token_type_ids"] +
-                        [self.tokenizer.pad_token_type_id] * difference)
-                if return_special_tokens_mask:
-                    encoded_inputs["special_tokens_mask"] = encoded_inputs[
-                        "special_tokens_mask"] + [1] * difference
-                encoded_inputs["input_ids"] = encoded_inputs[
-                    "input_ids"] + [self.tokenizer.pad_token_id] * difference
-                encoded_inputs["labels"] = encoded_inputs[
-                    "labels"] + [self.pad_token_label_id] * difference
-                encoded_inputs["bbox"] = encoded_inputs[
-                    "bbox"] + [[0, 0, 0, 0]] * difference
-            elif self.tokenizer.padding_side == 'left':
-                if return_attention_mask:
-                    encoded_inputs["attention_mask"] = [0] * difference + [
-                        1
-                    ] * len(encoded_inputs["input_ids"])
-                if return_token_type_ids:
-                    encoded_inputs["token_type_ids"] = (
-                        [self.tokenizer.pad_token_type_id] * difference +
-                        encoded_inputs["token_type_ids"])
-                if return_special_tokens_mask:
-                    encoded_inputs["special_tokens_mask"] = [
-                        1
-                    ] * difference + encoded_inputs["special_tokens_mask"]
-                encoded_inputs["input_ids"] = [
-                    self.tokenizer.pad_token_id
-                ] * difference + encoded_inputs["input_ids"]
-                encoded_inputs["labels"] = [
-                    self.pad_token_label_id
-                ] * difference + encoded_inputs["labels"]
-                encoded_inputs["bbox"] = [
-                    [0, 0, 0, 0]
-                ] * difference + encoded_inputs["bbox"]
-        else:
-            if return_attention_mask:
-                encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
-                    "input_ids"])
-
-        return encoded_inputs
-
-    def truncate_inputs(self, encoded_inputs, max_seq_len=512):
-        for key in encoded_inputs:
-            if key == "sample_id":
-                continue
-            length = min(len(encoded_inputs[key]), max_seq_len)
-            encoded_inputs[key] = encoded_inputs[key][:length]
-        return encoded_inputs
-
-    def read_all_lines(self, ):
-        with open(self.label_path, "r", encoding='utf-8') as fin:
-            lines = fin.readlines()
-        return lines
-
-    def _parse_label_file_all(self):
-        """
-        parse all samples
-        """
-        encoded_inputs_all = []
-        for line in self.all_lines:
-            encoded_inputs_all.extend(self._parse_label_file(line))
-        return encoded_inputs_all
-
-    def _parse_label_file(self, line):
-        """
-        parse single sample
-        """
-
-        image_name, info_str = line.split("\t")
-        image_path = os.path.join(self.data_dir, image_name)
-
-        def add_imgge_path(x):
-            x['image_path'] = image_path
-            return x
-
-        encoded_inputs = self._read_encoded_inputs_sample(info_str)
-        if self.contains_re:
-            encoded_inputs = self._chunk_re(encoded_inputs)
-        else:
-            encoded_inputs = self._chunk_ser(encoded_inputs)
-        encoded_inputs = list(map(add_imgge_path, encoded_inputs))
-        return encoded_inputs
-
-    def _read_encoded_inputs_sample(self, info_str):
-        """
-        parse label info
-        """
-        # read text info
-        info_dict = json.loads(info_str)
-        height = info_dict["height"]
-        width = info_dict["width"]
-
-        words_list = []
-        bbox_list = []
-        input_ids_list = []
-        token_type_ids_list = []
-        gt_label_list = []
-
-        if self.contains_re:
-            # for re
-            entities = []
-            relations = []
-            id2label = {}
-            entity_id_to_index_map = {}
-            empty_entity = set()
-        for info in info_dict["ocr_info"]:
-            if self.contains_re:
-                # for re
-                if len(info["text"]) == 0:
-                    empty_entity.add(info["id"])
-                    continue
-                id2label[info["id"]] = info["label"]
-                relations.extend([tuple(sorted(l)) for l in info["linking"]])
-
-            # x1, y1, x2, y2
-            bbox = info["bbox"]
-            label = info["label"]
-            bbox[0] = int(bbox[0] * 1000.0 / width)
-            bbox[2] = int(bbox[2] * 1000.0 / width)
-            bbox[1] = int(bbox[1] * 1000.0 / height)
-            bbox[3] = int(bbox[3] * 1000.0 / height)
-
-            text = info["text"]
-            encode_res = self.tokenizer.encode(
-                text, pad_to_max_seq_len=False, return_attention_mask=True)
-
-            gt_label = []
-            if not self.add_special_ids:
-                # TODO: use tok.all_special_ids to remove
-                encode_res["input_ids"] = encode_res["input_ids"][1:-1]
-                encode_res["token_type_ids"] = encode_res["token_type_ids"][1:
-                                                                            -1]
-                encode_res["attention_mask"] = encode_res["attention_mask"][1:
-                                                                            -1]
-            if label.lower() == "other":
-                gt_label.extend([0] * len(encode_res["input_ids"]))
-            else:
-                gt_label.append(self.label2id_map[("b-" + label).upper()])
-                gt_label.extend([self.label2id_map[("i-" + label).upper()]] *
-                                (len(encode_res["input_ids"]) - 1))
-            if self.contains_re:
-                if gt_label[0] != self.label2id_map["O"]:
-                    entity_id_to_index_map[info["id"]] = len(entities)
-                    entities.append({
-                        "start": len(input_ids_list),
-                        "end":
-                        len(input_ids_list) + len(encode_res["input_ids"]),
-                        "label": label.upper(),
-                    })
-            input_ids_list.extend(encode_res["input_ids"])
-            token_type_ids_list.extend(encode_res["token_type_ids"])
-            bbox_list.extend([bbox] * len(encode_res["input_ids"]))
-            gt_label_list.extend(gt_label)
-            words_list.append(text)
-
-        encoded_inputs = {
-            "input_ids": input_ids_list,
-            "labels": gt_label_list,
-            "token_type_ids": token_type_ids_list,
-            "bbox": bbox_list,
-            "attention_mask": [1] * len(input_ids_list),
-            # "words_list": words_list,
-        }
-        encoded_inputs = self.pad_sentences(
-            encoded_inputs,
-            max_seq_len=self.max_seq_len,
-            return_attention_mask=self.return_attention_mask)
-        encoded_inputs = self.truncate_inputs(encoded_inputs)
-
-        if self.contains_re:
-            relations = self._relations(entities, relations, id2label,
-                                        empty_entity, entity_id_to_index_map)
-            encoded_inputs['relations'] = relations
-            encoded_inputs['entities'] = entities
-        return encoded_inputs
-
-    def _chunk_ser(self, encoded_inputs):
-        encoded_inputs_all = []
-        seq_len = len(encoded_inputs['input_ids'])
-        chunk_size = 512
-        for chunk_id, index in enumerate(range(0, seq_len, chunk_size)):
-            chunk_beg = index
-            chunk_end = min(index + chunk_size, seq_len)
-            encoded_inputs_example = {}
-            for key in encoded_inputs:
-                encoded_inputs_example[key] = encoded_inputs[key][chunk_beg:
-                                                                  chunk_end]
-
-            encoded_inputs_all.append(encoded_inputs_example)
-        return encoded_inputs_all
-
-    def _chunk_re(self, encoded_inputs):
-        # prepare data
-        entities = encoded_inputs.pop('entities')
-        relations = encoded_inputs.pop('relations')
-        encoded_inputs_all = []
-        chunk_size = 512
-        for chunk_id, index in enumerate(
-                range(0, len(encoded_inputs["input_ids"]), chunk_size)):
-            item = {}
-            for k in encoded_inputs:
-                item[k] = encoded_inputs[k][index:index + chunk_size]
-
-            # select entity in current chunk
-            entities_in_this_span = []
-            global_to_local_map = {}  #
-            for entity_id, entity in enumerate(entities):
-                if (index <= entity["start"] < index + chunk_size and
-                        index <= entity["end"] < index + chunk_size):
-                    entity["start"] = entity["start"] - index
-                    entity["end"] = entity["end"] - index
-                    global_to_local_map[entity_id] = len(entities_in_this_span)
-                    entities_in_this_span.append(entity)
-
-            # select relations in current chunk
-            relations_in_this_span = []
-            for relation in relations:
-                if (index <= relation["start_index"] < index + chunk_size and
-                        index <= relation["end_index"] < index + chunk_size):
-                    relations_in_this_span.append({
-                        "head": global_to_local_map[relation["head"]],
-                        "tail": global_to_local_map[relation["tail"]],
-                        "start_index": relation["start_index"] - index,
-                        "end_index": relation["end_index"] - index,
-                    })
-            item.update({
-                "entities": reformat(entities_in_this_span),
-                "relations": reformat(relations_in_this_span),
-            })
-            item['entities']['label'] = [
-                self.entities_labels[x] for x in item['entities']['label']
-            ]
-            encoded_inputs_all.append(item)
-        return encoded_inputs_all
-
-    def _relations(self, entities, relations, id2label, empty_entity,
-                   entity_id_to_index_map):
-        """
-        build relations
-        """
-        relations = list(set(relations))
-        relations = [
-            rel for rel in relations
-            if rel[0] not in empty_entity and rel[1] not in empty_entity
-        ]
-        kv_relations = []
-        for rel in relations:
-            pair = [id2label[rel[0]], id2label[rel[1]]]
-            if pair == ["question", "answer"]:
-                kv_relations.append({
-                    "head": entity_id_to_index_map[rel[0]],
-                    "tail": entity_id_to_index_map[rel[1]]
-                })
-            elif pair == ["answer", "question"]:
-                kv_relations.append({
-                    "head": entity_id_to_index_map[rel[1]],
-                    "tail": entity_id_to_index_map[rel[0]]
-                })
-            else:
-                continue
-        relations = sorted(
-            [{
-                "head": rel["head"],
-                "tail": rel["tail"],
-                "start_index": get_relation_span(rel, entities)[0],
-                "end_index": get_relation_span(rel, entities)[1],
-            } for rel in kv_relations],
-            key=lambda x: x["head"], )
-        return relations
-
-    def load_img(self, image_path):
-        # read img
-        img = cv2.imread(image_path)
-        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        resize_h, resize_w = self.img_size
-        im_shape = img.shape[0:2]
-        im_scale_y = resize_h / im_shape[0]
-        im_scale_x = resize_w / im_shape[1]
-        img_new = cv2.resize(
-            img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2)
-        mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :]
-        std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :]
-        img_new = img_new / 255.0
-        img_new -= mean
-        img_new /= std
-        img = img_new.transpose((2, 0, 1))
-        return img
-
-    def __getitem__(self, idx):
-        if self.load_mode == "all":
-            data = copy.deepcopy(self.encoded_inputs_all[idx])
-        else:
-            data = self._parse_label_file(self.all_lines[idx])[0]
-
-        image_path = data.pop('image_path')
-        data["image"] = self.load_img(image_path)
-
-        return_data = {}
-        for k, v in data.items():
-            if k in self.return_keys:
-                if self.return_keys[k]['type'] == 'np':
-                    v = np.array(v, dtype=self.return_keys[k]['dtype'])
-                return_data[k] = v
-        return return_data
-
-    def __len__(self, ):
-        if self.load_mode == "all":
-            return len(self.encoded_inputs_all)
-        else:
-            return len(self.all_lines)
-
-
-def get_relation_span(rel, entities):
-    bound = []
-    for entity_index in [rel["head"], rel["tail"]]:
-        bound.append(entities[entity_index]["start"])
-        bound.append(entities[entity_index]["end"])
-    return min(bound), max(bound)
-
-
-def reformat(data):
-    new_data = {}
-    for item in data:
-        for k, v in item.items():
-            if k not in new_data:
-                new_data[k] = []
-            new_data[k].append(v)
-    return new_data
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,4 +13,3 @@ lxml
 premailer
 openpyxl
 fasttext==0.9.1
-
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -239,8 +239,7 @@ fi

 if [ ${MODE} = "klquant_whole_infer" ]; then
    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar --no-check-certificate
-    cd ./train_data/ && tar xf icdar2015_lite.tar
-    ln -s ./icdar2015_lite ./icdar2015 && cd ../
+    cd ./train_data/ && tar xf icdar2015_lite.tar && rm -rf ./icdar2015 && ln -s ./icdar2015_lite ./icdar2015 && cd ../
    if [ ${model_name} = "ch_ppocr_mobile_v2.0_det_KL" ]; then
        wget -nc  -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar --no-check-certificate
        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar  --no-check-certificate
@@ -249,6 +248,8 @@ if [ ${MODE} = "klquant_whole_infer" ]; then
    if [ ${model_name} = "PPOCRv2_ocr_rec_kl" ]; then
        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tar  --no-check-certificate
        wget -nc -P ./inference/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar  --no-check-certificate
+        wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar --no-check-certificate
+        cd ./train_data/ && tar xf ic15_data.tar && cd ../
        cd ./inference && tar xf rec_inference.tar && tar xf ch_PP-OCRv2_rec_infer.tar && cd ../
    fi
    if [ ${model_name} = "PPOCRv2_ocr_det_kl" ]; then

--- a/test_tipc/readme.md
+++ b/test_tipc/readme.md
@@ -68,14 +68,14 @@ test_tipc/
        ├── model_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt     # 测试Linux上c++预测的配置文件
        ├── model_linux_gpu_normal_normal_infer_python_jetson.txt         # 测试Jetson上python预测的配置文件
        ├── train_linux_gpu_fleet_amp_infer_python_linux_gpu_cpu.txt      # 测试Linux上多机多卡、混合精度训练和python预测的配置文件
-        ├── ...  
+        ├── ...
    ├── ch_ppocr_server_v2.0_det               # ch_ppocr_server_v2.0_det模型的测试配置文件目录
-        ├── ...  
+        ├── ...
    ├── ch_ppocr_mobile_v2.0_rec               # ch_ppocr_mobile_v2.0_rec模型的测试配置文件目录
-        ├── ...  
+        ├── ...
    ├── ch_ppocr_server_v2.0_det               # ch_ppocr_server_v2.0_det模型的测试配置文件目录
-        ├── ...  
-    ├── ...  
+        ├── ...
+    ├── ...
 ├── results/   # 预先保存的预测结果，用于和实际预测结果进行精度比对
    ├── python_ppocr_det_mobile_results_fp32.txt           # 预存的mobile版ppocr检测模型python预测fp32精度的结果
    ├── python_ppocr_det_mobile_results_fp16.txt           # 预存的mobile版ppocr检测模型python预测fp16精度的结果
@@ -119,7 +119,7 @@ bash test_tipc/test_train_inference_python.sh configs/[model_name]/[params_file_
 bash test_tipc/prepare.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer'
 # 运行测试
 bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobile_v2.0_det/train_infer_python.txt 'lite_train_lite_infer'
-```  
+```
 关于本示例命令的更多信息可查看[基础训练预测使用文档](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/test_tipc/docs/test_train_inference_python.md#22-%E5%8A%9F%E8%83%BD%E6%B5%8B%E8%AF%95)。

 ### 配置文件命名规范
@@ -136,9 +136,9 @@ bash test_tipc/test_train_inference_python.sh ./test_tipc/configs/ch_ppocr_mobil

 <a name="more"></a>
 ## 4. 开始测试
-各功能测试中涉及混合精度、裁剪、量化等训练相关，及mkldnn、Tensorrt等多种预测相关参数配置，请点击下方相应链接了解更多细节和使用教程：  
- [test_train_inference_python 使用](docs/test_train_inference_python.md) ：测试基于Python的模型训练、评估、推理等基本功能，包括裁剪、量化、蒸馏。 
+各功能测试中涉及混合精度、裁剪、量化等训练相关，及mkldnn、Tensorrt等多种预测相关参数配置，请点击下方相应链接了解更多细节和使用教程：
+- [test_train_inference_python 使用](docs/test_train_inference_python.md) ：测试基于Python的模型训练、评估、推理等基本功能，包括裁剪、量化、蒸馏。
 - [test_inference_cpp 使用](docs/test_inference_cpp.md)：测试基于C++的模型推理。
 - [test_serving 使用](docs/test_serving.md)：测试基于Paddle Serving的服务化部署功能。
- [test_lite_arm_cpu_cpp 使用](docs/test_lite_arm_cpu_cpp.md)：测试基于Paddle-Lite的ARM CPU端c++预测部署功能。
+- [test_lite_arm_cpp 使用](docs/test_lite_arm_cpp.md)：测试基于Paddle-Lite的ARM CPU端c++预测部署功能。
 - [test_paddle2onnx 使用](docs/test_paddle2onnx.md)：测试Paddle2ONNX的模型转化功能，并验证正确性。
--- a/test_tipc/supplementary/__init__.py
+++ b/test_tipc/supplementary/__init__.py
+
--- a/test_tipc/supplementary/config.py
+++ b/test_tipc/supplementary/config.py
+import numpy as np
+import os
+import sys
+import platform
+import yaml
+import time
+import shutil
+import paddle
+import paddle.distributed as dist
+from tqdm import tqdm
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+from utils import get_logger, print_dict
+
+
+class ArgsParser(ArgumentParser):
+    def __init__(self):
+        super(ArgsParser, self).__init__(
+            formatter_class=RawDescriptionHelpFormatter)
+        self.add_argument("-c", "--config", help="configuration file to use")
+        self.add_argument(
+            "-o", "--opt", nargs='+', help="set configuration options")
+        self.add_argument(
+            '-p',
+            '--profiler_options',
+            type=str,
+            default=None,
+            help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".'
+        )
+
+    def parse_args(self, argv=None):
+        args = super(ArgsParser, self).parse_args(argv)
+        assert args.config is not None, \
+            "Please specify --config=configure_file_path."
+        args.opt = self._parse_opt(args.opt)
+        return args
+
+    def _parse_opt(self, opts):
+        config = {}
+        if not opts:
+            return config
+        for s in opts:
+            s = s.strip()
+            k, v = s.split('=')
+            config[k] = yaml.load(v, Loader=yaml.Loader)
+        return config
+
+
+class AttrDict(dict):
+    """Single level attribute dict, NOT recursive"""
+
+    def __init__(self, **kwargs):
+        super(AttrDict, self).__init__()
+        super(AttrDict, self).update(kwargs)
+
+    def __getattr__(self, key):
+        if key in self:
+            return self[key]
+        raise AttributeError("object has no attribute '{}'".format(key))
+
+
+# Module-level configuration store; merge_config() writes into it in place
+# and load_config() returns it.
+global_config = AttrDict()
+
+# Baseline values that load_config() merges before the user's YAML file.
+default_config = {'Global': {'debug': False, }}
+
+
+def load_config(file_path):
+    """
+    Load config from yml/yaml file.
+    Args:
+        file_path (str): Path of the config file to be loaded.
+    Returns: global config
+    """
+    merge_config(default_config)
+    _, ext = os.path.splitext(file_path)
+    assert ext in ['.yml', '.yaml'], "only support yaml files for now"
+    merge_config(yaml.load(open(file_path, 'rb'), Loader=yaml.Loader))
+    return global_config
+
+
+def merge_config(config):
+    """
+    Merge config into global config.
+    Args:
+        config (dict): Config to be merged.
+    Returns: global config
+    """
+    for key, value in config.items():
+        if "." not in key:
+            if isinstance(value, dict) and key in global_config:
+                global_config[key].update(value)
+            else:
+                global_config[key] = value
+        else:
+            sub_keys = key.split('.')
+            assert (
+                sub_keys[0] in global_config
+            ), "the sub_keys can only be one of global_config: {}, but get: {}, please check your running command".format(
+                global_config.keys(), sub_keys[0])
+            cur = global_config[sub_keys[0]]
+            for idx, sub_key in enumerate(sub_keys[1:]):
+                if idx == len(sub_keys) - 2:
+                    cur[sub_key] = value
+                else:
+                    cur = cur[sub_key]
+
+
+def preprocess(is_train=False):
+    FLAGS = ArgsParser().parse_args()
+    profiler_options = FLAGS.profiler_options
+    config = load_config(FLAGS.config)
+    merge_config(FLAGS.opt)
+    profile_dic = {"profiler_options": FLAGS.profiler_options}
+    merge_config(profile_dic)
+
+    if is_train:
+        # save_config
+        save_model_dir = config['save_model_dir']
+        os.makedirs(save_model_dir, exist_ok=True)
+        with open(os.path.join(save_model_dir, 'config.yml'), 'w') as f:
+            yaml.dump(
+                dict(config), f, default_flow_style=False, sort_keys=False)
+        log_file = '{}/train.log'.format(save_model_dir)
+    else:
+        log_file = None
+    logger = get_logger(name='root', log_file=log_file)
+
+    # check if set use_gpu=True in paddlepaddle cpu version
+    use_gpu = config['use_gpu']
+
+    print_dict(config, logger)
+
+    return config, logger
+
+
+# Script entry point: build the config and logger with is_train=False.
+if __name__ == "__main__":
+    config, logger = preprocess(is_train=False)
+    # print(config)
--- a/test_tipc/supplementary/custom_op/custom_relu_op.cc
+++ b/test_tipc/supplementary/custom_op/custom_relu_op.cc
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+// reference from : https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/python/custom-operator/custom_relu_op.cc
+#include <iostream>
+#include <vector>
+
+#include "paddle/extension.h"
+
+// Element-wise ReLU on the CPU: out_data[i] = max(0, x_data[i]) for
+// i in [0, x_numel). Both buffers must hold at least x_numel elements.
+template <typename data_t>
+void relu_cpu_forward_kernel(const data_t* x_data,
+                             data_t* out_data,
+                             int64_t x_numel) {
+  // The index has the same 64-bit type as the element count so it cannot
+  // overflow on tensors with more than INT_MAX elements.
+  for (int64_t i = 0; i < x_numel; ++i) {
+    out_data[i] = std::max(static_cast<data_t>(0.), x_data[i]);
+  }
+}
+
+// Element-wise ReLU gradient on the CPU: grad_x_data[i] is grad_out_data[i]
+// where out_data[i] > 0 and grad_out_data[i] * 0 otherwise, for
+// i in [0, out_numel). All buffers must hold at least out_numel elements.
+template <typename data_t>
+void relu_cpu_backward_kernel(const data_t* grad_out_data,
+                              const data_t* out_data,
+                              data_t* grad_x_data,
+                              int64_t out_numel) {
+  // The index has the same 64-bit type as the element count so it cannot
+  // overflow on tensors with more than INT_MAX elements.
+  for (int64_t i = 0; i < out_numel; ++i) {
+    grad_x_data[i] =
+        grad_out_data[i] * (out_data[i] > static_cast<data_t>(0) ? 1. : 0.);
+  }
+}
+
+// CPU forward pass. Returns a one-element vector holding a tensor with x's
+// shape, filled by relu_cpu_forward_kernel.
+std::vector<paddle::Tensor> relu_cpu_forward(const paddle::Tensor& x) {
+  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
+
+  // The output shape is set before mutable_data() is requested below.
+  out.reshape(x.shape());
+  // Dispatch on x's dtype; data_t is supplied by the dispatch macro.
+  PD_DISPATCH_FLOATING_TYPES(
+      x.type(), "relu_cpu_forward", ([&] {
+        relu_cpu_forward_kernel<data_t>(
+            x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
+      }));
+
+  return {out};
+}
+
+// CPU backward pass. Returns a one-element vector holding grad_x, which has
+// x's shape and is filled by relu_cpu_backward_kernel from grad_out and out.
+// x itself is only consulted for its shape and place.
+std::vector<paddle::Tensor> relu_cpu_backward(const paddle::Tensor& x,
+                                              const paddle::Tensor& out,
+                                              const paddle::Tensor& grad_out) {
+  auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU);
+  // The gradient shape is set before mutable_data() is requested below.
+  grad_x.reshape(x.shape());
+
+  // Dispatch on out's dtype; data_t is supplied by the dispatch macro.
+  PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_backward", ([&] {
+                               relu_cpu_backward_kernel<data_t>(
+                                   grad_out.data<data_t>(),
+                                   out.data<data_t>(),
+                                   grad_x.mutable_data<data_t>(x.place()),
+                                   out.size());
+                             }));
+
+  return {grad_x};
+}
+
+// GPU implementations. Only declared here; ReluForward / ReluBackward below
+// call them when the input tensor is placed on the GPU.
+// NOTE(review): the definitions are expected to come from the companion
+// custom_relu_op.cu source -- confirm it is always part of the build.
+std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x);
+std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
+                                               const paddle::Tensor& out,
+                                               const paddle::Tensor& grad_out);
+
+// Forward entry point registered for the custom_relu op: routes to the CPU
+// or GPU implementation depending on where `x` is placed, and throws
+// std::runtime_error for any other place.
+std::vector<paddle::Tensor> ReluForward(const paddle::Tensor& x) {
+  // TODO(chenweihang): Check Input
+  const auto place = x.place();
+  if (place == paddle::PlaceType::kCPU) {
+    return relu_cpu_forward(x);
+  }
+  if (place == paddle::PlaceType::kGPU) {
+    return relu_cuda_forward(x);
+  }
+  throw std::runtime_error("Not implemented.");
+}
+
+// Backward entry point registered for the custom_relu grad op: routes to the
+// CPU or GPU implementation depending on where `x` is placed, and throws
+// std::runtime_error for any other place.
+std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
+                                         const paddle::Tensor& out,
+                                         const paddle::Tensor& grad_out) {
+  // TODO(chenweihang): Check Input
+  const auto place = x.place();
+  if (place == paddle::PlaceType::kCPU) {
+    return relu_cpu_backward(x, out, grad_out);
+  }
+  if (place == paddle::PlaceType::kGPU) {
+    return relu_cuda_backward(x, out, grad_out);
+  }
+  throw std::runtime_error("Not implemented.");
+}
+
+// Registers the forward op `custom_relu`: one input "X", one output "Out",
+// computed by ReluForward.
+PD_BUILD_OP(custom_relu)
+    .Inputs({"X"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(ReluForward));
+
+// Registers the matching gradient op: it receives "X", "Out" and the
+// gradient of "Out" (in the parameter order of ReluBackward) and produces
+// the gradient of "X".
+PD_BUILD_GRAD_OP(custom_relu)
+    .Inputs({"X", "Out", paddle::Grad("Out")})
+    .Outputs({paddle::Grad("X")})
+    .SetKernelFn(PD_KERNEL(ReluBackward));
\ No newline at end of file
--- a/test_tipc/supplementary/custom_op/custom_relu_op.cu
+++ b/test_tipc/supplementary/custom_op/custom_relu_op.cu
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+// reference https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/python/custom-operator/custom_relu_op.cu
+
+#include "paddle/extension.h"
+
+// Element-wise ReLU on the GPU: y[i] = max(x[i], 0) for i in [0, num).
+//
+// Each thread starts at its global index and advances by the total number
+// of launched threads, so any launch configuration covers all elements.
+template <typename data_t>
+__global__ void relu_cuda_forward_kernel(const data_t* x,
+                                         data_t* y,
+                                         const int num) {
+  // 64-bit index and stride: with an `int` index, `i += blockDim.x *
+  // gridDim.x` can wrap to a negative value when num is large, which would
+  // keep the loop running with an out-of-range index.
+  const long long stride = static_cast<long long>(blockDim.x) * gridDim.x;
+  long long i = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
+  for (; i < num; i += stride) {
+    y[i] = max(x[i], static_cast<data_t>(0.));
+  }
+}
+
+// Element-wise ReLU gradient on the GPU:
+//   dx[i] = dy[i] * (y[i] > 0 ? 1 : 0) for i in [0, num).
+//
+//   dy  - gradient w.r.t. the forward output.
+//   y   - forward output, used to build the 0/1 mask.
+//   dx  - destination for the gradient w.r.t. the forward input.
+template <typename data_t>
+__global__ void relu_cuda_backward_kernel(const data_t* dy,
+                                          const data_t* y,
+                                          data_t* dx,
+                                          const int num) {
+  const data_t zero = static_cast<data_t>(0);
+  const data_t one = static_cast<data_t>(1);
+  // 64-bit index and stride: an `int` index advanced by the unsigned
+  // product blockDim.x * gridDim.x can wrap negative for large num.
+  const long long stride = static_cast<long long>(blockDim.x) * gridDim.x;
+  long long i = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
+  for (; i < num; i += stride) {
+    // Mask kept in data_t so float kernels do not fall back to double math.
+    dx[i] = dy[i] * (y[i] > zero ? one : zero);
+  }
+}
+
+// GPU forward pass of the custom ReLU op.
+//
+// Creates a GPU tensor with the same shape as `x`, launches
+// relu_cuda_forward_kernel on x.stream() and returns the result as a
+// one-element vector.
+std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
+  auto out = paddle::Tensor(paddle::PlaceType::kGPU);
+
+  out.reshape(x.shape());
+  // NOTE(review): x.size() is narrowed to int here; presumably it is a
+  // 64-bit count (the CPU kernels take int64_t) -- confirm that tensors
+  // with more than INT_MAX elements are not expected.
+  int numel = x.size();
+  int block = 512;
+  // Ceil-divide so that grid * block >= numel.
+  // NOTE(review): grid is 0 for an empty tensor -- confirm a zero-sized
+  // launch is acceptable for callers.
+  int grid = (numel + block - 1) / block;
+  PD_DISPATCH_FLOATING_TYPES(
+      x.type(), "relu_cuda_forward_kernel", ([&] {
+        relu_cuda_forward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
+            x.data<data_t>(), out.mutable_data<data_t>(x.place()), numel);
+      }));
+
+  return {out};
+}
+
+// GPU backward pass of the custom ReLU op.
+//
+// Creates a GPU tensor with the same shape as `x` and fills it by launching
+// relu_cuda_backward_kernel on x.stream() with `grad_out` and the forward
+// output `out`. Returns the input gradient as a one-element vector.
+std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
+                                               const paddle::Tensor& out,
+                                               const paddle::Tensor& grad_out) {
+  auto grad_x = paddle::Tensor(paddle::PlaceType::kGPU);
+  grad_x.reshape(x.shape());
+
+  // NOTE(review): out.size() is narrowed to int -- confirm that tensors
+  // with more than INT_MAX elements are not expected.
+  int numel = out.size();
+  int block = 512;
+  // Ceil-divide so that grid * block >= numel.
+  int grid = (numel + block - 1) / block;
+  PD_DISPATCH_FLOATING_TYPES(
+      out.type(), "relu_cuda_backward_kernel", ([&] {
+        relu_cuda_backward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
+            grad_out.data<data_t>(),
+            out.data<data_t>(),
+            grad_x.mutable_data<data_t>(x.place()),
+            numel);
+      }));
+
+  return {grad_x};
+}
--- a/test_tipc/supplementary/custom_op/test.py
+++ b/test_tipc/supplementary/custom_op/test.py
+import paddle
+import paddle.nn as nn
+from paddle.vision.transforms import Compose, Normalize
+from paddle.utils.cpp_extension import load
+from paddle.inference import Config
+from paddle.inference import create_predictor
+import numpy as np
+
+# Number of passes over the training set and samples per batch used by the
+# training loop at the bottom of this script.
+EPOCH_NUM = 4
+BATCH_SIZE = 64
+
+# jit compile custom op
+# The two source paths are relative, so the script has to be started from
+# the directory that contains custom_relu_op.cc / custom_relu_op.cu.
+custom_ops = load(
+    name="custom_jit_ops", sources=["custom_relu_op.cc", "custom_relu_op.cu"])
+
+
+class LeNet(nn.Layer):
+    """LeNet-style classifier whose activations are the JIT-compiled
+    ``custom_ops.custom_relu`` operator.
+
+    The network takes single-channel images and produces 10 logits.
+    ``linear1`` expects 16 * 5 * 5 features, which is what the two
+    conv/pool stages yield for 28x28 inputs.
+    """
+
+    def __init__(self):
+        super(LeNet, self).__init__()
+        self.conv1 = nn.Conv2D(
+            in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
+        self.max_pool1 = nn.MaxPool2D(kernel_size=2, stride=2)
+        self.conv2 = nn.Conv2D(
+            in_channels=6, out_channels=16, kernel_size=5, stride=1)
+        self.max_pool2 = nn.MaxPool2D(kernel_size=2, stride=2)
+        self.linear1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
+        self.linear2 = nn.Linear(in_features=120, out_features=84)
+        self.linear3 = nn.Linear(in_features=84, out_features=10)
+
+    def forward(self, x):
+        """Return the 10 class logits for the image batch ``x``."""
+        x = self.conv1(x)
+        x = custom_ops.custom_relu(x)
+        x = self.max_pool1(x)
+        x = custom_ops.custom_relu(x)
+        x = self.conv2(x)
+        x = self.max_pool2(x)
+        # Collapse everything except the batch axis before the linear layers.
+        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
+        x = self.linear1(x)
+        x = custom_ops.custom_relu(x)
+        x = self.linear2(x)
+        x = custom_ops.custom_relu(x)
+        x = self.linear3(x)
+        return x
+
+
+# set device
+# The custom op is exercised on the GPU, so a CUDA build of paddle is needed.
+paddle.set_device("gpu")
+
+# model
+net = LeNet()
+loss_fn = nn.CrossEntropyLoss()
+opt = paddle.optimizer.Adam(learning_rate=0.001, parameters=net.parameters())
+
+# data loader
+# With mean 127.5 and std 127.5, pixel values in [0, 255] map to [-1, 1].
+transform = Compose([Normalize(mean=[127.5], std=[127.5], data_format='CHW')])
+train_dataset = paddle.vision.datasets.MNIST(mode='train', transform=transform)
+train_loader = paddle.io.DataLoader(
+    train_dataset,
+    batch_size=BATCH_SIZE,
+    shuffle=True,
+    drop_last=True,
+    num_workers=2)
+
+# train
+for epoch_id in range(EPOCH_NUM):
+    for batch_id, (image, label) in enumerate(train_loader()):
+        out = net(image)
+        loss = loss_fn(out, label)
+        loss.backward()
+
+        # Report the loss every 300 batches.
+        if batch_id % 300 == 0:
+            print("Epoch {} batch {}: loss = {}".format(epoch_id, batch_id,
+                                                        np.mean(loss.numpy())))
+
+        # Apply the update, then reset gradients for the next batch.
+        opt.step()
+        opt.clear_grad()
--- a/test_tipc/supplementary/data.py
+++ b/test_tipc/supplementary/data.py
+import numpy as np
+import paddle
+import os
+import cv2
+import glob
+
+
+def transform(data, ops=None):
+    """ transform """
+    if ops is None:
+        ops = []
+    for op in ops:
+        data = op(data)
+        if data is None:
+            return None
+    return data
+
+
+def create_operators(op_param_list, global_config=None):
+    """
+    create operators based on the config
+    Args:
+        params(list): a dict list, used to create some operators
+    """
+    assert isinstance(op_param_list, list), ('operator config should be a list')
+    ops = []
+    for operator in op_param_list:
+        assert isinstance(operator,
+                          dict) and len(operator) == 1, "yaml format error"
+        op_name = list(operator)[0]
+        param = {} if operator[op_name] is None else operator[op_name]
+        if global_config is not None:
+            param.update(global_config)
+        op = eval(op_name)(**param)
+        ops.append(op)
+    return ops
+
+
+class DecodeImage(object):
+    """ decode image """
+
+    def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
+        self.img_mode = img_mode
+        self.channel_first = channel_first
+
+    def __call__(self, data):
+        img = data['image']
+        if six.PY2:
+            assert type(img) is str and len(
+                img) > 0, "invalid input 'img' in DecodeImage"
+        else:
+            assert type(img) is bytes and len(
+                img) > 0, "invalid input 'img' in DecodeImage"
+        img = np.frombuffer(img, dtype='uint8')
+        img = cv2.imdecode(img, 1)
+        if img is None:
+            return None
+        if self.img_mode == 'GRAY':
+            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+        elif self.img_mode == 'RGB':
+            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape)
+            img = img[:, :, ::-1]
+
+        if self.channel_first:
+            img = img.transpose((2, 0, 1))
+
+        data['image'] = img
+        data['src_image'] = img
+        return data
+
+
+class NormalizeImage(object):
+    """ normalize image such as substract mean, divide std
+    """
+
+    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
+        if isinstance(scale, str):
+            scale = eval(scale)
+        self.scale = np.float32(scale if scale is not None else 1.0 / 255.0)
+        mean = mean if mean is not None else [0.485, 0.456, 0.406]
+        std = std if std is not None else [0.229, 0.224, 0.225]
+
+        shape = (3, 1, 1) if order == 'chw' else (1, 1, 3)
+        self.mean = np.array(mean).reshape(shape).astype('float32')
+        self.std = np.array(std).reshape(shape).astype('float32')
+
+    def __call__(self, data):
+        img = data['image']
+        from PIL import Image
+        if isinstance(img, Image.Image):
+            img = np.array(img)
+        assert isinstance(img,
+                          np.ndarray), "invalid input 'img' in NormalizeImage"
+        data['image'] = (
+            img.astype('float32') * self.scale - self.mean) / self.std
+        return data
+
+
+class ToCHWImage(object):
+    """ convert hwc image to chw image
+    """
+
+    def __init__(self, **kwargs):
+        pass
+
+    def __call__(self, data):
+        img = data['image']
+        from PIL import Image
+        if isinstance(img, Image.Image):
+            img = np.array(img)
+        data['image'] = img.transpose((2, 0, 1))
+
+        src_img = data['src_image']
+        from PIL import Image
+        if isinstance(img, Image.Image):
+            src_img = np.array(src_img)
+        data['src_image'] = img.transpose((2, 0, 1))
+
+        return data
+
+
+class SimpleDataset(nn.Dataset):
+    def __init__(self, config, mode, logger, seed=None):
+        self.logger = logger
+        self.mode = mode.lower()
+
+        data_dir = config['Train']['data_dir']
+
+        imgs_list = self.get_image_list(data_dir)
+
+        self.ops = create_operators(cfg['transforms'], None)
+
+    def get_image_list(self, img_dir):
+        imgs = glob.glob(os.path.join(img_dir, "*.png"))
+        if len(imgs) == 0:
+            raise ValueError(f"not any images founded in {img_dir}")
+        return imgs
+
+    def __getitem__(self, idx):
+        return None
--- a/test_tipc/supplementary/data_loader.py
+++ b/test_tipc/supplementary/data_loader.py
+import numpy as np
+from paddle.vision.datasets import Cifar100
+from paddle.vision.transforms import Normalize
+from paddle.fluid.dataloader.collate import default_collate_fn
+import signal
+import os
+from paddle.io import Dataset, DataLoader, DistributedBatchSampler
+
+
+def term_mp(sig_num, frame):
+    """ kill all child processes
+    """
+    pid = os.getpid()
+    pgid = os.getpgid(os.getpid())
+    print("main proc {} exit, kill process group " "{}".format(pid, pgid))
+    os.killpg(pgid, signal.SIGKILL)
+    return
+
+
+def build_dataloader(mode,
+                     batch_size=4,
+                     seed=None,
+                     num_workers=0,
+                     device='gpu:0'):
+
+    normalize = Normalize(
+        mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], data_format='HWC')
+
+    if mode.lower() == "train":
+        dataset = Cifar100(mode=mode, transform=normalize)
+    elif mode.lower() in ["test", 'valid', 'eval']:
+        dataset = Cifar100(mode="test", transform=normalize)
+    else:
+        raise ValueError(f"{mode} should be one of ['train', 'test']")
+
+    # define batch sampler
+    batch_sampler = DistributedBatchSampler(
+        dataset=dataset, batch_size=batch_size, shuffle=False, drop_last=True)
+
+    data_loader = DataLoader(
+        dataset=dataset,
+        batch_sampler=batch_sampler,
+        places=device,
+        num_workers=num_workers,
+        return_list=True,
+        use_shared_memory=False)
+
+    # support exit using ctrl+c
+    signal.signal(signal.SIGINT, term_mp)
+    signal.signal(signal.SIGTERM, term_mp)
+
+    return data_loader
+
+
+# cifar100 = Cifar100(mode='train', transform=normalize)
+
+# data = cifar100[0]
+
+# image, label = data
+
+# reader = build_dataloader('train')
+
+# for idx, data in enumerate(reader):
+#     print(idx, data[0].shape, data[1].shape)
+#     if idx >= 10:
+#         break
--- a/test_tipc/supplementary/load_cifar.py
+++ b/test_tipc/supplementary/load_cifar.py
+import pickle as p
+import numpy as np
+from PIL import Image
+
+
+def load_CIFAR_batch(filename):
+    """ load single batch of cifar """
+    with open(filename, 'rb') as f:
+        datadict = p.load(f, encoding='bytes')
+        # 以字典的形式取出数据
+        X = datadict[b'data']
+        Y = datadict[b'fine_labels']
+        try:
+            X = X.reshape(10000, 3, 32, 32)
+        except:
+            X = X.reshape(50000, 3, 32, 32)
+        Y = np.array(Y)
+        print(Y.shape)
+        return X, Y
+
+
+if __name__ == "__main__":
+    mode = "train"
+    imgX, imgY = load_CIFAR_batch(f"./cifar-100-python/{mode}")
+    with open(f'./cifar-100-python/{mode}_imgs/img_label.txt', 'a+') as f:
+        for i in range(imgY.shape[0]):
+            f.write('img' + str(i) + ' ' + str(imgY[i]) + '\n')
+
+    for i in range(imgX.shape[0]):
+        imgs = imgX[i]
+        img0 = imgs[0]
+        img1 = imgs[1]
+        img2 = imgs[2]
+        i0 = Image.fromarray(img0)
+        i1 = Image.fromarray(img1)
+        i2 = Image.fromarray(img2)
+        img = Image.merge("RGB", (i0, i1, i2))
+        name = "img" + str(i) + ".png"
+        img.save(f"./cifar-100-python/{mode}_imgs/" + name, "png")
+    print("save successfully!")
--- a/test_tipc/supplementary/loss.py
+++ b/test_tipc/supplementary/loss.py
+import paddle
+import paddle.nn.functional as F
+
+
+class Loss(object):
+    """
+    Loss
+    """
+
+    def __init__(self, class_dim=1000, epsilon=None):
+        assert class_dim > 1, "class_dim=%d is not larger than 1" % (class_dim)
+        self._class_dim = class_dim
+        if epsilon is not None and epsilon >= 0.0 and epsilon <= 1.0:
+            self._epsilon = epsilon
+            self._label_smoothing = True
+        else:
+            self._epsilon = None
+            self._label_smoothing = False
+
+    def _labelsmoothing(self, target):
+        if target.shape[-1] != self._class_dim:
+            one_hot_target = F.one_hot(target, self._class_dim)
+        else:
+            one_hot_target = target
+        soft_target = F.label_smooth(one_hot_target, epsilon=self._epsilon)
+        soft_target = paddle.reshape(soft_target, shape=[-1, self._class_dim])
+        return soft_target
+
+    def _crossentropy(self, input, target, use_pure_fp16=False):
+        if self._label_smoothing:
+            target = self._labelsmoothing(target)
+            input = -F.log_softmax(input, axis=-1)
+            cost = paddle.sum(target * input, axis=-1)
+        else:
+            cost = F.cross_entropy(input=input, label=target)
+        if use_pure_fp16:
+            avg_cost = paddle.sum(cost)
+        else:
+            avg_cost = paddle.mean(cost)
+        return avg_cost
+
+    def __call__(self, input, target):
+        return self._crossentropy(input, target)
+
+
+def build_loss(config, epsilon=None):
+    class_dim = config['class_dim']
+    loss_func = Loss(class_dim=class_dim, epsilon=epsilon)
+    return loss_func
+
+
+class LossDistill(Loss):
+    def __init__(self, model_name_list, class_dim=1000, epsilon=None):
+        assert class_dim > 1, "class_dim=%d is not larger than 1" % (class_dim)
+        self._class_dim = class_dim
+        if epsilon is not None and epsilon >= 0.0 and epsilon <= 1.0:
+            self._epsilon = epsilon
+            self._label_smoothing = True
+        else:
+            self._epsilon = None
+            self._label_smoothing = False
+
+        self.model_name_list = model_name_list
+        assert len(self.model_name_list) > 1, "error"
+
+    def __call__(self, input, target):
+        losses = {}
+        for k in self.model_name_list:
+            inp = input[k]
+            losses[k] = self._crossentropy(inp, target)
+        return losses
+
+
+class KLJSLoss(object):
+    def __init__(self, mode='kl'):
+        assert mode in ['kl', 'js', 'KL', 'JS'
+                        ], "mode can only be one of ['kl', 'js', 'KL', 'JS']"
+        self.mode = mode
+
+    def __call__(self, p1, p2, reduction="mean"):
+        p1 = F.softmax(p1, axis=-1)
+        p2 = F.softmax(p2, axis=-1)
+
+        loss = paddle.multiply(p2, paddle.log((p2 + 1e-5) / (p1 + 1e-5) + 1e-5))
+
+        if self.mode.lower() == "js":
+            loss += paddle.multiply(
+                p1, paddle.log((p1 + 1e-5) / (p2 + 1e-5) + 1e-5))
+            loss *= 0.5
+        if reduction == "mean":
+            loss = paddle.mean(loss)
+        elif reduction == "none" or reduction is None:
+            return loss
+        else:
+            loss = paddle.sum(loss)
+        return loss
+
+
+class DMLLoss(object):
+    def __init__(self, model_name_pairs, mode='js'):
+
+        self.model_name_pairs = self._check_model_name_pairs(model_name_pairs)
+        self.kljs_loss = KLJSLoss(mode=mode)
+
+    def _check_model_name_pairs(self, model_name_pairs):
+        if not isinstance(model_name_pairs, list):
+            return []
+        elif isinstance(model_name_pairs[0], list) and isinstance(
+                model_name_pairs[0][0], str):
+            return model_name_pairs
+        else:
+            return [model_name_pairs]
+
+    def __call__(self, predicts, target=None):
+        loss_dict = dict()
+        for pairs in self.model_name_pairs:
+            p1 = predicts[pairs[0]]
+            p2 = predicts[pairs[1]]
+
+            loss_dict[pairs[0] + "_" + pairs[1]] = self.kljs_loss(p1, p2)
+
+        return loss_dict
+
+
+# def build_distill_loss(config, epsilon=None):
+#     class_dim = config['class_dim']
+#     loss = LossDistill(model_name_list=['student', 'student1'], )
+#     return loss_func
--- a/test_tipc/supplementary/metric.py
+++ b/test_tipc/supplementary/metric.py
+import paddle
+import paddle.nn.functional as F
+from collections import OrderedDict
+
+
+def create_metric(out,
+                  label,
+                  architecture=None,
+                  topk=5,
+                  classes_num=1000,
+                  use_distillation=False,
+                  mode="train"):
+    """
+    Create measures of model accuracy, such as top1 and top5
+
+    Args:
+        out(variable): model output variable
+        feeds(dict): dict of model input variables(included label)
+        topk(int): usually top5
+        classes_num(int): num of classes
+        use_distillation(bool): whether to use distillation training
+        mode(str): mode, train/valid
+
+    Returns:
+        fetchs(dict): dict of measures
+    """
+    # if architecture["name"] == "GoogLeNet":
+    #     assert len(out) == 3, "GoogLeNet should have 3 outputs"
+    #     out = out[0]
+    # else:
+    #     # just need student label to get metrics
+    #     if use_distillation:
+    #         out = out[1]
+    softmax_out = F.softmax(out)
+
+    fetchs = OrderedDict()
+    # set top1 to fetchs
+    top1 = paddle.metric.accuracy(softmax_out, label=label, k=1)
+    # set topk to fetchs
+    k = min(topk, classes_num)
+    topk = paddle.metric.accuracy(softmax_out, label=label, k=k)
+
+    # multi cards' eval
+    if mode != "train" and paddle.distributed.get_world_size() > 1:
+        top1 = paddle.distributed.all_reduce(
+            top1, op=paddle.distributed.ReduceOp.
+            SUM) / paddle.distributed.get_world_size()
+        topk = paddle.distributed.all_reduce(
+            topk, op=paddle.distributed.ReduceOp.
+            SUM) / paddle.distributed.get_world_size()
+
+    fetchs['top1'] = top1
+    topk_name = 'top{}'.format(k)
+    fetchs[topk_name] = topk
+
+    return fetchs
--- a/test_tipc/supplementary/mv3.py
+++ b/test_tipc/supplementary/mv3.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn.functional import hardswish, hardsigmoid
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.regularizer import L2Decay
+import math
+
+from paddle.utils.cpp_extension import load
+# jit compile custom op
+# This runs at import time, and the source paths are relative, so the
+# module has to be imported from the directory that contains custom_op/.
+custom_ops = load(
+    name="custom_jit_ops",
+    sources=["./custom_op/custom_relu_op.cc", "./custom_op/custom_relu_op.cu"])
+
+
+def make_divisible(v, divisor=8, min_value=None):
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+class MobileNetV3(nn.Layer):
+    """MobileNetV3 image classifier.
+
+    Args:
+        scale(float): width multiplier applied to the channel counts (each
+            result is rounded by ``make_divisible``).
+        model_name(str): "large" or "small"; selects the block table.
+        dropout_prob(float): dropout probability before the classifier.
+        class_dim(int): number of output classes.
+        use_custom_relu(bool): passed down to every ConvBNLayer /
+            ResidualUnit to select the custom relu op.
+
+    Raises:
+        NotImplementedError: if ``model_name`` is neither "large" nor
+            "small".
+    """
+
+    def __init__(self,
+                 scale=1.0,
+                 model_name="small",
+                 dropout_prob=0.2,
+                 class_dim=1000,
+                 use_custom_relu=False):
+        super(MobileNetV3, self).__init__()
+        self.use_custom_relu = use_custom_relu
+
+        inplanes = 16
+        # Each cfg row describes one ResidualUnit: kernel size, expansion
+        # (mid) channels, output channels, whether to use SE, activation
+        # name and stride.
+        if model_name == "large":
+            self.cfg = [
+                # k, exp, c,  se,     nl,  s,
+                [3, 16, 16, False, "relu", 1],
+                [3, 64, 24, False, "relu", 2],
+                [3, 72, 24, False, "relu", 1],
+                [5, 72, 40, True, "relu", 2],
+                [5, 120, 40, True, "relu", 1],
+                [5, 120, 40, True, "relu", 1],
+                [3, 240, 80, False, "hardswish", 2],
+                [3, 200, 80, False, "hardswish", 1],
+                [3, 184, 80, False, "hardswish", 1],
+                [3, 184, 80, False, "hardswish", 1],
+                [3, 480, 112, True, "hardswish", 1],
+                [3, 672, 112, True, "hardswish", 1],
+                [5, 672, 160, True, "hardswish", 2],
+                [5, 960, 160, True, "hardswish", 1],
+                [5, 960, 160, True, "hardswish", 1],
+            ]
+            self.cls_ch_squeeze = 960
+            self.cls_ch_expand = 1280
+        elif model_name == "small":
+            self.cfg = [
+                # k, exp, c,  se,     nl,  s,
+                [3, 16, 16, True, "relu", 2],
+                [3, 72, 24, False, "relu", 2],
+                [3, 88, 24, False, "relu", 1],
+                [5, 96, 40, True, "hardswish", 2],
+                [5, 240, 40, True, "hardswish", 1],
+                [5, 240, 40, True, "hardswish", 1],
+                [5, 120, 48, True, "hardswish", 1],
+                [5, 144, 48, True, "hardswish", 1],
+                [5, 288, 96, True, "hardswish", 2],
+                [5, 576, 96, True, "hardswish", 1],
+                [5, 576, 96, True, "hardswish", 1],
+            ]
+            self.cls_ch_squeeze = 576
+            self.cls_ch_expand = 1280
+        else:
+            raise NotImplementedError(
+                "mode[{}_model] is not implemented!".format(model_name))
+
+        # Stem: 3x3 stride-2 convolution on the 3-channel input.
+        self.conv1 = ConvBNLayer(
+            in_c=3,
+            out_c=make_divisible(inplanes * scale),
+            filter_size=3,
+            stride=2,
+            padding=1,
+            num_groups=1,
+            if_act=True,
+            act="hardswish",
+            name="conv1",
+            use_custom_relu=self.use_custom_relu)
+
+        # block_list is a plain Python list; every block is registered via
+        # add_sublayer under the names "conv2", "conv3", ...
+        self.block_list = []
+        i = 0
+        inplanes = make_divisible(inplanes * scale)
+        for (k, exp, c, se, nl, s) in self.cfg:
+            block = self.add_sublayer(
+                "conv" + str(i + 2),
+                ResidualUnit(
+                    in_c=inplanes,
+                    mid_c=make_divisible(scale * exp),
+                    out_c=make_divisible(scale * c),
+                    filter_size=k,
+                    stride=s,
+                    use_se=se,
+                    act=nl,
+                    name="conv" + str(i + 2),
+                    use_custom_relu=self.use_custom_relu))
+            self.block_list.append(block)
+            # The next block consumes this block's output channels.
+            inplanes = make_divisible(scale * c)
+            i += 1
+
+        self.last_second_conv = ConvBNLayer(
+            in_c=inplanes,
+            out_c=make_divisible(scale * self.cls_ch_squeeze),
+            filter_size=1,
+            stride=1,
+            padding=0,
+            num_groups=1,
+            if_act=True,
+            act="hardswish",
+            name="conv_last",
+            use_custom_relu=self.use_custom_relu)
+
+        self.pool = AdaptiveAvgPool2D(1)
+
+        self.last_conv = Conv2D(
+            in_channels=make_divisible(scale * self.cls_ch_squeeze),
+            out_channels=self.cls_ch_expand,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            weight_attr=ParamAttr(),
+            bias_attr=False)
+
+        self.dropout = Dropout(p=dropout_prob, mode="downscale_in_infer")
+
+        self.out = Linear(
+            self.cls_ch_expand,
+            class_dim,
+            weight_attr=ParamAttr(),
+            bias_attr=ParamAttr())
+
+    def forward(self, inputs, label=None):
+        """Return the class logits for ``inputs``.
+
+        ``label`` is accepted but not used.
+        """
+        x = self.conv1(inputs)
+
+        for block in self.block_list:
+            x = block(x)
+
+        x = self.last_second_conv(x)
+        x = self.pool(x)
+
+        x = self.last_conv(x)
+        x = hardswish(x)
+        x = self.dropout(x)
+        # Collapse everything except the batch axis before the classifier.
+        x = paddle.flatten(x, start_axis=1, stop_axis=-1)
+        x = self.out(x)
+        return x
+
+
+class ConvBNLayer(nn.Layer):
+    def __init__(self,
+                 in_c,
+                 out_c,
+                 filter_size,
+                 stride,
+                 padding,
+                 num_groups=1,
+                 if_act=True,
+                 act=None,
+                 use_cudnn=True,
+                 name="",
+                 use_custom_relu=False):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+        self.conv = Conv2D(
+            in_channels=in_c,
+            out_channels=out_c,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            weight_attr=ParamAttr(),
+            bias_attr=False)
+        self.bn = BatchNorm(
+            num_channels=out_c,
+            act=None,
+            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+        # moving_mean_name=name + "_bn_mean",
+        # moving_variance_name=name + "_bn_variance")
+
+        self.use_custom_relu = use_custom_relu
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        if self.if_act:
+            if self.act == "relu":
+                if self.use_custom_relu:
+                    x = custom_ops.custom_relu(x)
+                else:
+                    x = F.relu(x)
+            elif self.act == "hardswish":
+                x = hardswish(x)
+            else:
+                print("The activation function is selected incorrectly.")
+                exit()
+        return x
+
+
+class ResidualUnit(nn.Layer):
+    """Residual block: 1x1 expand conv -> depthwise conv -> optional SE ->
+    1x1 linear conv, with a shortcut when shapes allow it.
+
+    Args:
+        in_c(int): input channels.
+        mid_c(int): channels of the expanded representation.
+        out_c(int): output channels.
+        filter_size(int): kernel size of the depthwise convolution.
+        stride(int): stride of the depthwise convolution.
+        use_se(bool): insert an SEModule after the depthwise convolution.
+        act(str): activation of the expand and depthwise convolutions.
+        name(str): prefix for the names handed to the sublayers.
+        use_custom_relu(bool): passed down to the ConvBNLayers.
+    """
+
+    def __init__(self,
+                 in_c,
+                 mid_c,
+                 out_c,
+                 filter_size,
+                 stride,
+                 use_se,
+                 act=None,
+                 name='',
+                 use_custom_relu=False):
+        super(ResidualUnit, self).__init__()
+        # The shortcut is only used when input and output have the same
+        # channel count and the block does not downsample.
+        self.if_shortcut = stride == 1 and in_c == out_c
+        self.if_se = use_se
+
+        self.use_custom_relu = use_custom_relu
+
+        self.expand_conv = ConvBNLayer(
+            in_c=in_c,
+            out_c=mid_c,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            if_act=True,
+            act=act,
+            name=name + "_expand",
+            use_custom_relu=self.use_custom_relu)
+        # num_groups == mid_c makes this a depthwise convolution.
+        self.bottleneck_conv = ConvBNLayer(
+            in_c=mid_c,
+            out_c=mid_c,
+            filter_size=filter_size,
+            stride=stride,
+            padding=int((filter_size - 1) // 2),
+            num_groups=mid_c,
+            if_act=True,
+            act=act,
+            name=name + "_depthwise",
+            use_custom_relu=self.use_custom_relu)
+        if self.if_se:
+            self.mid_se = SEModule(mid_c, name=name + "_se")
+        # Projection back to out_c without an activation.
+        self.linear_conv = ConvBNLayer(
+            in_c=mid_c,
+            out_c=out_c,
+            filter_size=1,
+            stride=1,
+            padding=0,
+            if_act=False,
+            act=None,
+            name=name + "_linear",
+            use_custom_relu=self.use_custom_relu)
+
+    def forward(self, inputs):
+        """Run the block; adds ``inputs`` to the result when the shortcut
+        is enabled."""
+        x = self.expand_conv(inputs)
+        x = self.bottleneck_conv(x)
+        if self.if_se:
+            x = self.mid_se(x)
+        x = self.linear_conv(x)
+        if self.if_shortcut:
+            x = paddle.add(inputs, x)
+        return x
+
+
+class SEModule(nn.Layer):
+    def __init__(self, channel, reduction=4, name=""):
+        super(SEModule, self).__init__()
+        self.avg_pool = AdaptiveAvgPool2D(1)
+        self.conv1 = Conv2D(
+            in_channels=channel,
+            out_channels=channel // reduction,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            weight_attr=ParamAttr(),
+            bias_attr=ParamAttr())
+        self.conv2 = Conv2D(
+            in_channels=channel // reduction,
+            out_channels=channel,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            weight_attr=ParamAttr(),
+            bias_attr=ParamAttr())
+
+    def forward(self, inputs):
+        outputs = self.avg_pool(inputs)
+        outputs = self.conv1(outputs)
+        outputs = F.relu(outputs)
+        outputs = self.conv2(outputs)
+        outputs = hardsigmoid(outputs, slope=0.2, offset=0.5)
+        return paddle.multiply(x=inputs, y=outputs)
+
+
+def MobileNetV3_small_x0_35(**args):
+    model = MobileNetV3(model_name="small", scale=0.35, **args)
+    return model
+
+
+def MobileNetV3_small_x0_5(**args):
+    """Return ``MobileNetV3(model_name="small", scale=0.5)``.
+
+    Any keyword arguments are forwarded unchanged to ``MobileNetV3``.
+    """
+    return MobileNetV3(model_name="small", scale=0.5, **args)
+
+
+def MobileNetV3_small_x0_75(**args):
+    """Return ``MobileNetV3(model_name="small", scale=0.75)``.
+
+    Any keyword arguments are forwarded unchanged to ``MobileNetV3``.
+    """
+    return MobileNetV3(model_name="small", scale=0.75, **args)
+
+
+def MobileNetV3_small_x1_0(**args):
+    """Return ``MobileNetV3(model_name="small", scale=1.0)``.
+
+    Any keyword arguments are forwarded unchanged to ``MobileNetV3``.
+    """
+    return MobileNetV3(model_name="small", scale=1.0, **args)
+
+
+def MobileNetV3_small_x1_25(**args):
+    """Return ``MobileNetV3(model_name="small", scale=1.25)``.
+
+    Any keyword arguments are forwarded unchanged to ``MobileNetV3``.
+    """
+    return MobileNetV3(model_name="small", scale=1.25, **args)
+
+
+def MobileNetV3_large_x0_35(**args):
+    """Return ``MobileNetV3(model_name="large", scale=0.35)``.
+
+    Any keyword arguments are forwarded unchanged to ``MobileNetV3``.
+    """
+    return MobileNetV3(model_name="large", scale=0.35, **args)
+
+
+def MobileNetV3_large_x0_5(**args):
+    """Return ``MobileNetV3(model_name="large", scale=0.5)``.
+
+    Any keyword arguments are forwarded unchanged to ``MobileNetV3``.
+    """
+    return MobileNetV3(model_name="large", scale=0.5, **args)
+
+
+def MobileNetV3_large_x0_75(**args):
+    """Return ``MobileNetV3(model_name="large", scale=0.75)``.
+
+    Any keyword arguments are forwarded unchanged to ``MobileNetV3``.
+    """
+    return MobileNetV3(model_name="large", scale=0.75, **args)
+
+
+def MobileNetV3_large_x1_0(**args):
+    """Return ``MobileNetV3(model_name="large", scale=1.0)``.
+
+    Any keyword arguments are forwarded unchanged to ``MobileNetV3``.
+    """
+    return MobileNetV3(model_name="large", scale=1.0, **args)
+
+
+def MobileNetV3_large_x1_25(**args):
+    """Return ``MobileNetV3(model_name="large", scale=1.25)``.
+
+    Any keyword arguments are forwarded unchanged to ``MobileNetV3``.
+    """
+    model = MobileNetV3(model_name="large", scale=1.25, **args)
+    # Hand the constructed network back to the caller; a bare ``return``
+    # here would make this factory yield ``None``.
+    return model
+
+
+class DistillMV3(nn.Layer):
+    """Pair of identically configured ``MobileNetV3`` networks.
+
+    Both sub-networks (``student`` and ``student1``) are built with the same
+    ``model_name``, ``scale``, ``class_dim`` and ``use_custom_relu`` and are
+    evaluated on the same input.
+
+    Args:
+        scale: Forwarded to ``MobileNetV3`` as ``scale``.
+        model_name: Forwarded to ``MobileNetV3`` as ``model_name``.
+        dropout_prob: Accepted but not used by this constructor.
+        class_dim: Forwarded to ``MobileNetV3`` as ``class_dim``.
+        args: Accepted but not used by this constructor.
+        use_custom_relu: Forwarded to ``MobileNetV3`` as ``use_custom_relu``.
+    """
+
+    def __init__(self,
+                 scale=1.0,
+                 model_name="small",
+                 dropout_prob=0.2,
+                 class_dim=1000,
+                 args=None,
+                 use_custom_relu=False):
+        super(DistillMV3, self).__init__()
+
+        # One shared keyword set keeps the two sub-networks configured alike.
+        backbone_kwargs = dict(
+            model_name=model_name,
+            scale=scale,
+            class_dim=class_dim,
+            use_custom_relu=use_custom_relu)
+
+        self.student = MobileNetV3(**backbone_kwargs)
+        self.student1 = MobileNetV3(**backbone_kwargs)
+
+    def forward(self, inputs, label=None):
+        """Return a dict with each sub-network's output.
+
+        The keys are ``'student'`` and ``'student1'``; each sub-network is
+        called as ``net(inputs, label)``.
+        """
+        return {
+            'student': self.student(inputs, label),
+            'student1': self.student1(inputs, label),
+        }
+
+
+def distillmv3_large_x0_5(**args):
+    """Return ``DistillMV3(model_name="large", scale=0.5)``.
+
+    Any keyword arguments are forwarded unchanged to ``DistillMV3``.
+    """
+    return DistillMV3(model_name="large", scale=0.5, **args)
+
+
+class SiameseMV3(nn.Layer):
+    """Two ``MobileNetV3`` trunks whose features are summed before one head.
+
+    ``net`` and ``net1`` are built with identical settings. In ``forward``
+    each runs its ``conv1`` and ``block_list`` on the input; the two results
+    are added and then passed through the remaining layers of ``net`` only.
+
+    Args:
+        scale: Forwarded to ``MobileNetV3`` as ``scale``.
+        model_name: Forwarded to ``MobileNetV3`` as ``model_name``.
+        dropout_prob: Accepted but not used by this constructor.
+        class_dim: Forwarded to ``MobileNetV3`` as ``class_dim``.
+        args: Accepted but not used by this constructor.
+        use_custom_relu: Forwarded to ``MobileNetV3`` as ``use_custom_relu``.
+    """
+
+    def __init__(self,
+                 scale=1.0,
+                 model_name="small",
+                 dropout_prob=0.2,
+                 class_dim=1000,
+                 args=None,
+                 use_custom_relu=False):
+        super(SiameseMV3, self).__init__()
+
+        backbone_kwargs = dict(
+            model_name=model_name,
+            scale=scale,
+            class_dim=class_dim,
+            use_custom_relu=use_custom_relu)
+
+        self.net = MobileNetV3(**backbone_kwargs)
+        self.net1 = MobileNetV3(**backbone_kwargs)
+
+    @staticmethod
+    def _run_trunk(backbone, inputs):
+        """Apply ``backbone.conv1`` followed by every layer in its ``block_list``."""
+        feat = backbone.conv1(inputs)
+        for layer in backbone.block_list:
+            feat = layer(feat)
+        return feat
+
+    def forward(self, inputs, label=None):
+        """Return the output of ``net``'s head applied to the summed trunks.
+
+        ``label`` is accepted but not used.
+        """
+        # The first trunk is evaluated completely before the second one.
+        merged = self._run_trunk(self.net, inputs)
+        merged = merged + self._run_trunk(self.net1, inputs)
+
+        # Only ``net`` contributes the layers after the trunk.
+        head = self.net
+        merged = head.pool(head.last_second_conv(merged))
+        merged = hardswish(head.last_conv(merged))
+        merged = head.dropout(merged)
+        merged = paddle.flatten(merged, start_axis=1, stop_axis=-1)
+        return head.out(merged)
+
+
+def siamese_mv3(class_dim, use_custom_relu):
+    """Return a ``SiameseMV3`` fixed to ``model_name="large"`` and ``scale=0.5``.
+
+    Args:
+        class_dim: Forwarded to ``SiameseMV3`` as ``class_dim``.
+        use_custom_relu: Forwarded to ``SiameseMV3`` as ``use_custom_relu``.
+    """
+    fixed = dict(scale=0.5, model_name="large")
+    return SiameseMV3(
+        class_dim=class_dim, use_custom_relu=use_custom_relu, **fixed)
+
+
+def build_model(config):
+    """Construct the network selected by ``config['model_type']``.
+
+    Supported values of ``config['model_type']``:
+
+    * ``"cls"`` -- ``siamese_mv3`` when ``config['MODEL']['siamese']`` is
+      present and ``True``, otherwise ``MobileNetV3_large_x0_5``.
+    * ``"cls_distill"`` and ``"cls_distill_multiopt"`` --
+      ``distillmv3_large_x0_5``.
+
+    In every case ``class_dim`` and ``use_custom_relu`` are taken from
+    ``config['MODEL']``.
+
+    Args:
+        config: Mapping with a ``'model_type'`` entry and a ``'MODEL'``
+            mapping holding ``'class_dim'`` and ``'use_custom_relu'``.
+
+    Returns:
+        The constructed model.
+
+    Raises:
+        ValueError: If ``config['model_type']`` is not a supported value.
+        KeyError: If a required entry is missing from ``config``.
+    """
+    supported = ("cls", "cls_distill", "cls_distill_multiopt")
+    model_type = config['model_type']
+    # Reject unknown types before touching config['MODEL'], so a bad
+    # model_type is reported as such rather than as a missing key.
+    if model_type not in supported:
+        raise ValueError(
+            "model_type should be one of {}, but got {!r}".format(
+                list(supported), model_type))
+
+    model_cfg = config['MODEL']
+    class_dim = model_cfg['class_dim']
+    use_custom_relu = model_cfg['use_custom_relu']
+
+    if model_type == "cls":
+        if 'siamese' in model_cfg and model_cfg['siamese'] is True:
+            return siamese_mv3(
+                class_dim=class_dim, use_custom_relu=use_custom_relu)
+        return MobileNetV3_large_x0_5(
+            class_dim=class_dim, use_custom_relu=use_custom_relu)
+
+    # Both distillation variants build the same network. The configured
+    # class_dim is honoured for "cls_distill_multiopt" as well, instead of
+    # a hard-coded 100.
+    return distillmv3_large_x0_5(
+        class_dim=class_dim, use_custom_relu=use_custom_relu)
--- a/test_tipc/supplementary/mv3_distill.yml
+++ b/test_tipc/supplementary/mv3_distill.yml
+
+# Settings that select model_type "cls_distill", which build_model maps to
+# distillmv3_large_x0_5.
+# NOTE(review): build_model reads config['MODEL']['class_dim'] and
+# config['MODEL']['use_custom_relu'] for this model_type, but this file
+# defines no MODEL section -- confirm it is supplied elsewhere.
+class_dim: 100
+total_images: 50000
+# NOTE(review): the sibling mv3_large_x0_5.yml spells this key "epoch" --
+# confirm which spelling the training script reads.
+epochs: 1000
+topk: 5
+save_model_dir: ./output/
+use_gpu: True
+model_type: cls_distill
+
+LEARNING_RATE:
+    function: 'Cosine'
+    params:
+        lr: 0.001
+        warmup_epoch: 5
+
+OPTIMIZER:
+    function: 'Momentum'
+    params:
+        momentum: 0.9
+    regularizer:
+        function: 'L2'
+        factor: 0.00002
+
+TRAIN:
+    batch_size: 1280
+    num_workers: 4
+
+VALID:
+    batch_size: 64
+    num_workers: 4
+
--- a/test_tipc/supplementary/mv3_large_x0_5.yml
+++ b/test_tipc/supplementary/mv3_large_x0_5.yml
+
+# Settings that select model_type "cls"; with MODEL.siamese set to False,
+# build_model maps this to MobileNetV3_large_x0_5.
+# NOTE(review): build_model takes class_dim and use_custom_relu from the
+# MODEL section below; whether the top-level copies of those keys are read
+# anywhere is not visible here -- confirm.
+class_dim: 100
+total_images: 50000
+# NOTE(review): the sibling mv3_distill.yml spells this key "epochs" --
+# confirm which spelling the training script reads.
+epoch: 1000
+topk: 5
+# NOTE(review): save_model_dir is defined again a few lines below with a
+# different value (./output/cls/); duplicate mapping keys are loader
+# dependent -- confirm which value is intended and drop the other.
+save_model_dir: ./output/
+use_gpu: True
+model_type: cls
+use_custom_relu: false
+pretrained_model: 
+checkpoints: 
+save_model_dir: ./output/cls/
+
+# slim
+quant_train: false
+prune_train: false
+
+# Section read by build_model (class_dim, use_custom_relu, siamese).
+MODEL:
+    class_dim: 100
+    use_custom_relu: False
+    siamese: False
+
+AMP:
+    use_amp: False
+    scale_loss: 1024.0
+    use_dynamic_loss_scale: True
+
+LEARNING_RATE:
+    function: 'Cosine'
+    params:
+        lr: 0.001
+        warmup_epoch: 5
+
+OPTIMIZER:
+    function: 'Momentum'
+    params:
+        momentum: 0.9
+    regularizer:
+        function: 'L2'
+        factor: 0.00002
+
+TRAIN:
+    batch_size: 1280
+    num_workers: 4
+
+VALID:
+    batch_size: 64
+    num_workers: 4
+