Merge remote-tracking branch 'origin/dygraph' into dygraph

# Conflicts: PPOCRLabel/libs/resources.py — 改为大写 (changed to uppercase)

Merge remote-tracking branch 'origin/dygraph' into dygraph
# Conflicts: PPOCRLabel/libs/resources.py — 改为大写 (changed to uppercase)
f6d2bc9b · Alchemist_W · 648a43fd · 07026825 · f6d2bc9b · f6d2bc9b
Commit f6d2bc9b authored Jul 21, 2021 by Alchemist_W
14 changed files
--- a/ppocr/metrics/distillation_metric.py
+++ b/ppocr/metrics/distillation_metric.py
@@ -24,8 +24,8 @@ from .cls_metric import ClsMetric
 class DistillationMetric(object):
    def __init__(self,
                 key=None,
-                 base_metric_name="RecMetric",
-                 main_indicator='acc',
+                 base_metric_name=None,
+                 main_indicator=None,
                 **kwargs):
        self.main_indicator = main_indicator
        self.key = key
@@ -42,16 +42,13 @@ class DistillationMetric(object):
                main_indicator=self.main_indicator, **self.kwargs)
            self.metrics[key].reset()

-    def __call__(self, preds, *args, **kwargs):
+    def __call__(self, preds, batch, **kwargs):
        assert isinstance(preds, dict)
        if self.metrics is None:
            self._init_metrcis(preds)
        output = dict()
        for key in preds:
-            metric = self.metrics[key].__call__(preds[key], *args, **kwargs)
-            for sub_key in metric:
-                output["{}_{}".format(key, sub_key)] = metric[sub_key]
-        return output
+            self.metrics[key].__call__(preds[key], batch, **kwargs)

    def get_metric(self):
        """

--- a/ppocr/modeling/architectures/base_model.py
+++ b/ppocr/modeling/architectures/base_model.py
@@ -79,7 +79,10 @@ class BaseModel(nn.Layer):
            x = self.neck(x)
        y["neck_out"] = x
        x = self.head(x, targets=data)
-        y["head_out"] = x
+        if isinstance(x, dict):
+            y.update(x)
+        else:
+            y["head_out"] = x
        if self.return_all_feats:
            return y
        else:

--- a/ppocr/modeling/architectures/distillation_model.py
+++ b/ppocr/modeling/architectures/distillation_model.py
@@ -21,7 +21,7 @@ from ppocr.modeling.backbones import build_backbone
 from ppocr.modeling.necks import build_neck
 from ppocr.modeling.heads import build_head
 from .base_model import BaseModel
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import init_model, load_pretrained_params

 __all__ = ['DistillationModel']

@@ -46,7 +46,7 @@ class DistillationModel(nn.Layer):
                pretrained = model_config.pop("pretrained")
            model = BaseModel(model_config)
            if pretrained is not None:
-                init_model(model, path=pretrained)
+                model = load_pretrained_params(model, pretrained)
            if freeze_params:
                for param in model.parameters():
                    param.trainable = False

--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -21,7 +21,7 @@ import copy

 __all__ = ['build_post_process']

-from .db_postprocess import DBPostProcess
+from .db_postprocess import DBPostProcess, DistillationDBPostProcess
 from .east_postprocess import EASTPostProcess
 from .sast_postprocess import SASTPostProcess
 from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, \
@@ -34,7 +34,8 @@ def build_post_process(config, global_config=None):
    support_dict = [
        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
        'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
-        'DistillationCTCLabelDecode', 'TableLabelDecode'
+        'DistillationCTCLabelDecode', 'TableLabelDecode',
+        'DistillationDBPostProcess'
    ]

    config = copy.deepcopy(config)

--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@@ -187,3 +187,29 @@ class DBPostProcess(object):

            boxes_batch.append({'points': boxes})
        return boxes_batch
+
+
+class DistillationDBPostProcess(object):
+    """Apply one shared DBPostProcess to the outputs of several sub-models.
+
+    `predicts` is indexed by every name in `model_name`, and the results come
+    back in a dict keyed by the same names. `key` is stored but never read by
+    this class; extra keyword arguments are accepted and ignored.
+    """
+
+    def __init__(self, model_name=None, key=None, thresh=0.3, box_thresh=0.6,
+                 max_candidates=1000, unclip_ratio=1.5, use_dilation=False,
+                 score_mode="fast", **kwargs):
+        # None stands in for the former mutable default ["student"], so the
+        # default list is no longer one object shared by every instance.
+        self.model_name = ["student"] if model_name is None else model_name
+        self.key = key
+        self.post_process = DBPostProcess(
+            thresh=thresh, box_thresh=box_thresh,
+            max_candidates=max_candidates, unclip_ratio=unclip_ratio,
+            use_dilation=use_dilation, score_mode=score_mode)
+
+    def __call__(self, predicts, shape_list):
+        # One post-processed result per configured sub-model name.
+        return {k: self.post_process(predicts[k], shape_list=shape_list)
+                for k in self.model_name}
--- a/ppocr/utils/save_load.py
+++ b/ppocr/utils/save_load.py
@@ -116,6 +116,27 @@ def load_dygraph_params(config, model, logger, optimizer):
        logger.info(f"loaded pretrained_model successful from {pm}")
        return {}

+def load_pretrained_params(model, path):
+    """Load shape-matching weights from a .pdparams file into `model`.
+
+    Pairs parameters by position. Returns `model`, or False on a bad path.
+    """
+    if path is None:
+        return False
+    weights = path if path.endswith('.pdparams') else path + '.pdparams'
+    if not os.path.exists(weights):  # test the exact file that gets loaded
+        print(f"The pretrained_model {path} does not exists!")
+        return False
+    params = paddle.load(weights)
+    state_dict, new_state_dict = model.state_dict(), {}
+    for k1, k2 in zip(state_dict.keys(), params.keys()):
+        if list(state_dict[k1].shape) == list(params[k2].shape):
+            new_state_dict[k1] = params[k2]
+        else:  # mismatched shapes are reported and left out of the update
+            print(f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !")
+    model.set_state_dict(new_state_dict)
+    print(f"load pretrain successful from {weights}")
+    return model

 def save_model(model,
               optimizer,

--- a/test/ocr_rec_params.txt
+++ b/test/ocr_rec_params.txt
+model_name:ocr_rec
+python:python
+gpu_list:0|0,1
+Global.auto_cast:null
+Global.epoch_num:10
+Global.save_model_dir:./output/
+Train.loader.batch_size_per_card:
+Global.use_gpu:
+Global.pretrained_model:null
+
+trainer:norm|pact
+norm_train:tools/train.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
+quant_train:deploy/slim/quantization/quant.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml
+fpgm_train:null
+distill_train:null
+
+eval:tools/eval.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml -o
+
+Global.save_inference_dir:./output/
+Global.pretrained_model:
+norm_export:tools/export_model.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml -o
+quant_export:deploy/slim/quantization/export_model.py -c configs/rec/rec_mv3_none_bilstm_ctc.yml -o
+fpgm_export:null
+distill_export:null
+
+inference:tools/infer/predict_rec.py
+--use_gpu:True|False
+--enable_mkldnn:True|False
+--cpu_threads:1|6
+--rec_batch_num:1
+--use_tensorrt:True|False
+--precision:fp32|fp16|int8
+--rec_model_dir:./inference/ch_ppocr_mobile_v2.0_rec_infer/
+--image_dir:./inference/rec_inference
+--save_log_path:./test/output/
\ No newline at end of file
--- a/test/prepare.sh
+++ b/test/prepare.sh
@@ -29,19 +29,21 @@ train_model_list=$(func_parser_value "${lines[0]}")

 trainer_list=$(func_parser_value "${lines[10]}")

-
 # MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer']
 MODE=$2
-# prepare pretrained weights and dataset 
-wget -nc -P  ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
-wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar
-cd pretrain_models && tar xf det_mv3_db_v2.0_train.tar && cd ../
-
+# prepare pretrained weights and dataset
+if [ "${train_model_list[*]}" = "ocr_det" ]; then  # quoted: an empty or multi-word value must not break the test
+  wget -nc -P ./pretrain_models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams
+  wget -nc -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/det_mv3_db_v2.0_train.tar
+  (cd pretrain_models && tar xf det_mv3_db_v2.0_train.tar)  # subshell: the working directory is restored even if tar fails
+fi
 if [ ${MODE} = "lite_train_infer" ];then
    # pretrain lite train data
    rm -rf ./train_data/icdar2015
    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_lite.tar
-    cd ./train_data/ && tar xf icdar2015_lite.tar
+    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar # todo change to bcebos
+
+    cd ./train_data/ && tar xf icdar2015_lite.tar && tar xf ic15_data.tar
    ln -s ./icdar2015_lite ./icdar2015
    cd ../
    epoch=10
@@ -49,13 +51,15 @@ if [ ${MODE} = "lite_train_infer" ];then
 elif [ ${MODE} = "whole_train_infer" ];then
    rm -rf ./train_data/icdar2015
    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015.tar
-    cd ./train_data/ && tar xf icdar2015.tar && cd ../
+    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar
+    cd ./train_data/ && tar xf icdar2015.tar && tar xf ic15_data.tar && cd ../
    epoch=500
    eval_batch_step=200
 elif [ ${MODE} = "whole_infer" ];then
    rm -rf ./train_data/icdar2015
    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/icdar2015_infer.tar
-    cd ./train_data/ && tar xf icdar2015_infer.tar
+    wget -nc -P ./train_data/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ic15_data.tar
+    cd ./train_data/ && tar xf icdar2015_infer.tar && tar xf ic15_data.tar
    ln -s ./icdar2015_infer ./icdar2015
    cd ../
    epoch=10
@@ -88,9 +92,11 @@ for train_model in ${train_model_list[*]}; do
    elif [ ${train_model} = "ocr_rec" ];then
        model_name="ocr_rec"
        yml_file="configs/rec/rec_mv3_none_bilstm_ctc.yml"
-        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_rec_data_200.tar 
-        cd ./inference && tar xf ch_rec_data_200.tar  && cd ../
-        img_dir="./inference/ch_rec_data_200/"
+        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/rec_inference.tar
+        cd ./inference && tar xf rec_inference.tar  && cd ../
+        img_dir="./inference/rec_inference/"
+        data_dir=./inference/rec_inference
+        data_label_file=[./inference/rec_inference/rec_gt_test.txt]
    fi

    # eval 

--- a/tools/eval.py
+++ b/tools/eval.py
@@ -27,7 +27,7 @@ from ppocr.data import build_dataloader
 from ppocr.modeling.architectures import build_model
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import init_model, load_pretrained_params
 from ppocr.utils.utility import print_dict
 import tools.program as program

@@ -55,7 +55,10 @@ def main():

    model = build_model(config['Architecture'])
    use_srn = config['Architecture']['algorithm'] == "SRN"
-    model_type = config['Architecture']['model_type']
+    if "model_type" in config['Architecture'].keys():
+        model_type = config['Architecture']['model_type']
+    else:
+        model_type = None

    best_model_dict = init_model(config, model)
    if len(best_model_dict):
@@ -68,7 +71,7 @@ def main():

    # start eval
    metric = program.eval(model, valid_dataloader, post_process_class,
-                          eval_class, model_type, use_srn)
+                        eval_class, model_type, use_srn)
    logger.info('metric eval ***************')
    for k, v in metric.items():
        logger.info('{}:{}'.format(k, v))

--- a/tools/infer/predict_cls.py
+++ b/tools/infer/predict_cls.py
@@ -112,7 +112,6 @@ class TextClassifier(object):
                if '180' in label and score > self.cls_thresh:
                    img_list[indices[beg_img_no + rno]] = cv2.rotate(
                        img_list[indices[beg_img_no + rno]], 1)
-        elapse = time.time() - starttime
        return img_list, cls_res, elapse


@@ -146,7 +145,6 @@ def main(args):
                                               cls_res[ino]))
    logger.info(
        "The predict time about text angle classify module is as follows: ")
-    text_classifier.cls_times.info(average=False)


 if __name__ == "__main__":

--- a/tools/infer/predict_rec.py
+++ b/tools/infer/predict_rec.py
@@ -64,6 +64,24 @@ class TextRecognizer(object):
        self.postprocess_op = build_post_process(postprocess_params)
        self.predictor, self.input_tensor, self.output_tensors, self.config = \
            utility.create_predictor(args, 'rec', logger)
+        self.benchmark = args.benchmark
+        if args.benchmark:
+            import auto_log
+            pid = os.getpid()
+            self.autolog = auto_log.AutoLogger(
+                model_name="rec",
+                model_precision=args.precision,
+                batch_size=args.rec_batch_num,
+                data_shape="dynamic",
+                save_path=args.save_log_path,
+                inference_config=self.config,
+                pids=pid,
+                process_name=None,
+                gpu_ids=0 if args.use_gpu else None,
+                time_keys=[
+                    'preprocess_time', 'inference_time', 'postprocess_time'
+                ],
+                warmup=10)

    def resize_norm_img(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.rec_image_shape
@@ -168,6 +186,8 @@ class TextRecognizer(object):
        rec_res = [['', 0.0]] * img_num
        batch_num = self.rec_batch_num
        st = time.time()
+        if self.benchmark:
+            self.autolog.times.start()
        for beg_img_no in range(0, img_num, batch_num):
            end_img_no = min(img_num, beg_img_no + batch_num)
            norm_img_batch = []
@@ -196,6 +216,8 @@ class TextRecognizer(object):
                    norm_img_batch.append(norm_img[0])
            norm_img_batch = np.concatenate(norm_img_batch)
            norm_img_batch = norm_img_batch.copy()
+            if self.benchmark:
+                self.autolog.times.stamp()

            if self.rec_algorithm == "SRN":
                encoder_word_pos_list = np.concatenate(encoder_word_pos_list)
@@ -222,6 +244,8 @@ class TextRecognizer(object):
                for output_tensor in self.output_tensors:
                    output = output_tensor.copy_to_cpu()
                    outputs.append(output)
+                if self.benchmark:
+                    self.autolog.times.stamp()
                preds = {"predict": outputs[2]}
            else:
                self.input_tensor.copy_from_cpu(norm_img_batch)
@@ -231,11 +255,14 @@ class TextRecognizer(object):
                for output_tensor in self.output_tensors:
                    output = output_tensor.copy_to_cpu()
                    outputs.append(output)
+                if self.benchmark:
+                    self.autolog.times.stamp()
                preds = outputs[0]
            rec_result = self.postprocess_op(preds)
            for rno in range(len(rec_result)):
                rec_res[indices[beg_img_no + rno]] = rec_result[rno]
-
+            if self.benchmark:
+                self.autolog.times.end(stamp=True)
        return rec_res, time.time() - st


@@ -251,9 +278,6 @@ def main(args):
        for i in range(10):
            res = text_recognizer([img])

-    cpu_mem, gpu_mem, gpu_util = 0, 0, 0
-    count = 0
-
    for image_file in image_file_list:
        img, flag = check_and_read_gif(image_file)
        if not flag:
@@ -273,6 +297,8 @@ def main(args):
    for ino in range(len(img_list)):
        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
                                               rec_res[ino]))
+    if args.benchmark:
+        text_recognizer.autolog.report()


 if __name__ == "__main__":

--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -24,9 +24,6 @@ from paddle import inference
 import time
 from ppocr.utils.logging import get_logger

-logger = get_logger()
-
-
 def str2bool(v):
    return v.lower() in ("true", "t", "1")


--- a/tools/program.py
+++ b/tools/program.py
@@ -186,7 +186,10 @@ def train(config,
    model.train()

    use_srn = config['Architecture']['algorithm'] == "SRN"
-    model_type = config['Architecture']['model_type']
+    try: 
+        model_type = config['Architecture']['model_type']
+    except: 
+        model_type = None

    if 'start_epoch' in best_model_dict:
        start_epoch = best_model_dict['start_epoch']

--- a/tools/train.py
+++ b/tools/train.py
@@ -98,7 +98,6 @@ def main(config, device, logger, vdl_writer):
    eval_class = build_metric(config['Metric'])
    # load pretrain model
    pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer)
-
    logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
    if valid_dataloader is not None:
        logger.info('valid dataloader has {} iters'.format(