Merge branch 'dygraph' into dygraph

8ad4e307 · MissPenguin · GitHub · b1d31f8a · 2062b509 · 8ad4e307
Unverified Commit 8ad4e307 authored Jul 13, 2021 by MissPenguin Committed by GitHub Jul 13, 2021
16 changed files
--- a/ppocr/modeling/architectures/distillation_model.py
+++ b/ppocr/modeling/architectures/distillation_model.py
@@ -21,7 +21,7 @@ from ppocr.modeling.backbones import build_backbone
 from ppocr.modeling.necks import build_neck
 from ppocr.modeling.heads import build_head
 from .base_model import BaseModel
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import init_model, load_pretrained_params
 __all__ = ['DistillationModel']
@@ -46,7 +46,7 @@ class DistillationModel(nn.Layer):
                pretrained = model_config.pop("pretrained")
            model = BaseModel(model_config)
            if pretrained is not None:
-                init_model(model, path=pretrained)
+                model = load_pretrained_params(model, pretrained)
            if freeze_params:
                for param in model.parameters():
                    param.trainable = False

--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
@@ -12,33 +12,36 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__all__ = ['build_backbone']
+__all__ = ["build_backbone"]
 def build_backbone(config, model_type):
-    if model_type == 'det':
+    if model_type == "det":
        from .det_mobilenet_v3 import MobileNetV3
        from .det_resnet_vd import ResNet
        from .det_resnet_vd_sast import ResNet_SAST
-        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
+        support_dict = ["MobileNetV3", "ResNet", "ResNet_SAST"]
-    elif model_type == 'rec' or model_type == 'cls':
+    elif model_type == "rec" or model_type == "cls":
        from .rec_mobilenet_v3 import MobileNetV3
        from .rec_resnet_vd import ResNet
        from .rec_resnet_fpn import ResNetFPN
-        support_dict = ['MobileNetV3', 'ResNet', 'ResNetFPN']
+        from .rec_mv1_enhance import MobileNetV1Enhance
-    elif model_type == 'e2e':
+        support_dict = [
+            "MobileNetV1Enhance", "MobileNetV3", "ResNet", "ResNetFPN"
+        ]
+    elif model_type == "e2e":
        from .e2e_resnet_vd_pg import ResNet
-        support_dict = ['ResNet']
+        support_dict = ["ResNet"]
    elif model_type == "table":
        from .table_resnet_vd import ResNet
        from .table_mobilenet_v3 import MobileNetV3
-        support_dict = ['ResNet', 'MobileNetV3']
+        support_dict = ["ResNet", "MobileNetV3"]
    else:
        raise NotImplementedError
-    module_name = config.pop('name')
+    module_name = config.pop("name")
    assert module_name in support_dict, Exception(
-        'when model typs is {}, backbone only support {}'.format(model_type,
+        "when model typs is {}, backbone only support {}".format(model_type,
                                                                 support_dict))
    module_class = eval(module_name)(**config)
    return module_class
--- a/ppocr/modeling/backbones/rec_mv1_enhance.py
+++ b/ppocr/modeling/backbones/rec_mv1_enhance.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import numpy as np
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import KaimingNormal
+import math
+import numpy as np
+import paddle
+from paddle import ParamAttr, reshape, transpose, concat, split
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
+from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
+from paddle.nn.initializer import KaimingNormal
+import math
+from paddle.nn.functional import hardswish, hardsigmoid
+from paddle.regularizer import L2Decay
+class ConvBNLayer(nn.Layer):
+    """Bias-free Conv2D followed by BatchNorm; the activation is applied by BatchNorm.
+
+    Args:
+        num_channels: Input channel count of the convolution.
+        filter_size: Convolution kernel size.
+        num_filters: Output channel count of the convolution (and the
+            BatchNorm channel count).
+        stride: Convolution stride.
+        padding: Convolution padding.
+        channels: Accepted but not read anywhere in this class.
+        num_groups: Number of convolution groups.
+        act: Activation name forwarded to ``BatchNorm``.
+    """
+    def __init__(self,
+                 num_channels,
+                 filter_size,
+                 num_filters,
+                 stride,
+                 padding,
+                 channels=None,
+                 num_groups=1,
+                 act='hard_swish'):
+        super(ConvBNLayer, self).__init__()
+        # No conv bias: the BatchNorm that follows provides its own bias term.
+        self._conv = Conv2D(
+            in_channels=num_channels,
+            out_channels=num_filters,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            weight_attr=ParamAttr(initializer=KaimingNormal()),
+            bias_attr=False)
+        # L2Decay(0.0) on both BatchNorm parameters, i.e. a zero-coefficient
+        # regularizer is attached to the scale and the bias.
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
+            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
+    def forward(self, inputs):
+        """Apply the convolution, then BatchNorm (which also applies ``act``)."""
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+class DepthwiseSeparable(nn.Layer):
+    """Grouped ``dw_size`` x ``dw_size`` conv, optional SE module, then a 1x1 conv.
+
+    All filter and group counts are multiplied by ``scale`` and truncated
+    with ``int`` before being passed to the underlying layers.
+
+    Args:
+        num_channels: Input channel count (used as given, not scaled here).
+        num_filters1: Unscaled output channels of the first (grouped) conv.
+        num_filters2: Unscaled output channels of the 1x1 conv.
+        num_groups: Unscaled group count of the first conv.
+        stride: Stride of the first conv.
+        scale: Width multiplier applied to the filter and group counts.
+        dw_size: Kernel size of the first conv.
+        padding: Padding of the first conv.
+        use_se: When true, an ``SEModule`` is applied between the two convs.
+    """
+    def __init__(self,
+                 num_channels,
+                 num_filters1,
+                 num_filters2,
+                 num_groups,
+                 stride,
+                 scale,
+                 dw_size=3,
+                 padding=1,
+                 use_se=False):
+        super(DepthwiseSeparable, self).__init__()
+        self.use_se = use_se
+        self._depthwise_conv = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=int(num_filters1 * scale),
+            filter_size=dw_size,
+            stride=stride,
+            padding=padding,
+            num_groups=int(num_groups * scale))
+        # The SE sub-layer is only created when requested, so it only
+        # contributes parameters for blocks built with use_se=True.
+        if use_se:
+            self._se = SEModule(int(num_filters1 * scale))
+        self._pointwise_conv = ConvBNLayer(
+            num_channels=int(num_filters1 * scale),
+            filter_size=1,
+            num_filters=int(num_filters2 * scale),
+            stride=1,
+            padding=0)
+    def forward(self, inputs):
+        """Run the grouped conv, the optional SE module, then the 1x1 conv."""
+        y = self._depthwise_conv(inputs)
+        if self.use_se:
+            y = self._se(y)
+        y = self._pointwise_conv(y)
+        return y
+class MobileNetV1Enhance(nn.Layer):
+    """Backbone made of a stem conv, a stack of ``DepthwiseSeparable`` blocks and a max pool.
+
+    Args:
+        in_channels: Channel count of the input fed to the stem convolution.
+        scale: Width multiplier applied to every block's filter counts.
+        **kwargs: Accepted for config compatibility; not read here.
+
+    Attributes:
+        out_channels: ``int(1024 * scale)``, the channel count produced by
+            the last block.
+    """
+    def __init__(self, in_channels=3, scale=0.5, **kwargs):
+        super().__init__()
+        self.scale = scale
+        self.block_list = []
+        # The stem now honours ``in_channels``; it used to be hard-coded to 3,
+        # which silently ignored the constructor argument.
+        self.conv1 = ConvBNLayer(
+            num_channels=in_channels,
+            filter_size=3,
+            num_filters=int(32 * scale),
+            stride=2,
+            padding=1)
+        # NOTE: blocks are appended in a fixed order; the resulting
+        # nn.Sequential indexes its sub-layers by position, so the order
+        # must not change.
+        conv2_1 = DepthwiseSeparable(
+            num_channels=int(32 * scale),
+            num_filters1=32,
+            num_filters2=64,
+            num_groups=32,
+            stride=1,
+            scale=scale)
+        self.block_list.append(conv2_1)
+        conv2_2 = DepthwiseSeparable(
+            num_channels=int(64 * scale),
+            num_filters1=64,
+            num_filters2=128,
+            num_groups=64,
+            stride=1,
+            scale=scale)
+        self.block_list.append(conv2_2)
+        conv3_1 = DepthwiseSeparable(
+            num_channels=int(128 * scale),
+            num_filters1=128,
+            num_filters2=128,
+            num_groups=128,
+            stride=1,
+            scale=scale)
+        self.block_list.append(conv3_1)
+        # stride=(2, 1): different strides along the two spatial axes
+        # (presumably height then width, per Conv2D's convention - confirm).
+        conv3_2 = DepthwiseSeparable(
+            num_channels=int(128 * scale),
+            num_filters1=128,
+            num_filters2=256,
+            num_groups=128,
+            stride=(2, 1),
+            scale=scale)
+        self.block_list.append(conv3_2)
+        conv4_1 = DepthwiseSeparable(
+            num_channels=int(256 * scale),
+            num_filters1=256,
+            num_filters2=256,
+            num_groups=256,
+            stride=1,
+            scale=scale)
+        self.block_list.append(conv4_1)
+        conv4_2 = DepthwiseSeparable(
+            num_channels=int(256 * scale),
+            num_filters1=256,
+            num_filters2=512,
+            num_groups=256,
+            stride=(2, 1),
+            scale=scale)
+        self.block_list.append(conv4_2)
+        # Five identical 5x5 blocks at 512 (unscaled) channels, without SE.
+        for _ in range(5):
+            conv5 = DepthwiseSeparable(
+                num_channels=int(512 * scale),
+                num_filters1=512,
+                num_filters2=512,
+                num_groups=512,
+                stride=1,
+                dw_size=5,
+                padding=2,
+                scale=scale,
+                use_se=False)
+            self.block_list.append(conv5)
+        # The last two blocks are the only ones built with an SE module.
+        conv5_6 = DepthwiseSeparable(
+            num_channels=int(512 * scale),
+            num_filters1=512,
+            num_filters2=1024,
+            num_groups=512,
+            stride=(2, 1),
+            dw_size=5,
+            padding=2,
+            scale=scale,
+            use_se=True)
+        self.block_list.append(conv5_6)
+        conv6 = DepthwiseSeparable(
+            num_channels=int(1024 * scale),
+            num_filters1=1024,
+            num_filters2=1024,
+            num_groups=1024,
+            stride=1,
+            dw_size=5,
+            padding=2,
+            use_se=True,
+            scale=scale)
+        self.block_list.append(conv6)
+        # Replace the plain Python list with a Sequential container.
+        self.block_list = nn.Sequential(*self.block_list)
+        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
+        self.out_channels = int(1024 * scale)
+    def forward(self, inputs):
+        """Apply the stem conv, the block stack and the final 2x2 max pool."""
+        y = self.conv1(inputs)
+        y = self.block_list(y)
+        y = self.pool(y)
+        return y
+class SEModule(nn.Layer):
+    """Channel re-weighting block: pool to 1x1, two 1x1 convs, then scale the input.
+
+    Args:
+        channel: Channel count of the input (and of the output gate).
+        reduction: Divisor for the hidden channel count
+            (``channel // reduction``).
+    """
+    def __init__(self, channel, reduction=4):
+        super(SEModule, self).__init__()
+        self.avg_pool = AdaptiveAvgPool2D(1)
+        # First 1x1 conv shrinks the channels by ``reduction``.
+        self.conv1 = Conv2D(
+            in_channels=channel,
+            out_channels=channel // reduction,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            weight_attr=ParamAttr(),
+            bias_attr=ParamAttr())
+        # Second 1x1 conv restores the original channel count.
+        self.conv2 = Conv2D(
+            in_channels=channel // reduction,
+            out_channels=channel,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            weight_attr=ParamAttr(),
+            bias_attr=ParamAttr())
+    def forward(self, inputs):
+        """Return ``inputs`` multiplied by the hardsigmoid gate computed from it."""
+        outputs = self.avg_pool(inputs)
+        outputs = self.conv1(outputs)
+        outputs = F.relu(outputs)
+        outputs = self.conv2(outputs)
+        outputs = hardsigmoid(outputs)
+        return paddle.multiply(x=inputs, y=outputs)
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -21,7 +21,7 @@ import copy
 __all__ = ['build_post_process']
-from .db_postprocess import DBPostProcess
+from .db_postprocess import DBPostProcess, DistillationDBPostProcess
 from .east_postprocess import EASTPostProcess
 from .sast_postprocess import SASTPostProcess
 from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, \
@@ -34,7 +34,8 @@ def build_post_process(config, global_config=None):
    support_dict = [
        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
        'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
-        'DistillationCTCLabelDecode', 'TableLabelDecode'
+        'DistillationCTCLabelDecode', 'TableLabelDecode',
+        'DistillationDBPostProcess'
    ]
    config = copy.deepcopy(config)

--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@@ -187,3 +187,29 @@ class DBPostProcess(object):
            boxes_batch.append({'points': boxes})
        return boxes_batch
+class DistillationDBPostProcess(object):
+    """Apply ``DBPostProcess`` to the predictions of each named sub-model.
+
+    Args:
+        model_name: Names of the sub-models whose predictions are
+            post-processed. A single string is treated as a one-element
+            list. Defaults to ``["student"]``.
+        key: Stored on the instance; not read by ``__call__``.
+        thresh, box_thresh, max_candidates, unclip_ratio, use_dilation,
+        score_mode: Forwarded unchanged to ``DBPostProcess``.
+        **kwargs: Accepted and ignored.
+    """
+    def __init__(self, model_name=None,
+                 key=None,
+                 thresh=0.3,
+                 box_thresh=0.6,
+                 max_candidates=1000,
+                 unclip_ratio=1.5,
+                 use_dilation=False,
+                 score_mode="fast",
+                 **kwargs):
+        # Build a fresh list per instance rather than sharing one mutable
+        # default list object across every instance.
+        if model_name is None:
+            model_name = ["student"]
+        elif isinstance(model_name, str):
+            # A bare string would otherwise be iterated character by character.
+            model_name = [model_name]
+        self.model_name = list(model_name)
+        self.key = key
+        self.post_process = DBPostProcess(thresh=thresh,
+                                          box_thresh=box_thresh,
+                                          max_candidates=max_candidates,
+                                          unclip_ratio=unclip_ratio,
+                                          use_dilation=use_dilation,
+                                          score_mode=score_mode)
+    def __call__(self, predicts, shape_list):
+        """Return ``{name: DBPostProcess result}`` for every configured sub-model.
+
+        ``predicts`` must be indexable by each name in ``self.model_name``.
+        """
+        results = {}
+        for k in self.model_name:
+            results[k] = self.post_process(predicts[k], shape_list=shape_list)
+        return results
--- a/ppocr/utils/save_load.py
+++ b/ppocr/utils/save_load.py
@@ -91,14 +91,14 @@ def init_model(config, model, optimizer=None, lr_scheduler=None):
 def load_dygraph_params(config, model, logger, optimizer):
    ckp = config['Global']['checkpoints']
-    if ckp and os.path.exists(ckp):
+    if ckp and os.path.exists(ckp + ".pdparams"):
        pre_best_model_dict = init_model(config, model, optimizer)
        return pre_best_model_dict
    else:
        pm = config['Global']['pretrained_model']
        if pm is None:
            return {}
-        if not os.path.exists(pm) or not os.path.exists(pm + ".pdparams"):
+        if not os.path.exists(pm) and not os.path.exists(pm + ".pdparams"):
            logger.info(f"The pretrained_model {pm} does not exists!")
            return {}
        pm = pm if pm.endswith('.pdparams') else pm + '.pdparams'
@@ -116,6 +116,27 @@ def load_dygraph_params(config, model, logger, optimizer):
        logger.info(f"loaded pretrained_model successful from {pm}")
        return {}
+def load_pretrained_params(model, path):
+    """Load pretrained weights from a ``.pdparams`` file into ``model``.
+
+    Args:
+        model: Layer exposing ``state_dict()`` and ``set_state_dict()``.
+        path: Weights path, with or without the ``.pdparams`` suffix.
+
+    Returns:
+        ``model`` after loading, or ``False`` when ``path`` is ``None`` or
+        the weights file does not exist. Callers that rebind their model to
+        the return value must check for ``False`` first.
+    """
+    if path is None:
+        return False
+    # Resolve the file name first so the existence check looks at the very
+    # file that gets loaded. A suffix-less ``path`` that exists by itself
+    # (e.g. a directory) used to pass the check and then fail inside
+    # ``paddle.load``.
+    params_path = path if path.endswith('.pdparams') else path + '.pdparams'
+    if not os.path.exists(params_path):
+        print(f"The pretrained_model {path} does not exists!")
+        return False
+    params = paddle.load(params_path)
+    state_dict = model.state_dict()
+    # Parameters are paired by position, not by name, so zip() would
+    # silently drop the tail when the two key counts differ.
+    if len(state_dict) != len(params):
+        print(
+            f"The model has {len(state_dict)} params but {params_path} has {len(params)}; only the first {min(len(state_dict), len(params))} pairs are compared !"
+        )
+    new_state_dict = {}
+    for k1, k2 in zip(state_dict.keys(), params.keys()):
+        if list(state_dict[k1].shape) == list(params[k2].shape):
+            new_state_dict[k1] = params[k2]
+        else:
+            print(
+                f"The shape of model params {k1} {state_dict[k1].shape} not matched with loaded params {k2} {params[k2].shape} !"
+            )
+    model.set_state_dict(new_state_dict)
+    print(f"load pretrain successful from {params_path}")
+    return model
 def save_model(model,
               optimizer,

--- a/test/ocr_det_params.txt
+++ b/test/ocr_det_params.txt
 model_name:ocr_det
 python:python3.7
-gpu_list:-1|0|0,1
+gpu_list:0|0,1
-Global.auto_cast:False|True
+Global.auto_cast:null
 Global.epoch_num:10
 Global.save_model_dir:./output/
-Global.save_inference_dir:./output/
 Train.loader.batch_size_per_card:
-Global.use_gpu
+Global.use_gpu:
-Global.pretrained_model
+Global.pretrained_model:null
-trainer:norm|pact|fpgm
+trainer:norm|pact
 norm_train:tools/train.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
 quant_train:deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
 fpgm_train:null
@@ -17,6 +16,8 @@ distill_train:null
 eval:tools/eval.py -c configs/det/det_mv3_db.yml -o 
+Global.save_inference_dir:./output/
+Global.pretrained_model:
 norm_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o 
 quant_export:deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o 
 fpgm_export:deploy/slim/prune/export_prune_model.py
@@ -29,7 +30,6 @@ inference:tools/infer/predict_det.py
 --rec_batch_num:1
 --use_tensorrt:True|False
 --precision:fp32|fp16|int8
--det_model_dir
+--det_model_dir:./inference/ch_ppocr_mobile_v2.0_det_infer/
--image_dir
+--image_dir:./inference/ch_det_data_50/all-sum-510/
--save_log_path
+--save_log_path:./test/output/
--- a/test/prepare.sh
+++ b/test/prepare.sh
@@ -26,8 +26,10 @@ IFS=$'\n'
 # The training params
 model_name=$(func_parser_value "${lines[0]}")
 train_model_list=$(func_parser_value "${lines[0]}")
 trainer_list=$(func_parser_value "${lines[10]}")
 # MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer']
 MODE=$2
 # prepare pretrained weights and dataset 
@@ -62,8 +64,8 @@ else
    rm -rf ./train_data/icdar2015
    wget -nc -P ./train_data https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
    if [ ${model_name} = "ocr_det" ]; then
-        eval_model_name="ch_ppocr_mobile_v2.0_det_train"
+        eval_model_name="ch_ppocr_mobile_v2.0_det_infer"
-        wget -nc  -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
+        wget -nc  -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
        cd ./inference && tar xf ${eval_model_name}.tar && cd ../
    else 
        eval_model_name="ch_ppocr_mobile_v2.0_rec_train"

--- a/test/test.sh
+++ b/test/test.sh
@@ -41,59 +41,51 @@ gpu_list=$(func_parser_value "${lines[2]}")
 autocast_list=$(func_parser_value "${lines[3]}")
 autocast_key=$(func_parser_key "${lines[3]}")
 epoch_key=$(func_parser_key "${lines[4]}")
+epoch_num=$(func_parser_value "${lines[4]}")
 save_model_key=$(func_parser_key "${lines[5]}")
-save_infer_key=$(func_parser_key "${lines[6]}")
+train_batch_key=$(func_parser_key "${lines[6]}")
-train_batch_key=$(func_parser_key "${lines[7]}")
+train_use_gpu_key=$(func_parser_key "${lines[7]}")
-train_use_gpu_key=$(func_parser_key "${lines[8]}")
+pretrain_model_key=$(func_parser_key "${lines[8]}")
-pretrain_model_key=$(func_parser_key "${lines[9]}")
+pretrain_model_value=$(func_parser_value "${lines[8]}")
-trainer_list=$(func_parser_value "${lines[10]}")
+trainer_list=$(func_parser_value "${lines[9]}")
-norm_trainer=$(func_parser_value "${lines[11]}")
+norm_trainer=$(func_parser_value "${lines[10]}")
-pact_trainer=$(func_parser_value "${lines[12]}")
+pact_trainer=$(func_parser_value "${lines[11]}")
-fpgm_trainer=$(func_parser_value "${lines[13]}")
+fpgm_trainer=$(func_parser_value "${lines[12]}")
-distill_trainer=$(func_parser_value "${lines[14]}")
+distill_trainer=$(func_parser_value "${lines[13]}")
-eval_py=$(func_parser_value "${lines[15]}")
+eval_py=$(func_parser_value "${lines[14]}")
-norm_export=$(func_parser_value "${lines[16]}")
-pact_export=$(func_parser_value "${lines[17]}")
+save_infer_key=$(func_parser_key "${lines[15]}")
-fpgm_export=$(func_parser_value "${lines[18]}")
+export_weight=$(func_parser_key "${lines[16]}")
-distill_export=$(func_parser_value "${lines[19]}")
+norm_export=$(func_parser_value "${lines[17]}")
+pact_export=$(func_parser_value "${lines[18]}")
-inference_py=$(func_parser_value "${lines[20]}")
+fpgm_export=$(func_parser_value "${lines[19]}")
-use_gpu_key=$(func_parser_key "${lines[21]}")
+distill_export=$(func_parser_value "${lines[20]}")
-use_gpu_list=$(func_parser_value "${lines[21]}")
-use_mkldnn_key=$(func_parser_key "${lines[22]}")
+inference_py=$(func_parser_value "${lines[21]}")
-use_mkldnn_list=$(func_parser_value "${lines[22]}")
+use_gpu_key=$(func_parser_key "${lines[22]}")
-cpu_threads_key=$(func_parser_key "${lines[23]}")
+use_gpu_list=$(func_parser_value "${lines[22]}")
-cpu_threads_list=$(func_parser_value "${lines[23]}")
+use_mkldnn_key=$(func_parser_key "${lines[23]}")
-batch_size_key=$(func_parser_key "${lines[24]}")
+use_mkldnn_list=$(func_parser_value "${lines[23]}")
-batch_size_list=$(func_parser_value "${lines[24]}")
+cpu_threads_key=$(func_parser_key "${lines[24]}")
-use_trt_key=$(func_parser_key "${lines[25]}")
+cpu_threads_list=$(func_parser_value "${lines[24]}")
-use_trt_list=$(func_parser_value "${lines[25]}")
+batch_size_key=$(func_parser_key "${lines[25]}")
-precision_key=$(func_parser_key "${lines[26]}")
+batch_size_list=$(func_parser_value "${lines[25]}")
-precision_list=$(func_parser_value "${lines[26]}")
+use_trt_key=$(func_parser_key "${lines[26]}")
-model_dir_key=$(func_parser_key "${lines[27]}")
+use_trt_list=$(func_parser_value "${lines[26]}")
-image_dir_key=$(func_parser_key "${lines[28]}")
+precision_key=$(func_parser_key "${lines[27]}")
-save_log_key=$(func_parser_key "${lines[29]}")
+precision_list=$(func_parser_value "${lines[27]}")
+infer_model_key=$(func_parser_key "${lines[28]}")
+infer_model=$(func_parser_value "${lines[28]}")
+image_dir_key=$(func_parser_key "${lines[29]}")
+infer_img_dir=$(func_parser_value "${lines[29]}")
+save_log_key=$(func_parser_key "${lines[30]}")
 LOG_PATH="./test/output"
 mkdir -p ${LOG_PATH}
 status_log="${LOG_PATH}/results.log"
-if [ ${MODE} = "lite_train_infer" ]; then
-    export infer_img_dir="./train_data/icdar2015/text_localization/ch4_test_images/"
-    export epoch_num=10
-elif [ ${MODE} = "whole_infer" ]; then
-    export infer_img_dir="./train_data/icdar2015/text_localization/ch4_test_images/"
-    export epoch_num=10
-elif [ ${MODE} = "whole_train_infer" ]; then
-    export infer_img_dir="./train_data/icdar2015/text_localization/ch4_test_images/"
-    export epoch_num=300
-else
-    export infer_img_dir="./inference/ch_det_data_50/all-sum-510"
-    export infer_model_dir="./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy"
-fi
 function func_inference(){
    IFS='|'
@@ -109,8 +101,8 @@ function func_inference(){
            for use_mkldnn in ${use_mkldnn_list[*]}; do
                for threads in ${cpu_threads_list[*]}; do
                    for batch_size in ${batch_size_list[*]}; do
-                        _save_log_path="${_log_path}/infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}"
+                        _save_log_path="${_log_path}/infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log"
-                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${cpu_threads_key}=${threads} ${model_dir_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir}  ${save_log_key}=${_save_log_path}"
+                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${cpu_threads_key}=${threads} ${infer_model_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir}  ${save_log_key}=${_save_log_path} --benchmark=True"
                        eval $command
                        status_check $? "${command}" "${status_log}"
                    done
@@ -123,8 +115,8 @@ function func_inference(){
                        continue
                    fi
                    for batch_size in ${batch_size_list[*]}; do
-                        _save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}"
+                        _save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
-                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_trt_key}=${use_trt} ${precision_key}=${precision} ${model_dir_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir}  ${save_log_key}=${_save_log_path}"
+                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_trt_key}=${use_trt} ${precision_key}=${precision} ${infer_model_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir}  ${save_log_key}=${_save_log_path}  --benchmark=True"
                        eval $command
                        status_check $? "${command}" "${status_log}"
                    done
@@ -144,6 +136,7 @@ for gpu in ${gpu_list[*]}; do
        env=""
    elif [ ${#gpu} -le 1 ];then
        env="export CUDA_VISIBLE_DEVICES=${gpu}"
+        eval ${env}
    elif [ ${#gpu} -le 15 ];then
        IFS=","
        array=(${gpu})
@@ -155,6 +148,7 @@ for gpu in ${gpu_list[*]}; do
        ips=${array[0]}
        gpu=${array[1]}
        IFS="|"
+        env=" "
    fi
    for autocast in ${autocast_list[*]}; do 
        for trainer in ${trainer_list[*]}; do 
@@ -179,13 +173,32 @@ for gpu in ${gpu_list[*]}; do
                continue
            fi
-            save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
+            # not set autocast when autocast is null
-            if [ ${#gpu} -le 2 ];then  # epoch_num #TODO
+            if [ ${autocast} = "null" ]; then
-                cmd="${python} ${run_train} ${train_use_gpu_key}=${use_gpu} ${autocast_key}=${autocast} ${epoch_key}=${epoch_num} ${save_model_key}=${save_log} "
+                set_autocast=" "
-            elif [ ${#gpu} -le 15 ];then
+            else
-                cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${autocast_key}=${autocast} ${epoch_key}=${epoch_num}  ${save_model_key}=${save_log}"
+                set_autocast="${autocast_key}=${autocast}"
+            fi
+            # not set epoch when whole_train_infer
+            if [ ${MODE} != "whole_train_infer" ]; then
+                set_epoch="${epoch_key}=${epoch_num}"
+            else
+                set_epoch=" "
+            fi
+            # set pretrain
+            if [ ${pretrain_model_value} != "null" ]; then
+                set_pretrain="${pretrain_model_key}=${pretrain_model_value}"
            else
-                cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${autocast_key}=${autocast} ${epoch_key}=${epoch_num} ${save_model_key}=${save_log}"
+                set_pretrain=" "
+            fi
+            save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
+            if [ ${#gpu} -le 2 ];then  # train with cpu or single gpu
+                cmd="${python} ${run_train} ${train_use_gpu_key}=${use_gpu}  ${save_model_key}=${save_log} ${set_epoch} ${set_pretrain} ${set_autocast}"
+            elif [ ${#gpu} -le 15 ];then  # train with multi-gpu
+                cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${save_model_key}=${save_log}  ${set_epoch} ${set_pretrain} ${set_autocast}"
+            else     # train with multi-machine
+                cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${save_model_key}=${save_log} ${set_pretrain} ${set_epoch} ${set_autocast}"
            fi
            # run train
            eval $cmd
@@ -198,24 +211,27 @@ for gpu in ${gpu_list[*]}; do
            # run export model
            save_infer_path="${save_log}"
-            export_cmd="${python} ${run_export} ${save_model_key}=${save_log} ${pretrain_model_key}=${save_log}/latest ${save_infer_key}=${save_infer_path}"
+            export_cmd="${python} ${run_export} ${save_model_key}=${save_log} ${export_weight}=${save_log}/latest ${save_infer_key}=${save_infer_path}"
            eval $export_cmd
            status_check $? "${export_cmd}" "${status_log}"
            #run inference
+            eval $env
            save_infer_path="${save_log}"
            func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${infer_img_dir}"
+            eval "unset CUDA_VISIBLE_DEVICES"
        done
    done
 done
 else
-    save_infer_path="${LOG_PATH}/${MODE}"
+    GPUID=$3
-    run_export=${norm_export}
+    if [ ${#GPUID} -le 0 ];then
-    export_cmd="${python} ${run_export} ${save_model_key}=${save_infer_path} ${pretrain_model_key}=${infer_model_dir} ${save_infer_key}=${save_infer_path}"
+        env=" "
-    eval $export_cmd
+    else
-    status_check $? "${export_cmd}" "${status_log}"
+        env="export CUDA_VISIBLE_DEVICES=${GPUID}"
+    fi
+    echo $env
    #run inference
-    func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${infer_img_dir}"
+    func_inference "${python}" "${inference_py}" "${infer_model}" "${LOG_PATH}" "${infer_img_dir}"
 fi
--- a/test1/table/README_ch.md
+++ b/test1/table/README_ch.md
@@ -19,7 +19,29 @@
 ### 2.1 训练
-TBD
+#### 数据准备  
+训练数据使用公开数据集[PubTabNet](https://arxiv.org/abs/1911.10683)，可以从[官网](https://github.com/ibm-aur-nlp/PubTabNet)下载。PubTabNet数据集包含约50万张表格数据的图像，以及图像对应的html格式的注释。
+#### 启动训练  
+*如果您安装的是cpu版本，请将配置文件中的 `use_gpu` 字段修改为false*
+```shell
+# 单机单卡训练
+python3 tools/train.py -c configs/table/table_mv3.yml
+# 单机多卡训练，通过 --gpus 参数设置使用的GPU ID
+python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/table/table_mv3.yml
+```
+上述指令中，通过-c 选择训练使用configs/table/table_mv3.yml配置文件。有关配置文件的详细解释，请参考[链接](./config.md)。
+#### 断点训练
+如果训练程序中断，希望加载中断时保存的模型以恢复训练，可以通过 `Global.checkpoints` 指定要加载的模型路径：
+```shell
+python3 tools/train.py -c configs/table/table_mv3.yml -o Global.checkpoints=./your/trained/model
+```
+**注意**：`Global.checkpoints`的优先级高于`Global.pretrained_model`的优先级，即同时指定两个参数时，优先加载`Global.checkpoints`指定的模型，如果`Global.checkpoints`指定的模型路径有误，会加载`Global.pretrained_model`指定的模型。
 ### 2.2 评估
 先cd到PaddleOCR/ppstructure目录下

--- a/tools/eval.py
+++ b/tools/eval.py
@@ -27,7 +27,7 @@ from ppocr.data import build_dataloader
 from ppocr.modeling.architectures import build_model
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import init_model, load_pretrained_params
 from ppocr.utils.utility import print_dict
 import tools.program as program
@@ -55,7 +55,10 @@ def main():
    model = build_model(config['Architecture'])
    use_srn = config['Architecture']['algorithm'] == "SRN"
-    model_type = config['Architecture']['model_type']
+    if "model_type" in config['Architecture'].keys():
+        model_type = config['Architecture']['model_type']
+    else:
+        model_type = None
    best_model_dict = init_model(config, model)
    if len(best_model_dict):
@@ -68,7 +71,7 @@ def main():
    # start eval
    metric = program.eval(model, valid_dataloader, post_process_class,
-                          eval_class, model_type, use_srn)
+                        eval_class, model_type, use_srn)
    logger.info('metric eval ***************')
    for k, v in metric.items():
        logger.info('{}:{}'.format(k, v))

--- a/tools/infer/predict_det.py
+++ b/tools/infer/predict_det.py
@@ -106,7 +106,7 @@ class TextDetector(object):
                model_precision=args.precision,
                batch_size=1,
                data_shape="dynamic",
-                save_path="./output/auto_log.lpg",
+                save_path=args.save_log_path,
                inference_config=self.config,
                pids=pid,
                process_name=None,
@@ -174,7 +174,7 @@ class TextDetector(object):
        data = {'image': img}
        st = time.time()
        if self.args.benchmark:
            self.autolog.times.start()
@@ -212,7 +212,7 @@ class TextDetector(object):
        else:
            raise NotImplementedError
-        self.predictor.try_shrink_memory()
+        #self.predictor.try_shrink_memory()
        post_result = self.postprocess_op(preds, shape_list)
        dt_boxes = post_result[0]['points']
        if self.det_algorithm == "SAST" and self.det_sast_polygon:
@@ -262,7 +262,6 @@ if __name__ == "__main__":
                                "det_res_{}".format(img_name_pure))
        cv2.imwrite(img_path, src_im)
        logger.info("The visualized image saved in {}".format(img_path))
    if args.benchmark:
        text_detector.autolog.report()
--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -174,8 +174,6 @@ def main(args):
    logger.info("The predict total time is {}".format(time.time() - _st))
    logger.info("\nThe predict total time is {}".format(total_time))
-    img_num = text_sys.text_detector.det_times.img_num
 if __name__ == "__main__":
    args = utility.parse_args()

--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -37,7 +37,7 @@ def init_args():
    parser.add_argument("--use_gpu", type=str2bool, default=True)
    parser.add_argument("--ir_optim", type=str2bool, default=True)
    parser.add_argument("--use_tensorrt", type=str2bool, default=False)
-    parser.add_argument("--min_subgraph_size", type=int, default=3)
+    parser.add_argument("--min_subgraph_size", type=int, default=10)
    parser.add_argument("--precision", type=str, default="fp32")
    parser.add_argument("--gpu_mem", type=int, default=500)
@@ -164,7 +164,7 @@ def create_predictor(args, mode, logger):
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
-                precision_mode=inference.PrecisionType.Float32,
+                precision_mode=precision,
                max_batch_size=args.max_batch_size,
                min_subgraph_size=args.min_subgraph_size)
            # skip the minimum trt subgraph
@@ -176,6 +176,7 @@ def create_predictor(args, mode, logger):
                "conv2d_59.tmp_0": [1, 96, 20, 20],
                "nearest_interp_v2_1.tmp_0": [1, 96, 10, 10],
                "nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
+                "conv2d_124.tmp_0": [1, 96, 20, 20],
                "nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
                "nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
                "nearest_interp_v2_5.tmp_0": [1, 24, 20, 20],
@@ -188,6 +189,7 @@ def create_predictor(args, mode, logger):
                "conv2d_91.tmp_0": [1, 96, 200, 200],
                "conv2d_59.tmp_0": [1, 96, 400, 400],
                "nearest_interp_v2_1.tmp_0": [1, 96, 200, 200],
+                "conv2d_124.tmp_0": [1, 256, 400, 400],
                "nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
                "nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
                "nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
@@ -202,6 +204,7 @@ def create_predictor(args, mode, logger):
                "conv2d_59.tmp_0": [1, 96, 160, 160],
                "nearest_interp_v2_1.tmp_0": [1, 96, 80, 80],
                "nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
+                "conv2d_124.tmp_0": [1, 256, 160, 160],
                "nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
                "nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
                "nearest_interp_v2_5.tmp_0": [1, 24, 160, 160],
@@ -237,7 +240,7 @@ def create_predictor(args, mode, logger):
    # enable memory optim
    config.enable_memory_optim()
-    config.disable_glog_info()
+    #config.disable_glog_info()
    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    if mode == 'table':

--- a/tools/program.py
+++ b/tools/program.py
@@ -186,7 +186,10 @@ def train(config,
    model.train()
    use_srn = config['Architecture']['algorithm'] == "SRN"
-    model_type = config['Architecture']['model_type']
+    try: 
+        model_type = config['Architecture']['model_type']
+    except: 
+        model_type = None
    if 'start_epoch' in best_model_dict:
        start_epoch = best_model_dict['start_epoch']

--- a/tools/train.py
+++ b/tools/train.py
@@ -98,7 +98,6 @@ def main(config, device, logger, vdl_writer):
    eval_class = build_metric(config['Metric'])
    # load pretrain model
    pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer)
    logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
    if valid_dataloader is not None:
        logger.info('valid dataloader has {} iters'.format(