Unverified commit 8ad4e307, authored by MissPenguin, committed by GitHub

Merge branch 'dygraph' into dygraph

parents b1d31f8a 2062b509
@@ -21,7 +21,7 @@ from ppocr.modeling.backbones import build_backbone
 from ppocr.modeling.necks import build_neck
 from ppocr.modeling.heads import build_head
 from .base_model import BaseModel
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import init_model, load_pretrained_params

 __all__ = ['DistillationModel']
@@ -46,7 +46,7 @@ class DistillationModel(nn.Layer):
             pretrained = model_config.pop("pretrained")
             model = BaseModel(model_config)
             if pretrained is not None:
-                init_model(model, path=pretrained)
+                model = load_pretrained_params(model, pretrained)
             if freeze_params:
                 for param in model.parameters():
                     param.trainable = False
...
@@ -12,33 +12,36 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__all__ = ['build_backbone']
+__all__ = ["build_backbone"]


 def build_backbone(config, model_type):
-    if model_type == 'det':
+    if model_type == "det":
         from .det_mobilenet_v3 import MobileNetV3
         from .det_resnet_vd import ResNet
         from .det_resnet_vd_sast import ResNet_SAST
-        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
+        support_dict = ["MobileNetV3", "ResNet", "ResNet_SAST"]
-    elif model_type == 'rec' or model_type == 'cls':
+    elif model_type == "rec" or model_type == "cls":
         from .rec_mobilenet_v3 import MobileNetV3
         from .rec_resnet_vd import ResNet
         from .rec_resnet_fpn import ResNetFPN
-        support_dict = ['MobileNetV3', 'ResNet', 'ResNetFPN']
+        from .rec_mv1_enhance import MobileNetV1Enhance
+        support_dict = [
+            "MobileNetV1Enhance", "MobileNetV3", "ResNet", "ResNetFPN"
+        ]
-    elif model_type == 'e2e':
+    elif model_type == "e2e":
         from .e2e_resnet_vd_pg import ResNet
-        support_dict = ['ResNet']
+        support_dict = ["ResNet"]
     elif model_type == "table":
         from .table_resnet_vd import ResNet
         from .table_mobilenet_v3 import MobileNetV3
-        support_dict = ['ResNet', 'MobileNetV3']
+        support_dict = ["ResNet", "MobileNetV3"]
     else:
         raise NotImplementedError

-    module_name = config.pop('name')
+    module_name = config.pop("name")
     assert module_name in support_dict, Exception(
-        'when model typs is {}, backbone only support {}'.format(model_type, support_dict))
+        "when model type is {}, backbone only supports {}".format(
+            model_type, support_dict))
     module_class = eval(module_name)(**config)
     return module_class
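
For orientation, here is a minimal sketch of how this factory is driven; the config dict below is illustrative, not copied from a shipped YAML:

```python
from ppocr.modeling.backbones import build_backbone

# "name" selects the class from support_dict and is popped before the
# remaining keys are forwarded to the backbone constructor.
config = {"name": "MobileNetV1Enhance", "scale": 0.5}
backbone = build_backbone(config, model_type="rec")
print(backbone.out_channels)  # 512 at scale=0.5
```

The new `rec_mv1_enhance.py` backbone registered above follows.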
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr, reshape, transpose, concat, split
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.functional import hardswish, hardsigmoid
from paddle.nn.initializer import KaimingNormal
from paddle.regularizer import L2Decay

class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm block; the activation is applied inside BatchNorm."""
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
act='hard_swish'):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
weight_attr=ParamAttr(initializer=KaimingNormal()),
bias_attr=False)
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(regularizer=L2Decay(0.0)),
bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y

class DepthwiseSeparable(nn.Layer):
    """Depthwise conv + optional SE block + pointwise (1x1) conv."""
def __init__(self,
num_channels,
num_filters1,
num_filters2,
num_groups,
stride,
scale,
dw_size=3,
padding=1,
use_se=False):
super(DepthwiseSeparable, self).__init__()
self.use_se = use_se
self._depthwise_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=int(num_filters1 * scale),
filter_size=dw_size,
stride=stride,
padding=padding,
num_groups=int(num_groups * scale))
if use_se:
self._se = SEModule(int(num_filters1 * scale))
self._pointwise_conv = ConvBNLayer(
num_channels=int(num_filters1 * scale),
filter_size=1,
num_filters=int(num_filters2 * scale),
stride=1,
padding=0)
def forward(self, inputs):
y = self._depthwise_conv(inputs)
if self.use_se:
y = self._se(y)
y = self._pointwise_conv(y)
return y

class MobileNetV1Enhance(nn.Layer):
    """MobileNetV1 variant for text recognition: the deeper stages use
    (2, 1) strides to halve only the feature-map height, 5x5 depthwise
    kernels, and SE blocks."""

    def __init__(self, in_channels=3, scale=0.5, **kwargs):
super().__init__()
self.scale = scale
self.block_list = []
self.conv1 = ConvBNLayer(
num_channels=3,
filter_size=3,
channels=3,
num_filters=int(32 * scale),
stride=2,
padding=1)
conv2_1 = DepthwiseSeparable(
num_channels=int(32 * scale),
num_filters1=32,
num_filters2=64,
num_groups=32,
stride=1,
scale=scale)
self.block_list.append(conv2_1)
conv2_2 = DepthwiseSeparable(
num_channels=int(64 * scale),
num_filters1=64,
num_filters2=128,
num_groups=64,
stride=1,
scale=scale)
self.block_list.append(conv2_2)
conv3_1 = DepthwiseSeparable(
num_channels=int(128 * scale),
num_filters1=128,
num_filters2=128,
num_groups=128,
stride=1,
scale=scale)
self.block_list.append(conv3_1)
conv3_2 = DepthwiseSeparable(
num_channels=int(128 * scale),
num_filters1=128,
num_filters2=256,
num_groups=128,
stride=(2, 1),
scale=scale)
self.block_list.append(conv3_2)
conv4_1 = DepthwiseSeparable(
num_channels=int(256 * scale),
num_filters1=256,
num_filters2=256,
num_groups=256,
stride=1,
scale=scale)
self.block_list.append(conv4_1)
conv4_2 = DepthwiseSeparable(
num_channels=int(256 * scale),
num_filters1=256,
num_filters2=512,
num_groups=256,
stride=(2, 1),
scale=scale)
self.block_list.append(conv4_2)
for _ in range(5):
conv5 = DepthwiseSeparable(
num_channels=int(512 * scale),
num_filters1=512,
num_filters2=512,
num_groups=512,
stride=1,
dw_size=5,
padding=2,
scale=scale,
use_se=False)
self.block_list.append(conv5)
conv5_6 = DepthwiseSeparable(
num_channels=int(512 * scale),
num_filters1=512,
num_filters2=1024,
num_groups=512,
stride=(2, 1),
dw_size=5,
padding=2,
scale=scale,
use_se=True)
self.block_list.append(conv5_6)
conv6 = DepthwiseSeparable(
num_channels=int(1024 * scale),
num_filters1=1024,
num_filters2=1024,
num_groups=1024,
stride=1,
dw_size=5,
padding=2,
use_se=True,
scale=scale)
self.block_list.append(conv6)
self.block_list = nn.Sequential(*self.block_list)
self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
self.out_channels = int(1024 * scale)
def forward(self, inputs):
y = self.conv1(inputs)
y = self.block_list(y)
y = self.pool(y)
return y

class SEModule(nn.Layer):
    """Squeeze-and-Excitation: channel-wise reweighting via global pooling."""
def __init__(self, channel, reduction=4):
super(SEModule, self).__init__()
self.avg_pool = AdaptiveAvgPool2D(1)
self.conv1 = Conv2D(
in_channels=channel,
out_channels=channel // reduction,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(),
bias_attr=ParamAttr())
self.conv2 = Conv2D(
in_channels=channel // reduction,
out_channels=channel,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(),
bias_attr=ParamAttr())
def forward(self, inputs):
outputs = self.avg_pool(inputs)
outputs = self.conv1(outputs)
outputs = F.relu(outputs)
outputs = self.conv2(outputs)
outputs = hardsigmoid(outputs)
return paddle.multiply(x=inputs, y=outputs)
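
A quick shape check of the backbone above, as a sketch (assuming the module lands at `ppocr.modeling.backbones.rec_mv1_enhance`, per the import in the `build_backbone` hunk; the 1×3×32×320 input follows the usual recognition layout):

```python
import paddle

from ppocr.modeling.backbones.rec_mv1_enhance import MobileNetV1Enhance

model = MobileNetV1Enhance(scale=0.5)
x = paddle.rand([1, 3, 32, 320])
y = model(x)

# conv1 halves both dims; the three (2, 1)-stride blocks halve only the
# height; the final 2x2 max-pool halves both again:
# 32x320 -> 16x160 -> 8x160 -> 4x160 -> 2x160 -> 1x80.
print(y.shape)             # [1, 512, 1, 80]
print(model.out_channels)  # 512 = int(1024 * 0.5)
```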
@@ -21,7 +21,7 @@ import copy

 __all__ = ['build_post_process']

-from .db_postprocess import DBPostProcess
+from .db_postprocess import DBPostProcess, DistillationDBPostProcess
 from .east_postprocess import EASTPostProcess
 from .sast_postprocess import SASTPostProcess
 from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, \
@@ -34,7 +34,8 @@ def build_post_process(config, global_config=None):
     support_dict = [
         'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
         'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
-        'DistillationCTCLabelDecode', 'TableLabelDecode'
+        'DistillationCTCLabelDecode', 'TableLabelDecode',
+        'DistillationDBPostProcess'
     ]
     config = copy.deepcopy(config)
...
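
A hedged sketch of building the newly registered post-process through this factory (the class itself appears in the next hunk; the option values here are illustrative):

```python
from ppocr.postprocess import build_post_process

# "name" picks the class; the remaining keys go to its constructor.
config = {
    "name": "DistillationDBPostProcess",
    "model_name": ["Student"],
    "thresh": 0.3,
    "box_thresh": 0.6,
}
post_process = build_post_process(config)
```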
@@ -187,3 +187,29 @@ class DBPostProcess(object):
             boxes_batch.append({'points': boxes})
         return boxes_batch


class DistillationDBPostProcess(object):
    def __init__(self,
                 model_name=["student"],
                 key=None,
                 thresh=0.3,
                 box_thresh=0.6,
                 max_candidates=1000,
                 unclip_ratio=1.5,
                 use_dilation=False,
                 score_mode="fast",
                 **kwargs):
        self.model_name = model_name
        self.key = key
        # One shared DBPostProcess instance decodes every sub-model's output.
        self.post_process = DBPostProcess(
            thresh=thresh,
            box_thresh=box_thresh,
            max_candidates=max_candidates,
            unclip_ratio=unclip_ratio,
            use_dilation=use_dilation,
            score_mode=score_mode)

    def __call__(self, predicts, shape_list):
        results = {}
        for k in self.model_name:
            results[k] = self.post_process(predicts[k], shape_list=shape_list)
        return results
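
At call time the class simply fans the shared DB decoder out over each named sub-model. A sketch of the expected input layout, assuming each sub-model's output dict carries the usual DB probability maps under "maps" (the key names and shapes here are illustrative):

```python
import numpy as np

post_process = DistillationDBPostProcess(model_name=["Student", "Teacher"])

preds = {
    "Student": {"maps": np.random.rand(1, 1, 640, 640).astype("float32")},
    "Teacher": {"maps": np.random.rand(1, 1, 640, 640).astype("float32")},
}
# Per image: source height/width and the resize ratios applied to it.
shape_list = np.array([[640.0, 640.0, 1.0, 1.0]])

results = post_process(preds, shape_list)
# results == {"Student": [{"points": ...}], "Teacher": [{"points": ...}]}
```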
@@ -91,14 +91,14 @@ def init_model(config, model, optimizer=None, lr_scheduler=None):

 def load_dygraph_params(config, model, logger, optimizer):
     ckp = config['Global']['checkpoints']
-    if ckp and os.path.exists(ckp):
+    if ckp and os.path.exists(ckp + ".pdparams"):
         pre_best_model_dict = init_model(config, model, optimizer)
         return pre_best_model_dict
     else:
         pm = config['Global']['pretrained_model']
         if pm is None:
             return {}
-        if not os.path.exists(pm) or not os.path.exists(pm + ".pdparams"):
+        if not os.path.exists(pm) and not os.path.exists(pm + ".pdparams"):
             logger.info(f"The pretrained_model {pm} does not exist!")
             return {}
         pm = pm if pm.endswith('.pdparams') else pm + '.pdparams'
@@ -116,6 +116,27 @@ def load_dygraph_params(config, model, logger, optimizer):
     logger.info(f"loaded pretrained_model successfully from {pm}")
     return {}

def load_pretrained_params(model, path):
    if path is None:
        return False
    if not os.path.exists(path) and not os.path.exists(path + ".pdparams"):
        print(f"The pretrained_model {path} does not exist!")
        return False
    path = path if path.endswith('.pdparams') else path + '.pdparams'
    params = paddle.load(path)
    state_dict = model.state_dict()
    new_state_dict = {}
    # NOTE: parameters are matched positionally, so this assumes the model
    # and the checkpoint enumerate their parameters in the same order.
    for k1, k2 in zip(state_dict.keys(), params.keys()):
        if list(state_dict[k1].shape) == list(params[k2].shape):
            new_state_dict[k1] = params[k2]
        else:
            print(
                f"The shape of model param {k1} {state_dict[k1].shape} does not match loaded param {k2} {params[k2].shape}!"
            )
    model.set_state_dict(new_state_dict)
    print(f"loaded pretrained params successfully from {path}")
    return model
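
The intended call pattern is the one in the DistillationModel hunk above. Note the asymmetric return type: `False` on a missing file, the model itself on success, so a caller that assigns the result should guard it (a minimal sketch; the path is hypothetical):

```python
loaded = load_pretrained_params(model, "./pretrain_models/student")
if loaded is not False:
    model = loaded
```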

def save_model(model,
               optimizer,
...
 model_name:ocr_det
 python:python3.7
-gpu_list:-1|0|0,1
+gpu_list:0|0,1
-Global.auto_cast:False|True
+Global.auto_cast:null
 Global.epoch_num:10
 Global.save_model_dir:./output/
-Global.save_inference_dir:./output/
 Train.loader.batch_size_per_card:
-Global.use_gpu
+Global.use_gpu:
-Global.pretrained_model
+Global.pretrained_model:null
-trainer:norm|pact|fpgm
+trainer:norm|pact
 norm_train:tools/train.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/MobileNetV3_large_x0_5_pretrained
 quant_train:deploy/slim/quantization/quant.py -c configs/det/det_mv3_db.yml -o Global.pretrained_model=./pretrain_models/det_mv3_db_v2.0_train/best_accuracy
 fpgm_train:null
@@ -17,6 +16,8 @@ distill_train:null
 eval:tools/eval.py -c configs/det/det_mv3_db.yml -o
+Global.save_inference_dir:./output/
+Global.pretrained_model:
 norm_export:tools/export_model.py -c configs/det/det_mv3_db.yml -o
 quant_export:deploy/slim/quantization/export_model.py -c configs/det/det_mv3_db.yml -o
 fpgm_export:deploy/slim/prune/export_prune_model.py
@@ -29,7 +30,6 @@ inference:tools/infer/predict_det.py
 --rec_batch_num:1
 --use_tensorrt:True|False
 --precision:fp32|fp16|int8
---det_model_dir
+--det_model_dir:./inference/ch_ppocr_mobile_v2.0_det_infer/
---image_dir
+--image_dir:./inference/ch_det_data_50/all-sum-510/
---save_log_path
+--save_log_path:./test/output/
@@ -26,8 +26,10 @@ IFS=$'\n'
 # The training params
-model_name=$(func_parser_value "${lines[0]}")
+train_model_list=$(func_parser_value "${lines[0]}")

 trainer_list=$(func_parser_value "${lines[10]}")

 # MODE be one of ['lite_train_infer' 'whole_infer' 'whole_train_infer']
 MODE=$2
 # prepare pretrained weights and dataset
@@ -62,8 +64,8 @@ else
     rm -rf ./train_data/icdar2015
     wget -nc -P ./train_data https://paddleocr.bj.bcebos.com/dygraph_v2.0/test/ch_det_data_50.tar
     if [ ${model_name} = "ocr_det" ]; then
-        eval_model_name="ch_ppocr_mobile_v2.0_det_train"
+        eval_model_name="ch_ppocr_mobile_v2.0_det_infer"
-        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar
+        wget -nc -P ./inference https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar
         cd ./inference && tar xf ${eval_model_name}.tar && cd ../
     else
         eval_model_name="ch_ppocr_mobile_v2.0_rec_train"
...
@@ -41,59 +41,51 @@ gpu_list=$(func_parser_value "${lines[2]}")
 autocast_list=$(func_parser_value "${lines[3]}")
 autocast_key=$(func_parser_key "${lines[3]}")
 epoch_key=$(func_parser_key "${lines[4]}")
+epoch_num=$(func_parser_value "${lines[4]}")
 save_model_key=$(func_parser_key "${lines[5]}")
-save_infer_key=$(func_parser_key "${lines[6]}")
-train_batch_key=$(func_parser_key "${lines[7]}")
-train_use_gpu_key=$(func_parser_key "${lines[8]}")
-pretrain_model_key=$(func_parser_key "${lines[9]}")
-trainer_list=$(func_parser_value "${lines[10]}")
-norm_trainer=$(func_parser_value "${lines[11]}")
-pact_trainer=$(func_parser_value "${lines[12]}")
-fpgm_trainer=$(func_parser_value "${lines[13]}")
-distill_trainer=$(func_parser_value "${lines[14]}")
-eval_py=$(func_parser_value "${lines[15]}")
-norm_export=$(func_parser_value "${lines[16]}")
-pact_export=$(func_parser_value "${lines[17]}")
-fpgm_export=$(func_parser_value "${lines[18]}")
-distill_export=$(func_parser_value "${lines[19]}")
-inference_py=$(func_parser_value "${lines[20]}")
-use_gpu_key=$(func_parser_key "${lines[21]}")
-use_gpu_list=$(func_parser_value "${lines[21]}")
-use_mkldnn_key=$(func_parser_key "${lines[22]}")
-use_mkldnn_list=$(func_parser_value "${lines[22]}")
-cpu_threads_key=$(func_parser_key "${lines[23]}")
-cpu_threads_list=$(func_parser_value "${lines[23]}")
-batch_size_key=$(func_parser_key "${lines[24]}")
-batch_size_list=$(func_parser_value "${lines[24]}")
-use_trt_key=$(func_parser_key "${lines[25]}")
-use_trt_list=$(func_parser_value "${lines[25]}")
-precision_key=$(func_parser_key "${lines[26]}")
-precision_list=$(func_parser_value "${lines[26]}")
-model_dir_key=$(func_parser_key "${lines[27]}")
-image_dir_key=$(func_parser_key "${lines[28]}")
-save_log_key=$(func_parser_key "${lines[29]}")
+train_batch_key=$(func_parser_key "${lines[6]}")
+train_use_gpu_key=$(func_parser_key "${lines[7]}")
+pretrain_model_key=$(func_parser_key "${lines[8]}")
+pretrain_model_value=$(func_parser_value "${lines[8]}")
+trainer_list=$(func_parser_value "${lines[9]}")
+norm_trainer=$(func_parser_value "${lines[10]}")
+pact_trainer=$(func_parser_value "${lines[11]}")
+fpgm_trainer=$(func_parser_value "${lines[12]}")
+distill_trainer=$(func_parser_value "${lines[13]}")
+eval_py=$(func_parser_value "${lines[14]}")
+save_infer_key=$(func_parser_key "${lines[15]}")
+export_weight=$(func_parser_key "${lines[16]}")
+norm_export=$(func_parser_value "${lines[17]}")
+pact_export=$(func_parser_value "${lines[18]}")
+fpgm_export=$(func_parser_value "${lines[19]}")
+distill_export=$(func_parser_value "${lines[20]}")
+inference_py=$(func_parser_value "${lines[21]}")
+use_gpu_key=$(func_parser_key "${lines[22]}")
+use_gpu_list=$(func_parser_value "${lines[22]}")
+use_mkldnn_key=$(func_parser_key "${lines[23]}")
+use_mkldnn_list=$(func_parser_value "${lines[23]}")
+cpu_threads_key=$(func_parser_key "${lines[24]}")
+cpu_threads_list=$(func_parser_value "${lines[24]}")
+batch_size_key=$(func_parser_key "${lines[25]}")
+batch_size_list=$(func_parser_value "${lines[25]}")
+use_trt_key=$(func_parser_key "${lines[26]}")
+use_trt_list=$(func_parser_value "${lines[26]}")
+precision_key=$(func_parser_key "${lines[27]}")
+precision_list=$(func_parser_value "${lines[27]}")
+infer_model_key=$(func_parser_key "${lines[28]}")
+infer_model=$(func_parser_value "${lines[28]}")
+image_dir_key=$(func_parser_key "${lines[29]}")
+infer_img_dir=$(func_parser_value "${lines[29]}")
+save_log_key=$(func_parser_key "${lines[30]}")

 LOG_PATH="./test/output"
 mkdir -p ${LOG_PATH}
 status_log="${LOG_PATH}/results.log"

-if [ ${MODE} = "lite_train_infer" ]; then
-    export infer_img_dir="./train_data/icdar2015/text_localization/ch4_test_images/"
-    export epoch_num=10
-elif [ ${MODE} = "whole_infer" ]; then
-    export infer_img_dir="./train_data/icdar2015/text_localization/ch4_test_images/"
-    export epoch_num=10
-elif [ ${MODE} = "whole_train_infer" ]; then
-    export infer_img_dir="./train_data/icdar2015/text_localization/ch4_test_images/"
-    export epoch_num=300
-else
-    export infer_img_dir="./inference/ch_det_data_50/all-sum-510"
-    export infer_model_dir="./inference/ch_ppocr_mobile_v2.0_det_train/best_accuracy"
-fi
 function func_inference(){
     IFS='|'
@@ -109,8 +101,8 @@ function func_inference(){
             for use_mkldnn in ${use_mkldnn_list[*]}; do
                 for threads in ${cpu_threads_list[*]}; do
                     for batch_size in ${batch_size_list[*]}; do
-                        _save_log_path="${_log_path}/infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}"
+                        _save_log_path="${_log_path}/infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_batchsize_${batch_size}.log"
-                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${cpu_threads_key}=${threads} ${model_dir_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir} ${save_log_key}=${_save_log_path}"
+                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${cpu_threads_key}=${threads} ${infer_model_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir} ${save_log_key}=${_save_log_path} --benchmark=True"
                         eval $command
                         status_check $? "${command}" "${status_log}"
                     done
@@ -123,8 +115,8 @@ function func_inference(){
                     continue
                 fi
                 for batch_size in ${batch_size_list[*]}; do
-                    _save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}"
+                    _save_log_path="${_log_path}/infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
-                    command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_trt_key}=${use_trt} ${precision_key}=${precision} ${model_dir_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir} ${save_log_key}=${_save_log_path}"
+                    command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_trt_key}=${use_trt} ${precision_key}=${precision} ${infer_model_key}=${_model_dir} ${batch_size_key}=${batch_size} ${image_dir_key}=${_img_dir} ${save_log_key}=${_save_log_path} --benchmark=True"
                     eval $command
                     status_check $? "${command}" "${status_log}"
                 done
@@ -144,6 +136,7 @@ for gpu in ${gpu_list[*]}; do
         env=""
     elif [ ${#gpu} -le 1 ];then
         env="export CUDA_VISIBLE_DEVICES=${gpu}"
+        eval ${env}
     elif [ ${#gpu} -le 15 ];then
         IFS=","
         array=(${gpu})
@@ -155,6 +148,7 @@ for gpu in ${gpu_list[*]}; do
         ips=${array[0]}
         gpu=${array[1]}
         IFS="|"
+        env=" "
     fi
     for autocast in ${autocast_list[*]}; do
         for trainer in ${trainer_list[*]}; do
@@ -179,13 +173,32 @@ for gpu in ${gpu_list[*]}; do
                 continue
             fi

-            save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
-            if [ ${#gpu} -le 2 ];then  # epoch_num #TODO
-                cmd="${python} ${run_train} ${train_use_gpu_key}=${use_gpu} ${autocast_key}=${autocast} ${epoch_key}=${epoch_num} ${save_model_key}=${save_log} "
-            elif [ ${#gpu} -le 15 ];then
-                cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${autocast_key}=${autocast} ${epoch_key}=${epoch_num} ${save_model_key}=${save_log}"
-            else
-                cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${autocast_key}=${autocast} ${epoch_key}=${epoch_num} ${save_model_key}=${save_log}"
-            fi
+            # do not pass autocast when it is null
+            if [ ${autocast} = "null" ]; then
+                set_autocast=" "
+            else
+                set_autocast="${autocast_key}=${autocast}"
+            fi
+            # do not override epoch_num for whole_train_infer
+            if [ ${MODE} != "whole_train_infer" ]; then
+                set_epoch="${epoch_key}=${epoch_num}"
+            else
+                set_epoch=" "
+            fi
+            # set the pretrained model if one is configured
+            if [ ${pretrain_model_value} != "null" ]; then
+                set_pretrain="${pretrain_model_key}=${pretrain_model_value}"
+            else
+                set_pretrain=" "
+            fi
+
+            save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
+            if [ ${#gpu} -le 2 ];then  # train with cpu or single gpu
+                cmd="${python} ${run_train} ${train_use_gpu_key}=${use_gpu} ${save_model_key}=${save_log} ${set_epoch} ${set_pretrain} ${set_autocast}"
+            elif [ ${#gpu} -le 15 ];then  # train with multi-gpu
+                cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${save_model_key}=${save_log} ${set_epoch} ${set_pretrain} ${set_autocast}"
+            else  # train with multi-machine
+                cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${save_model_key}=${save_log} ${set_pretrain} ${set_epoch} ${set_autocast}"
+            fi
             # run train
             eval $cmd
@@ -198,24 +211,27 @@ for gpu in ${gpu_list[*]}; do
             # run export model
             save_infer_path="${save_log}"
-            export_cmd="${python} ${run_export} ${save_model_key}=${save_log} ${pretrain_model_key}=${save_log}/latest ${save_infer_key}=${save_infer_path}"
+            export_cmd="${python} ${run_export} ${save_model_key}=${save_log} ${export_weight}=${save_log}/latest ${save_infer_key}=${save_infer_path}"
             eval $export_cmd
             status_check $? "${export_cmd}" "${status_log}"

             # run inference
+            eval $env
             save_infer_path="${save_log}"
             func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${infer_img_dir}"
+            eval "unset CUDA_VISIBLE_DEVICES"
         done
     done
 done
 else
-    save_infer_path="${LOG_PATH}/${MODE}"
-    run_export=${norm_export}
-    export_cmd="${python} ${run_export} ${save_model_key}=${save_infer_path} ${pretrain_model_key}=${infer_model_dir} ${save_infer_key}=${save_infer_path}"
-    eval $export_cmd
-    status_check $? "${export_cmd}" "${status_log}"
+    GPUID=$3
+    if [ ${#GPUID} -le 0 ];then
+        env=" "
+    else
+        env="export CUDA_VISIBLE_DEVICES=${GPUID}"
+    fi
+    echo $env
     # run inference
-    func_inference "${python}" "${inference_py}" "${save_infer_path}" "${LOG_PATH}" "${infer_img_dir}"
+    func_inference "${python}" "${inference_py}" "${infer_model}" "${LOG_PATH}" "${infer_img_dir}"
 fi
@@ -19,7 +19,29 @@
 ### 2.1 Training

-TBD

#### Data preparation

Training uses the public [PubTabNet](https://arxiv.org/abs/1911.10683) dataset, which can be downloaded from the [official site](https://github.com/ibm-aur-nlp/PubTabNet). PubTabNet contains about 500k table images together with an HTML annotation for each image.

#### Starting training

*If you installed the CPU build, set the `use_gpu` field in the config file to false.*

```shell
# single-machine, single-GPU training
python3 tools/train.py -c configs/table/table_mv3.yml

# single-machine, multi-GPU training; select GPU IDs with --gpus
python3 -m paddle.distributed.launch --gpus '0,1,2,3' tools/train.py -c configs/table/table_mv3.yml
```

In the commands above, -c selects configs/table/table_mv3.yml as the training config. For a detailed explanation of the config file, see [this page](./config.md).

#### Resuming training

If training is interrupted and you want to resume from the saved state, point Global.checkpoints at the model to load:

```shell
python3 tools/train.py -c configs/table/table_mv3.yml -o Global.checkpoints=./your/trained/model
```

**Note**: `Global.checkpoints` takes precedence over `Global.pretrain_weights`; if both are set, the model named by `Global.checkpoints` is loaded first, and only if that path is invalid does training fall back to the model named by `Global.pretrain_weights`.

 ### 2.2 Evaluation

 First cd into the PaddleOCR/ppstructure directory.
...
@@ -27,7 +27,7 @@ from ppocr.data import build_dataloader
 from ppocr.modeling.architectures import build_model
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric
-from ppocr.utils.save_load import init_model
+from ppocr.utils.save_load import init_model, load_pretrained_params
 from ppocr.utils.utility import print_dict
 import tools.program as program
@@ -55,7 +55,10 @@ def main():
     model = build_model(config['Architecture'])
     use_srn = config['Architecture']['algorithm'] == "SRN"
-    model_type = config['Architecture']['model_type']
+    if "model_type" in config['Architecture'].keys():
+        model_type = config['Architecture']['model_type']
+    else:
+        model_type = None

     best_model_dict = init_model(config, model)
     if len(best_model_dict):
...
@@ -106,7 +106,7 @@ class TextDetector(object):
                 model_precision=args.precision,
                 batch_size=1,
                 data_shape="dynamic",
-                save_path="./output/auto_log.lpg",
+                save_path=args.save_log_path,
                 inference_config=self.config,
                 pids=pid,
                 process_name=None,
@@ -212,7 +212,7 @@ class TextDetector(object):
         else:
             raise NotImplementedError

-        self.predictor.try_shrink_memory()
+        # self.predictor.try_shrink_memory()
         post_result = self.postprocess_op(preds, shape_list)
         dt_boxes = post_result[0]['points']
         if self.det_algorithm == "SAST" and self.det_sast_polygon:
@@ -265,4 +265,3 @@ if __name__ == "__main__":
         if args.benchmark:
             text_detector.autolog.report()
@@ -174,8 +174,6 @@ def main(args):
         logger.info("The predict total time is {}".format(time.time() - _st))
     logger.info("\nThe predict total time is {}".format(total_time))
-    img_num = text_sys.text_detector.det_times.img_num

 if __name__ == "__main__":
     args = utility.parse_args()
...
@@ -37,7 +37,7 @@ def init_args():
     parser.add_argument("--use_gpu", type=str2bool, default=True)
     parser.add_argument("--ir_optim", type=str2bool, default=True)
     parser.add_argument("--use_tensorrt", type=str2bool, default=False)
-    parser.add_argument("--min_subgraph_size", type=int, default=3)
+    parser.add_argument("--min_subgraph_size", type=int, default=10)
     parser.add_argument("--precision", type=str, default="fp32")
     parser.add_argument("--gpu_mem", type=int, default=500)
@@ -164,7 +164,7 @@ def create_predictor(args, mode, logger):
         config.enable_use_gpu(args.gpu_mem, 0)
         if args.use_tensorrt:
             config.enable_tensorrt_engine(
-                precision_mode=inference.PrecisionType.Float32,
+                precision_mode=precision,
                 max_batch_size=args.max_batch_size,
                 min_subgraph_size=args.min_subgraph_size)
             # skip the minimum trt subgraph
@@ -176,6 +176,7 @@ def create_predictor(args, mode, logger):
                 "conv2d_59.tmp_0": [1, 96, 20, 20],
                 "nearest_interp_v2_1.tmp_0": [1, 96, 10, 10],
                 "nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
+                "conv2d_124.tmp_0": [1, 96, 20, 20],
                 "nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
                 "nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
                 "nearest_interp_v2_5.tmp_0": [1, 24, 20, 20],
@@ -188,6 +189,7 @@ def create_predictor(args, mode, logger):
                 "conv2d_91.tmp_0": [1, 96, 200, 200],
                 "conv2d_59.tmp_0": [1, 96, 400, 400],
                 "nearest_interp_v2_1.tmp_0": [1, 96, 200, 200],
+                "conv2d_124.tmp_0": [1, 256, 400, 400],
                 "nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
                 "nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
                 "nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
@@ -202,6 +204,7 @@ def create_predictor(args, mode, logger):
                 "conv2d_59.tmp_0": [1, 96, 160, 160],
                 "nearest_interp_v2_1.tmp_0": [1, 96, 80, 80],
                 "nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
+                "conv2d_124.tmp_0": [1, 256, 160, 160],
                 "nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
                 "nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
                 "nearest_interp_v2_5.tmp_0": [1, 24, 160, 160],
@@ -237,7 +240,7 @@ def create_predictor(args, mode, logger):
     # enable memory optim
     config.enable_memory_optim()
-    config.disable_glog_info()
+    # config.disable_glog_info()
     config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
     if mode == 'table':
...
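
For context, a hedged sketch of how shape dictionaries like the ones above are typically wired into Paddle Inference TensorRT dynamic shapes; the file paths and shape values here are illustrative, not copied from this tool:

```python
from paddle import inference

config = inference.Config("./inference/det/inference.pdmodel",
                          "./inference/det/inference.pdiparams")
config.enable_use_gpu(500, 0)
config.enable_tensorrt_engine(
    precision_mode=inference.PrecisionType.Float32,
    max_batch_size=10,
    min_subgraph_size=10)

# One entry per variable-shape tensor, including intermediate outputs such
# as the newly registered "conv2d_124.tmp_0".
min_shape = {"x": [1, 3, 50, 50], "conv2d_124.tmp_0": [1, 256, 20, 20]}
max_shape = {"x": [1, 3, 1280, 1280], "conv2d_124.tmp_0": [1, 256, 400, 400]}
opt_shape = {"x": [1, 3, 640, 640], "conv2d_124.tmp_0": [1, 256, 160, 160]}
config.set_trt_dynamic_shape_info(min_shape, max_shape, opt_shape)

predictor = inference.create_predictor(config)
```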
@@ -186,7 +186,10 @@ def train(config,
     model.train()

     use_srn = config['Architecture']['algorithm'] == "SRN"
-    model_type = config['Architecture']['model_type']
+    try:
+        model_type = config['Architecture']['model_type']
+    except:
+        model_type = None

     if 'start_epoch' in best_model_dict:
         start_epoch = best_model_dict['start_epoch']
...
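
The bare `except` above works but also swallows unrelated errors; for reference, the same fallback can be written as a plain dictionary lookup:

```python
model_type = config['Architecture'].get('model_type', None)
```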
@@ -98,7 +98,6 @@ def main(config, device, logger, vdl_writer):
     eval_class = build_metric(config['Metric'])
     # load pretrain model
     pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer)
-
     logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
     if valid_dataloader is not None:
         logger.info('valid dataloader has {} iters'.format(
...