"tools/vscode:/vscode.git/clone" did not exist on "eaf38b9b12cb529daa9eb920b843c7754dac38a2"
Commit d55e9065 authored by WenmuZhou

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into doc

parents 0d6a4862 2b6c887a
===========================train_params===========================
model_name:ch_ppocr_server_v2.0_rec
python:python3.7
gpu_list:0|0,1
Global.use_gpu:True|True
Global.auto_cast:amp
Global.epoch_num:lite_train_lite_infer=5|whole_train_whole_infer=100
Global.save_model_dir:./output/
Train.loader.batch_size_per_card:lite_train_lite_infer=128|whole_train_whole_infer=128
Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./inference/rec_inference
null:null
##
trainer:norm_train
norm_train:tools/train.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
null:null
##
===========================infer_params===========================
Global.save_inference_dir:./output/
Global.checkpoints:
norm_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
##
train_model:./inference/ch_ppocr_server_v2.0_rec_train/best_accuracy
infer_export:tools/export_model.py -c test_tipc/configs/ch_ppocr_server_v2.0_rec/rec_icdar15_train.yml -o
infer_quant:False
inference:tools/infer/predict_rec.py
--use_gpu:True|False
--enable_mkldnn:True|False
--cpu_threads:1|6
--rec_batch_num:1|6
--use_tensorrt:True|False
--precision:fp32|int8
--rec_model_dir:
--image_dir:./inference/rec_inference
--save_log_path:./test/output/
--benchmark:True
null:null
===========================infer_benchmark_params==========================
random_infer_input:[{float32,[3,32,100]}]
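The block above is a test_tipc train_infer configuration: `===...===` banners mark sections, `##` lines separate groups, a `|` separates the alternative values to test for one key, and `null:null` fills unused slots. A minimal sketch of reading such a file into a dict (the helper `parse_tipc_config` is illustrative, not the project's actual loader):

# Minimal sketch of reading the key:value config above into a dict.
# parse_tipc_config is a hypothetical helper, not PaddleOCR's own loader.
def parse_tipc_config(path):
    params = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # Skip blank lines, "##" group separators and "===...===" banners.
            if not line or line.startswith("#") or line.startswith("="):
                continue
            key, _, value = line.partition(":")
            # "|" separates the alternative values to sweep, e.g. "True|False".
            params[key] = value.split("|") if "|" in value else value
    return params

# e.g. params["gpu_list"] -> ["0", "0,1"], params["--cpu_threads"] -> ["1", "6"]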
@@ -74,9 +74,11 @@ def main():
     model = build_model(config['Architecture'])
     extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]
+    extra_input = False
     if config['Architecture']['algorithm'] == 'Distillation':
-        extra_input = config['Architecture']['Models']['Teacher'][
-            'algorithm'] in extra_input_models
+        for key in config['Architecture']["Models"]:
+            extra_input = extra_input or config['Architecture']['Models'][key][
+                'algorithm'] in extra_input_models
     else:
         extra_input = config['Architecture']['algorithm'] in extra_input_models
     if "model_type" in config['Architecture'].keys():
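The hunk above (and the matching change in the `train()` function further down) replaces the hard-coded check of the `Teacher` sub-model with a loop over every sub-model of a distillation architecture, so `extra_input` is set as soon as any of them uses an algorithm that needs extra inputs. A standalone illustration of the new logic with a made-up config dict:

# Standalone illustration of the new extra_input logic with a made-up config.
extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]

config = {  # hypothetical distillation setup, not a real PaddleOCR yaml
    "Architecture": {
        "algorithm": "Distillation",
        "Models": {
            "Teacher": {"algorithm": "CRNN"},
            "Student": {"algorithm": "SVTR"},
        },
    }
}

extra_input = False
if config["Architecture"]["algorithm"] == "Distillation":
    # Any sub-model that needs extra inputs flips the flag, not just the Teacher.
    for key in config["Architecture"]["Models"]:
        extra_input = extra_input or (
            config["Architecture"]["Models"][key]["algorithm"] in extra_input_models)
else:
    extra_input = config["Architecture"]["algorithm"] in extra_input_models

print(extra_input)  # True: the Student is SVTR; the old Teacher-only check gave False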
@@ -31,7 +31,7 @@ from ppocr.utils.logging import get_logger
 from tools.program import load_config, merge_config, ArgsParser


-def export_single_model(model, arch_config, save_path, logger):
+def export_single_model(model, arch_config, save_path, logger, quanter=None):
     if arch_config["algorithm"] == "SRN":
         max_text_length = arch_config["Head"]["max_text_length"]
         other_shape = [
@@ -61,6 +61,11 @@ def export_single_model(model, arch_config, save_path, logger):
                 paddle.static.InputSpec(
                     shape=[None, 3, 48, -1], dtype="float32"),
             ]
+        else:
+            other_shape = [
+                paddle.static.InputSpec(
+                    shape=[None, 3, 64, 256], dtype="float32"),
+            ]
         model = to_static(model, input_spec=other_shape)
     elif arch_config["algorithm"] == "PREN":
         other_shape = [
@@ -90,7 +95,10 @@ def export_single_model(model, arch_config, save_path, logger):
                 shape=[None] + infer_shape, dtype="float32")
         ])

-    paddle.jit.save(model, save_path)
+    if quanter is None:
+        paddle.jit.save(model, save_path)
+    else:
+        quanter.save_quantized_model(model, save_path)
     logger.info("inference model is saved to {}".format(save_path))
     return
@@ -120,7 +128,6 @@ def main():
                 char_num = char_num - 2
             out_channels_list['CTCLabelDecode'] = char_num
             out_channels_list['SARLabelDecode'] = char_num + 2
-            loss_list = config['Loss']['loss_config_list']
             config['Architecture']['Models'][key]['Head'][
                 'out_channels_list'] = out_channels_list
         else:
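The `export_single_model` changes above add a default input shape for the new branch and route saving through the quanter when one is passed. The dynamic-to-static export itself boils down to tracing the model against an `InputSpec` and calling `paddle.jit.save`; a minimal sketch with a toy layer (`SimpleNet` is made up for illustration, not part of PaddleOCR):

# Minimal sketch of the export path: to_static + InputSpec + jit.save.
# SimpleNet is a toy stand-in; the real code exports the OCR model it just built.
import paddle
from paddle.jit import to_static


class SimpleNet(paddle.nn.Layer):
    def __init__(self):
        super().__init__()
        self.conv = paddle.nn.Conv2D(3, 8, kernel_size=3, padding=1)

    def forward(self, x):
        return self.conv(x)


model = SimpleNet()
model.eval()
# [None, 3, 64, 256] mirrors the default shape added in the diff above;
# None keeps the batch dimension dynamic.
input_spec = [paddle.static.InputSpec(shape=[None, 3, 64, 256], dtype="float32")]
static_model = to_static(model, input_spec=input_spec)
paddle.jit.save(static_model, "./output/toy_inference/inference")
# With a quantizer present, the diff instead saves via
# quanter.save_quantized_model(model, save_path).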
@@ -131,6 +131,17 @@ class TextRecognizer(object):
         padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
         padding_im[:, :, 0:resized_w] = resized_image
         return padding_im

+    def resize_norm_img_svtr(self, img, image_shape):
+        imgC, imgH, imgW = image_shape
+        resized_image = cv2.resize(
+            img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
+        resized_image = resized_image.astype('float32')
+        resized_image = resized_image.transpose((2, 0, 1)) / 255
+        resized_image -= 0.5
+        resized_image /= 0.5
+        return resized_image
+
     def resize_norm_img_srn(self, img, image_shape):
         imgC, imgH, imgW = image_shape
@@ -263,12 +274,8 @@ class TextRecognizer(object):
                 wh_ratio = w * 1.0 / h
                 max_wh_ratio = max(max_wh_ratio, wh_ratio)
             for ino in range(beg_img_no, end_img_no):
-                if self.rec_algorithm != "SRN" and self.rec_algorithm != "SAR":
-                    norm_img = self.resize_norm_img(img_list[indices[ino]],
-                                                    max_wh_ratio)
-                    norm_img = norm_img[np.newaxis, :]
-                    norm_img_batch.append(norm_img)
-                elif self.rec_algorithm == "SAR":
+                if self.rec_algorithm == "SAR":
                     norm_img, _, _, valid_ratio = self.resize_norm_img_sar(
                         img_list[indices[ino]], self.rec_image_shape)
                     norm_img = norm_img[np.newaxis, :]
@@ -276,7 +283,7 @@ class TextRecognizer(object):
                     valid_ratios = []
                     valid_ratios.append(valid_ratio)
                     norm_img_batch.append(norm_img)
-                else:
+                elif self.rec_algorithm == "SRN":
                     norm_img = self.process_image_srn(
                         img_list[indices[ino]], self.rec_image_shape, 8, 25)
                     encoder_word_pos_list = []
@@ -288,6 +295,16 @@ class TextRecognizer(object):
                     gsrm_slf_attn_bias1_list.append(norm_img[3])
                     gsrm_slf_attn_bias2_list.append(norm_img[4])
                     norm_img_batch.append(norm_img[0])
+                elif self.rec_algorithm == "SVTR":
+                    norm_img = self.resize_norm_img_svtr(
+                        img_list[indices[ino]], self.rec_image_shape)
+                    norm_img = norm_img[np.newaxis, :]
+                    norm_img_batch.append(norm_img)
+                else:
+                    norm_img = self.resize_norm_img(img_list[indices[ino]],
+                                                    max_wh_ratio)
+                    norm_img = norm_img[np.newaxis, :]
+                    norm_img_batch.append(norm_img)
             norm_img_batch = np.concatenate(norm_img_batch)
             norm_img_batch = norm_img_batch.copy()
             if self.benchmark:
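The new `resize_norm_img_svtr` skips the aspect-ratio-preserving padding that `resize_norm_img` uses and instead resizes the crop straight to the fixed input size, then normalizes it to [-1, 1]. A self-contained sketch of that preprocessing on a dummy image (requires numpy and opencv-python; the default shape is only an example):

# Self-contained sketch of the SVTR-style preprocessing added above.
import cv2
import numpy as np


def resize_norm_img_svtr(img, image_shape=(3, 64, 256)):
    imgC, imgH, imgW = image_shape
    # Resize straight to the fixed network input size (no aspect-ratio padding).
    resized = cv2.resize(img, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
    resized = resized.astype("float32")
    # HWC -> CHW, scale to [0, 1], then shift/scale to [-1, 1].
    resized = resized.transpose((2, 0, 1)) / 255
    resized -= 0.5
    resized /= 0.5
    return resized


dummy_crop = np.random.randint(0, 255, size=(48, 320, 3), dtype=np.uint8)
batch = resize_norm_img_svtr(dummy_crop)[np.newaxis, :]
print(batch.shape)  # (1, 3, 64, 256), values in [-1, 1]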
@@ -271,9 +271,10 @@ def create_predictor(args, mode, logger):
         elif mode == "rec":
             if args.rec_algorithm != "CRNN":
                 use_dynamic_shape = False
-            min_input_shape = {"x": [1, 3, 32, 10]}
-            max_input_shape = {"x": [args.rec_batch_num, 3, 32, 1536]}
-            opt_input_shape = {"x": [args.rec_batch_num, 3, 32, 320]}
+            imgH = int(args.rec_image_shape.split(',')[-2])
+            min_input_shape = {"x": [1, 3, imgH, 10]}
+            max_input_shape = {"x": [args.rec_batch_num, 3, imgH, 1536]}
+            opt_input_shape = {"x": [args.rec_batch_num, 3, imgH, 320]}
         elif mode == "cls":
             min_input_shape = {"x": [1, 3, 48, 10]}
             max_input_shape = {"x": [args.rec_batch_num, 3, 48, 1024]}
@@ -300,8 +301,8 @@ def create_predictor(args, mode, logger):
     # enable memory optim
     config.enable_memory_optim()
     config.disable_glog_info()
-
     config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
+    config.delete_pass("matmul_transpose_reshape_fuse_pass")
     if mode == 'table':
         config.delete_pass("fc_fuse_pass")  # not supported for table
     config.switch_use_feed_fetch_ops(False)
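The `create_predictor` change stops hard-coding a recognition height of 32 for the TensorRT dynamic-shape bounds and derives it from `--rec_image_shape` (a string such as "3, 32, 320"), so models with taller inputs get consistent bounds. A small sketch of just that shape bookkeeping, with plain variables standing in for the CLI arguments:

# Sketch of deriving the TensorRT dynamic-shape dicts from the image-shape string.
# rec_image_shape / rec_batch_num stand in for the predict_rec.py CLI arguments.
rec_image_shape = "3, 64, 256"   # e.g. what --rec_image_shape might carry for a 64-px model
rec_batch_num = 6

imgH = int(rec_image_shape.split(',')[-2])          # 64 instead of the old fixed 32
min_input_shape = {"x": [1, 3, imgH, 10]}
max_input_shape = {"x": [rec_batch_num, 3, imgH, 1536]}
opt_input_shape = {"x": [rec_batch_num, 3, imgH, 320]}

# create_predictor later hands these dicts to Paddle Inference's
# config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, opt_input_shape).
print(min_input_shape, max_input_shape, opt_input_shape)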
@@ -57,6 +57,8 @@ def main():
             continue
         elif op_name == 'KeepKeys':
             op[op_name]['keep_keys'] = ['image']
+        elif op_name == "SSLRotateResize":
+            op[op_name]["mode"] = "test"
         transforms.append(op)
     global_config['infer_mode'] = True
     ops = create_operators(transforms, global_config)
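The hunk above extends the loop that rewrites the dataset transforms for inference: the new branch switches `SSLRotateResize` into test mode, just as the existing branch keeps only the image for `KeepKeys`. A toy illustration of this pattern of patching a list of single-key op dicts (the op parameters here are made up for the example):

# Toy illustration of patching single-key transform-op dicts before building
# inference operators; the op parameters are invented for the example.
transforms_cfg = [
    {"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
    {"SSLRotateResize": {"image_shape": [3, 48, 320]}},
    {"KeepKeys": {"keep_keys": ["image", "label"]}},
]

transforms = []
for op in transforms_cfg:
    op_name = list(op)[0]
    if op_name == "KeepKeys":
        op[op_name]["keep_keys"] = ["image"]   # no labels at inference time
    elif op_name == "SSLRotateResize":
        op[op_name]["mode"] = "test"           # switch the op to its test-time behaviour
    transforms.append(op)

print(transforms)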
@@ -202,9 +202,11 @@ def train(config,
     use_srn = config['Architecture']['algorithm'] == "SRN"
     extra_input_models = ["SRN", "NRTR", "SAR", "SEED", "SVTR"]
+    extra_input = False
     if config['Architecture']['algorithm'] == 'Distillation':
-        extra_input = config['Architecture']['Models']['Teacher'][
-            'algorithm'] in extra_input_models
+        for key in config['Architecture']["Models"]:
+            extra_input = extra_input or config['Architecture']['Models'][key][
+                'algorithm'] in extra_input_models
     else:
         extra_input = config['Architecture']['algorithm'] in extra_input_models
     try:
@@ -129,7 +129,7 @@ def main(config, device, logger, vdl_writer):
         config['Optimizer'],
         epochs=config['Global']['epoch_num'],
         step_each_epoch=len(train_dataloader),
-        parameters=model.parameters())
+        model=model)

     # build metric
     eval_class = build_metric(config['Metric'])
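The last hunk changes the `build_optimizer` call to pass the whole model instead of `model.parameters()`, which lets the builder inspect the model (its sublayers and parameter names) rather than only a flat parameter list. A minimal sketch of a builder with that kind of interface; the body is illustrative, not PaddleOCR's actual `ppocr.optimizer` implementation:

# Minimal sketch of an optimizer builder that receives the model itself,
# mirroring the interface change above. Illustrative only, not the real
# ppocr.optimizer.build_optimizer.
import paddle


def build_optimizer(optim_config, epochs, step_each_epoch, model):
    lr = paddle.optimizer.lr.CosineAnnealingDecay(
        learning_rate=optim_config.get("lr", 0.001),
        T_max=epochs * step_each_epoch)
    # With the whole model available the builder can still get the parameter
    # list, but it can also inspect names, shapes or sublayers if the config
    # asks for per-layer treatment.
    optimizer = paddle.optimizer.Adam(learning_rate=lr,
                                      parameters=model.parameters())
    return optimizer, lr


# Usage, matching the new call site (model is the paddle.nn.Layer being trained):
# optimizer, lr_scheduler = build_optimizer(
#     config['Optimizer'], epochs=config['Global']['epoch_num'],
#     step_each_epoch=len(train_dataloader), model=model)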