Commit 338ba3ee authored by LDOUBLEV

upload PaddleOCR code

parent bc93c549
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import numpy as np
import datetime
__all__ = ['TrainingStats', 'Time']
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window or the global series average.
"""
def __init__(self, window_size):
self.deque = collections.deque(maxlen=window_size)
def add_value(self, value):
self.deque.append(value)
def get_median_value(self):
return np.median(self.deque)
def Time():
return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')
class TrainingStats(object):
def __init__(self, window_size, stats_keys):
self.smoothed_losses_and_metrics = {
key: SmoothedValue(window_size)
for key in stats_keys
}
def update(self, stats):
for k, v in self.smoothed_losses_and_metrics.items():
v.add_value(stats[k])
def get(self, extras=None):
stats = collections.OrderedDict()
if extras:
for k, v in extras.items():
stats[k] = v
for k, v in self.smoothed_losses_and_metrics.items():
stats[k] = round(v.get_median_value(), 6)
return stats
def log(self, extras=None):
d = self.get(extras)
strs = ', '.join(str(dict({x: y})).strip('{}') for x, y in d.items())
return strs
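# --- Usage sketch (illustrative only, not part of the training pipeline) ---
# A minimal example, run only when this module is executed directly, of how
# TrainingStats smooths per-iteration values over a window and formats them for
# logging; the key name 'total_loss' and the extra 'step' entry are placeholders.
if __name__ == '__main__':
    stats = TrainingStats(window_size=20, stats_keys=['total_loss'])
    for step in range(1, 6):
        stats.update({'total_loss': 1.0 / step})
        # log() reports the rolling median of every tracked key
        print(Time(), stats.log({'step': step}))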
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
def initial_logger():
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
return logger
import importlib
def create_module(module_str):
tmpss = module_str.split(",")
    assert len(tmpss) == 2, \
        "Invalid format of the module path: {}".format(module_str)
module_name, function_name = tmpss[0], tmpss[1]
somemodule = importlib.import_module(module_name, __package__)
function = getattr(somemodule, function_name)
return function
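# Usage note (illustrative): create_module expects a single comma-separated
# string "package.module,attr", which is how config entries such as
# config['Architecture']['function'] are written elsewhere in this repo; the
# exact path below is a hypothetical example.
#   model_class = create_module("ppocr.modeling.architectures.rec_model,RecModel")
#   model = model_class(params=config)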
def get_check_global_params(mode):
    check_params = ['use_gpu', 'max_text_length', 'image_shape',\
                    'character_type', 'loss_type']
if mode == "train_eval":
check_params = check_params + [\
'train_batch_size_per_card', 'test_batch_size_per_card']
elif mode == "test":
check_params = check_params + ['test_batch_size_per_card']
return check_params
def get_check_reader_params(mode):
check_params = []
if mode == "train_eval":
check_params = ['TrainReader', 'EvalReader']
elif mode == "test":
check_params = ['TestReader']
return check_params
from paddle import fluid
def create_multi_devices_program(program, loss_var_name):
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = True
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_iteration_per_drop_scope = 1
compile_program = fluid.CompiledProgram(program).with_data_parallel(
loss_name=loss_var_name,
build_strategy=build_strategy,
exec_strategy=exec_strategy)
return compile_program
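# Sketch of how the compiled program is typically consumed (assumes a built
# train program, its loss variable and an Executor already exist; the names
# below are placeholders):
#   compiled = create_multi_devices_program(train_program, loss.name)
#   outs = exe.run(compiled, fetch_list=[loss.name], return_numpy=False)
# with_data_parallel replicates the graph across all visible devices and
# aggregates gradients on the variable named loss_var_name.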
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, they
# would not take effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
import program
from paddle import fluid
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.data.reader_main import reader_main
from ppocr.utils.save_load import init_model
from eval_utils.eval_det_utils import eval_det_run
from eval_utils.eval_rec_utils import test_rec_benchmark
from eval_utils.eval_rec_utils import eval_rec_run
from ppocr.utils.character import CharacterOps
def main():
config = program.load_config(FLAGS.config)
program.merge_config(FLAGS.opt)
logger.info(config)
# check if set use_gpu=True in paddlepaddle cpu version
use_gpu = config['Global']['use_gpu']
    program.check_gpu(use_gpu)
alg = config['Global']['algorithm']
assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
config['Global']['char_ops'] = CharacterOps(config['Global'])
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
startup_prog = fluid.Program()
eval_program = fluid.Program()
eval_build_outputs = program.build(
config, eval_program, startup_prog, mode='test')
eval_fetch_name_list = eval_build_outputs[1]
eval_fetch_varname_list = eval_build_outputs[2]
eval_program = eval_program.clone(for_test=True)
exe = fluid.Executor(place)
exe.run(startup_prog)
init_model(config, eval_program, exe)
if alg in ['EAST', 'DB']:
eval_reader = reader_main(config=config, mode="test")
eval_info_dict = {'program':eval_program,\
'reader':eval_reader,\
'fetch_name_list':eval_fetch_name_list,\
'fetch_varname_list':eval_fetch_varname_list}
metrics = eval_det_run(exe, config, eval_info_dict, "test")
else:
dataset = config['Global']['dataset']
assert dataset in ['lmdb', 'common']
if dataset == 'common':
eval_reader = reader_main(config=config, mode="eval")
eval_info_dict = {'program': eval_program, \
'reader': eval_reader, \
'fetch_name_list': eval_fetch_name_list, \
'fetch_varname_list': eval_fetch_varname_list}
metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
print("Eval result:", metrics)
else:
eval_info_dict = {'program':eval_program,\
'fetch_name_list':eval_fetch_name_list,\
'fetch_varname_list':eval_fetch_varname_list}
test_rec_benchmark(exe, config, eval_info_dict)
if __name__ == '__main__':
parser = program.ArgsParser()
FLAGS = parser.parse_args()
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from collections import namedtuple
import numpy as np
from shapely.geometry import Polygon
class DetectionIoUEvaluator(object):
def __init__(self, iou_constraint=0.5, area_precision_constraint=0.5):
self.iou_constraint = iou_constraint
self.area_precision_constraint = area_precision_constraint
def evaluate_image(self, gt, pred):
def get_union(pD, pG):
return Polygon(pD).union(Polygon(pG)).area
def get_intersection_over_union(pD, pG):
return get_intersection(pD, pG) / get_union(pD, pG)
def get_intersection(pD, pG):
return Polygon(pD).intersection(Polygon(pG)).area
def compute_ap(confList, matchList, numGtCare):
correct = 0
AP = 0
if len(confList) > 0:
confList = np.array(confList)
matchList = np.array(matchList)
sorted_ind = np.argsort(-confList)
confList = confList[sorted_ind]
matchList = matchList[sorted_ind]
for n in range(len(confList)):
match = matchList[n]
if match:
correct += 1
AP += float(correct) / (n + 1)
if numGtCare > 0:
AP /= numGtCare
return AP
perSampleMetrics = {}
matchedSum = 0
Rectangle = namedtuple('Rectangle', 'xmin ymin xmax ymax')
numGlobalCareGt = 0
numGlobalCareDet = 0
arrGlobalConfidences = []
arrGlobalMatches = []
recall = 0
precision = 0
hmean = 0
detMatched = 0
iouMat = np.empty([1, 1])
gtPols = []
detPols = []
gtPolPoints = []
detPolPoints = []
# Array of Ground Truth Polygons' keys marked as don't Care
gtDontCarePolsNum = []
# Array of Detected Polygons' matched with a don't Care GT
detDontCarePolsNum = []
pairs = []
detMatchedNums = []
arrSampleConfidences = []
arrSampleMatch = []
evaluationLog = ""
# print(len(gt))
for n in range(len(gt)):
points = gt[n]['points']
# transcription = gt[n]['text']
dontCare = gt[n]['ignore']
points = Polygon(points)
points = points.buffer(0)
            if not points.is_valid or not points.is_simple:
continue
gtPol = points
gtPols.append(gtPol)
gtPolPoints.append(points)
if dontCare:
gtDontCarePolsNum.append(len(gtPols) - 1)
evaluationLog += "GT polygons: " + str(len(gtPols)) + (
" (" + str(len(gtDontCarePolsNum)) + " don't care)\n"
if len(gtDontCarePolsNum) > 0 else "\n")
for n in range(len(pred)):
points = pred[n]['points']
points = Polygon(points)
points = points.buffer(0)
            if not points.is_valid or not points.is_simple:
continue
detPol = points
detPols.append(detPol)
detPolPoints.append(points)
if len(gtDontCarePolsNum) > 0:
for dontCarePol in gtDontCarePolsNum:
dontCarePol = gtPols[dontCarePol]
intersected_area = get_intersection(dontCarePol, detPol)
pdDimensions = Polygon(detPol).area
precision = 0 if pdDimensions == 0 else intersected_area / pdDimensions
if (precision > self.area_precision_constraint):
detDontCarePolsNum.append(len(detPols) - 1)
break
evaluationLog += "DET polygons: " + str(len(detPols)) + (
" (" + str(len(detDontCarePolsNum)) + " don't care)\n"
if len(detDontCarePolsNum) > 0 else "\n")
if len(gtPols) > 0 and len(detPols) > 0:
            # Calculate IoU and precision matrices
outputShape = [len(gtPols), len(detPols)]
iouMat = np.empty(outputShape)
gtRectMat = np.zeros(len(gtPols), np.int8)
detRectMat = np.zeros(len(detPols), np.int8)
for gtNum in range(len(gtPols)):
for detNum in range(len(detPols)):
pG = gtPols[gtNum]
pD = detPols[detNum]
iouMat[gtNum, detNum] = get_intersection_over_union(pD, pG)
for gtNum in range(len(gtPols)):
for detNum in range(len(detPols)):
if gtRectMat[gtNum] == 0 and detRectMat[
detNum] == 0 and gtNum not in gtDontCarePolsNum and detNum not in detDontCarePolsNum:
if iouMat[gtNum, detNum] > self.iou_constraint:
gtRectMat[gtNum] = 1
detRectMat[detNum] = 1
detMatched += 1
pairs.append({'gt': gtNum, 'det': detNum})
detMatchedNums.append(detNum)
evaluationLog += "Match GT #" + \
str(gtNum) + " with Det #" + str(detNum) + "\n"
numGtCare = (len(gtPols) - len(gtDontCarePolsNum))
numDetCare = (len(detPols) - len(detDontCarePolsNum))
if numGtCare == 0:
recall = float(1)
precision = float(0) if numDetCare > 0 else float(1)
else:
recall = float(detMatched) / numGtCare
precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare
hmean = 0 if (precision + recall) == 0 else 2.0 * \
precision * recall / (precision + recall)
matchedSum += detMatched
numGlobalCareGt += numGtCare
numGlobalCareDet += numDetCare
perSampleMetrics = {
'precision': precision,
'recall': recall,
'hmean': hmean,
'pairs': pairs,
'iouMat': [] if len(detPols) > 100 else iouMat.tolist(),
'gtPolPoints': gtPolPoints,
'detPolPoints': detPolPoints,
'gtCare': numGtCare,
'detCare': numDetCare,
'gtDontCare': gtDontCarePolsNum,
'detDontCare': detDontCarePolsNum,
'detMatched': detMatched,
'evaluationLog': evaluationLog
}
return perSampleMetrics
def combine_results(self, results):
numGlobalCareGt = 0
numGlobalCareDet = 0
matchedSum = 0
for result in results:
numGlobalCareGt += result['gtCare']
numGlobalCareDet += result['detCare']
matchedSum += result['detMatched']
methodRecall = 0 if numGlobalCareGt == 0 else float(
matchedSum) / numGlobalCareGt
methodPrecision = 0 if numGlobalCareDet == 0 else float(
matchedSum) / numGlobalCareDet
methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * \
methodRecall * methodPrecision / (methodRecall + methodPrecision)
# print(methodRecall, methodPrecision, methodHmean)
# sys.exit(-1)
methodMetrics = {
'precision': methodPrecision,
'recall': methodRecall,
'hmean': methodHmean
}
return methodMetrics
if __name__ == '__main__':
evaluator = DetectionIoUEvaluator()
gts = [[{
'points': [(0, 0), (1, 0), (1, 1), (0, 1)],
'text': 1234,
'ignore': False,
}, {
'points': [(2, 2), (3, 2), (3, 3), (2, 3)],
'text': 5678,
'ignore': False,
}]]
preds = [[{
'points': [(0.1, 0.1), (1, 0), (1, 1), (0, 1)],
'text': 123,
'ignore': False,
}]]
results = []
for gt, pred in zip(gts, preds):
results.append(evaluator.evaluate_image(gt, pred))
metrics = evaluator.combine_results(results)
print(metrics)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import paddle.fluid as fluid
__all__ = ['eval_det_run']
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
from ppocr.utils.utility import create_module
from .eval_det_iou import DetectionIoUEvaluator
import json
from copy import deepcopy
import cv2
from ppocr.data.reader_main import reader_main
def cal_det_res(exe, config, eval_info_dict):
global_params = config['Global']
save_res_path = global_params['save_res_path']
postprocess_params = deepcopy(config["PostProcess"])
postprocess_params.update(global_params)
postprocess = create_module(postprocess_params['function']) \
(params=postprocess_params)
with open(save_res_path, "wb") as fout:
tackling_num = 0
for data in eval_info_dict['reader']():
img_num = len(data)
tackling_num = tackling_num + img_num
            logger.info("processed %d images", tackling_num)
img_list = []
ratio_list = []
img_name_list = []
for ino in range(img_num):
img_list.append(data[ino][0])
ratio_list.append(data[ino][1])
img_name_list.append(data[ino][2])
img_list = np.concatenate(img_list, axis=0)
outs = exe.run(eval_info_dict['program'], \
feed={'image': img_list}, \
fetch_list=eval_info_dict['fetch_varname_list'])
outs_dict = {}
for tno in range(len(outs)):
fetch_name = eval_info_dict['fetch_name_list'][tno]
fetch_value = np.array(outs[tno])
outs_dict[fetch_name] = fetch_value
dt_boxes_list = postprocess(outs_dict, ratio_list)
for ino in range(img_num):
dt_boxes = dt_boxes_list[ino]
img_name = img_name_list[ino]
dt_boxes_json = []
for box in dt_boxes:
tmp_json = {"transcription": ""}
tmp_json['points'] = box.tolist()
dt_boxes_json.append(tmp_json)
otstr = img_name + "\t" + json.dumps(dt_boxes_json) + "\n"
fout.write(otstr.encode())
return
def load_label_infor(label_file_path, do_ignore=False):
img_name_label_dict = {}
with open(label_file_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
substr = line.decode().strip("\n").split("\t")
bbox_infor = json.loads(substr[1])
bbox_num = len(bbox_infor)
for bno in range(bbox_num):
text = bbox_infor[bno]['transcription']
ignore = False
if text == "###" and do_ignore:
ignore = True
bbox_infor[bno]['ignore'] = ignore
img_name_label_dict[substr[0]] = bbox_infor
return img_name_label_dict
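# Expected label-file line format (tab separated; the values below are
# illustrative):
#   img_10.jpg<TAB>[{"transcription": "hello", "points": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]}, ...]
# When do_ignore is True, boxes whose transcription is "###" are flagged with
# ignore=True so that DetectionIoUEvaluator treats them as "don't care" regions.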
def cal_det_metrics(gt_label_path, save_res_path):
evaluator = DetectionIoUEvaluator()
gt_label_infor = load_label_infor(gt_label_path, do_ignore=True)
dt_label_infor = load_label_infor(save_res_path)
results = []
for img_name in gt_label_infor:
gt_label = gt_label_infor[img_name]
if img_name not in dt_label_infor:
dt_label = []
else:
dt_label = dt_label_infor[img_name]
result = evaluator.evaluate_image(gt_label, dt_label)
results.append(result)
methodMetrics = evaluator.combine_results(results)
return methodMetrics
def eval_det_run(exe, config, eval_info_dict, mode):
cal_det_res(exe, config, eval_info_dict)
save_res_path = config['Global']['save_res_path']
if mode == "eval":
gt_label_path = config['EvalReader']['label_file_path']
metrics = cal_det_metrics(gt_label_path, save_res_path)
else:
gt_label_path = config['TestReader']['label_file_path']
do_eval = config['TestReader']['do_eval']
if do_eval:
metrics = cal_det_metrics(gt_label_path, save_res_path)
else:
metrics = {}
return metrics
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import paddle.fluid as fluid
__all__ = ['eval_rec_run', 'test_rec_benchmark']
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
from ppocr.utils.character import cal_predicts_accuracy
from ppocr.utils.character import convert_rec_label_to_lod
from ppocr.utils.character import convert_rec_attention_infer_res
from ppocr.utils.utility import create_module
import json
from copy import deepcopy
import cv2
from ppocr.data.reader_main import reader_main
def eval_rec_run(exe, config, eval_info_dict, mode):
"""
Run evaluation program, return program outputs.
"""
char_ops = config['Global']['char_ops']
total_loss = 0
total_sample_num = 0
total_acc_num = 0
total_batch_num = 0
if mode == "eval":
is_remove_duplicate = False
else:
is_remove_duplicate = True
for data in eval_info_dict['reader']():
img_num = len(data)
img_list = []
label_list = []
for ino in range(img_num):
img_list.append(data[ino][0])
label_list.append(data[ino][1])
img_list = np.concatenate(img_list, axis=0)
outs = exe.run(eval_info_dict['program'], \
feed={'image': img_list}, \
fetch_list=eval_info_dict['fetch_varname_list'], \
return_numpy=False)
preds = np.array(outs[0])
if preds.shape[1] != 1:
preds, preds_lod = convert_rec_attention_infer_res(preds)
else:
preds_lod = outs[0].lod()[0]
labels, labels_lod = convert_rec_label_to_lod(label_list)
acc, acc_num, sample_num = cal_predicts_accuracy(
char_ops, preds, preds_lod, labels, labels_lod, is_remove_duplicate)
total_acc_num += acc_num
total_sample_num += sample_num
total_batch_num += 1
avg_acc = total_acc_num * 1.0 / total_sample_num
metrics = {'avg_acc': avg_acc, "total_acc_num": total_acc_num, \
"total_sample_num": total_sample_num}
return metrics
def test_rec_benchmark(exe, config, eval_info_dict):
    """ Evaluate recognition accuracy on the LMDB benchmark datasets. """
eval_data_list = ['IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867', \
'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80']
eval_data_dir = config['TestReader']['lmdb_sets_dir']
total_evaluation_data_number = 0
total_correct_number = 0
eval_data_acc_info = {}
for eval_data in eval_data_list:
config['TestReader']['lmdb_sets_dir'] = \
eval_data_dir + "/" + eval_data
eval_reader = reader_main(config=config, mode="test")
eval_info_dict['reader'] = eval_reader
metrics = eval_rec_run(exe, config, eval_info_dict, "test")
total_evaluation_data_number += metrics['total_sample_num']
total_correct_number += metrics['total_acc_num']
eval_data_acc_info[eval_data] = metrics
avg_acc = total_correct_number * 1.0 / total_evaluation_data_number
logger.info('-' * 50)
strs = ""
for eval_data in eval_data_list:
eval_acc = eval_data_acc_info[eval_data]['avg_acc']
strs += "\n {}, accuracy:{:.6f}".format(eval_data, eval_acc)
strs += "\n average, accuracy:{:.6f}".format(avg_acc)
logger.info(strs)
logger.info('-' * 50)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import time
import multiprocessing
import numpy as np
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, they
# would not take effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
import program
from paddle import fluid
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.save_load import init_model
from ppocr.utils.character import CharacterOps
from ppocr.utils.utility import create_module
def main():
config = program.load_config(FLAGS.config)
program.merge_config(FLAGS.opt)
logger.info(config)
# check if set use_gpu=True in paddlepaddle cpu version
use_gpu = config['Global']['use_gpu']
    program.check_gpu(use_gpu)
alg = config['Global']['algorithm']
assert alg in ['EAST', 'DB', 'Rosetta', 'CRNN', 'STARNet', 'RARE']
if alg in ['Rosetta', 'CRNN', 'STARNet', 'RARE']:
config['Global']['char_ops'] = CharacterOps(config['Global'])
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
startup_prog = fluid.Program()
eval_program = fluid.Program()
feeded_var_names, target_vars, fetches_var_name = program.build_export(
config, eval_program, startup_prog)
eval_program = eval_program.clone(for_test=True)
exe = fluid.Executor(place)
exe.run(startup_prog)
init_model(config, eval_program, exe)
fluid.io.save_inference_model(
dirname="./output/",
feeded_var_names=feeded_var_names,
main_program=eval_program,
target_vars=target_vars,
executor=exe,
model_filename='model',
params_filename='params')
print("save success, output_name_list:", fetches_var_name)
if __name__ == '__main__':
parser = program.ArgsParser()
FLAGS = parser.parse_args()
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
from ppocr.data.det.east_process import EASTProcessTest
from ppocr.data.det.db_process import DBProcessTest
from ppocr.postprocess.db_postprocess import DBPostProcess
from ppocr.postprocess.east_postprocess import EASTPostPocess
import copy
import numpy as np
import math
import sys
import time
class TextDetector(object):
def __init__(self, args):
max_side_len = args.det_max_side_len
self.det_algorithm = args.det_algorithm
preprocess_params = {'max_side_len': max_side_len}
postprocess_params = {}
if self.det_algorithm == "DB":
self.preprocess_op = DBProcessTest(preprocess_params)
postprocess_params["thresh"] = args.det_db_thresh
postprocess_params["box_thresh"] = args.det_db_box_thresh
postprocess_params["max_candidates"] = 1000
self.postprocess_op = DBPostProcess(postprocess_params)
elif self.det_algorithm == "EAST":
self.preprocess_op = EASTProcessTest(preprocess_params)
postprocess_params["score_thresh"] = args.det_east_score_thresh
postprocess_params["cover_thresh"] = args.det_east_cover_thresh
postprocess_params["nms_thresh"] = args.det_east_nms_thresh
self.postprocess_op = EASTPostPocess(postprocess_params)
else:
logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
sys.exit(0)
self.predictor, self.input_tensor, self.output_tensors =\
utility.create_predictor(args, mode="det")
def order_points_clockwise(self, pts):
        # adapted from:
        # https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
# sort the points based on their x-coordinates
xSorted = pts[np.argsort(pts[:, 0]), :]
# grab the left-most and right-most points from the sorted
        # x-coordinate points
leftMost = xSorted[:2, :]
rightMost = xSorted[2:, :]
# now, sort the left-most coordinates according to their
# y-coordinates so we can grab the top-left and bottom-left
# points, respectively
leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
(tl, bl) = leftMost
rightMost = rightMost[np.argsort(rightMost[:, 1]), :]
(tr, br) = rightMost
rect = np.array([tl, tr, br, bl], dtype="float32")
return rect
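    # Illustrative example: for pts = [[10, 0], [0, 0], [0, 5], [10, 5]] (in any
    # input order), the returned rect is [top-left, top-right, bottom-right,
    # bottom-left], i.e. [[0, 0], [10, 0], [10, 5], [0, 5]]; the expansion and
    # cropping code below relies on this fixed point order.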
def expand_det_res(self, points, bbox_height, bbox_width, img_height,
img_width):
if bbox_height * 1.0 / bbox_width >= 2.0:
expand_w = bbox_width * 0.20
expand_h = bbox_width * 0.20
elif bbox_width * 1.0 / bbox_height >= 3.0:
expand_w = bbox_height * 0.20
expand_h = bbox_height * 0.20
else:
expand_w = bbox_height * 0.1
expand_h = bbox_height * 0.1
points[0, 0] = int(max((points[0, 0] - expand_w), 0))
points[1, 0] = int(min((points[1, 0] + expand_w), img_width))
points[3, 0] = int(max((points[3, 0] - expand_w), 0))
points[2, 0] = int(min((points[2, 0] + expand_w), img_width))
points[0, 1] = int(max((points[0, 1] - expand_h), 0))
points[1, 1] = int(max((points[1, 1] - expand_h), 0))
points[3, 1] = int(min((points[3, 1] + expand_h), img_height))
points[2, 1] = int(min((points[2, 1] + expand_h), img_height))
return points
def filter_tag_det_res(self, dt_boxes, image_shape):
img_height, img_width = image_shape[0:2]
dt_boxes_new = []
for box in dt_boxes:
box = self.order_points_clockwise(box)
left = int(np.min(box[:, 0]))
right = int(np.max(box[:, 0]))
top = int(np.min(box[:, 1]))
bottom = int(np.max(box[:, 1]))
bbox_height = bottom - top
bbox_width = right - left
diffh = math.fabs(box[0, 1] - box[1, 1])
diffw = math.fabs(box[0, 0] - box[3, 0])
rect_width = int(np.linalg.norm(box[0] - box[1]))
rect_height = int(np.linalg.norm(box[0] - box[3]))
if rect_width <= 10 or rect_height <= 10:
continue
if diffh <= 10 and diffw <= 10:
box = self.expand_det_res(
copy.deepcopy(box), bbox_height, bbox_width, img_height,
img_width)
dt_boxes_new.append(box)
dt_boxes = np.array(dt_boxes_new)
return dt_boxes
def __call__(self, img):
ori_im = img.copy()
im, ratio_list = self.preprocess_op(img)
if im is None:
return None, 0
im = im.copy()
starttime = time.time()
self.input_tensor.copy_from_cpu(im)
self.predictor.zero_copy_run()
outputs = []
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
outputs.append(output)
outs_dict = {}
if self.det_algorithm == "EAST":
outs_dict['f_score'] = outputs[0]
outs_dict['f_geo'] = outputs[1]
else:
outs_dict['maps'] = [outputs[0]]
dt_boxes_list = self.postprocess_op(outs_dict, [ratio_list])
dt_boxes = dt_boxes_list[0]
dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
elapse = time.time() - starttime
return dt_boxes, elapse
if __name__ == "__main__":
args = utility.parse_args()
image_file_list = utility.get_image_file_list(args.image_dir)
text_detector = TextDetector(args)
count = 0
total_time = 0
for image_file in image_file_list:
img = cv2.imread(image_file)
if img is None:
logger.info("error in loading image:{}".format(image_file))
continue
dt_boxes, elapse = text_detector(img)
if count > 0:
total_time += elapse
count += 1
print("Predict time of %s:" % image_file, elapse)
utility.draw_text_det_res(dt_boxes, image_file)
    if count > 1:
        print("Avg Time:", total_time / (count - 1))
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
import predict_system
import copy
import numpy as np
import math
import time
import json
if __name__ == "__main__":
args = utility.parse_args()
text_sys = predict_system.TextSystem(args)
image_file_list = []
label_file_path = "./eval_perform/gt_res/test_chinese_ic15_500_4pts.txt"
img_set_path = "./eval_perform/"
with open(label_file_path, "rb") as fin:
lines = fin.readlines()
for line in lines:
substr = line.decode('utf-8').strip("\n").split("\t")
if "lsvt" in substr[0]:
continue
image_file_list.append(substr[0])
total_time_all = 0
count = 0
save_path = "./output/predict.txt"
fout = open(save_path, "wb")
for image_name in image_file_list:
image_file = img_set_path + image_name
img = cv2.imread(image_file)
if img is None:
logger.info("error in loading image:{}".format(image_file))
continue
count += 1
total_time = 0
starttime = time.time()
dt_boxes, rec_res = text_sys(img)
elapse = time.time() - starttime
total_time_all += elapse
print("Predict time of %s(%d): %.3fs" % (image_file, count, elapse))
dt_num = len(dt_boxes)
bbox_list = []
for dno in range(dt_num):
box = dt_boxes[dno]
text, score = rec_res[dno]
points = []
for tno in range(len(box)):
points.append([box[tno][0] * 1.0, box[tno][1] * 1.0])
bbox_list.append({
"transcription": text,
"points": points,
"scores": score * 1.0
})
otstr = image_name + "\t" + json.dumps(bbox_list) + "\n"
fout.write(otstr.encode('utf-8'))
avg_time = total_time_all / count
logger.info("avg_time: {0}".format(avg_time))
logger.info("avg_fps: {0}".format(1.0 / avg_time))
fout.close()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
import predict_system
import copy
import numpy as np
import math
import time
import json
import os
if __name__ == "__main__":
args = utility.parse_args()
text_sys = predict_system.TextSystem(args)
image_file_list = []
img_set_path = "/paddle/code/dyn/test_imgs/rctw_samples/"
image_file_list = os.listdir(img_set_path)
total_time_all = 0
count = 0
save_path = "./output/predict.txt"
fout = open(save_path, "wb")
for image_name in image_file_list:
image_file = img_set_path + image_name
img = cv2.imread(image_file)
if img is None:
logger.info("error in loading image:{}".format(image_file))
continue
count += 1
starttime = time.time()
dt_boxes, rec_res = text_sys(img)
if dt_boxes is None:
count -= 1
continue
elapse = time.time() - starttime
total_time_all += elapse
print("Predict time of %s(%d): %.3fs" % (image_file, count, elapse))
dt_num = len(dt_boxes)
bbox_list = []
for dno in range(dt_num):
box = dt_boxes[dno]
text, score = rec_res[dno]
points = []
for tno in range(len(box)):
points.append([box[tno][0] * 1.0, box[tno][1] * 1.0])
bbox_list.append({
"transcription": text,
"points": points,
"scores": score * 1.0
})
otstr = image_name + "\t" + json.dumps(bbox_list) + "\n"
fout.write(otstr.encode('utf-8'))
avg_time = total_time_all / count
logger.info("avg_time: {0}".format(avg_time))
logger.info("avg_fps: {0}".format(1.0 / avg_time))
fout.close()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
import copy
import numpy as np
import math
import time
from ppocr.utils.character import CharacterOps
class TextRecognizer(object):
def __init__(self, args):
self.predictor, self.input_tensor, self.output_tensors =\
utility.create_predictor(args, mode="rec")
image_shape = [int(v) for v in args.rec_image_shape.split(",")]
self.rec_image_shape = image_shape
char_ops_params = {}
char_ops_params["character_type"] = args.rec_char_type
char_ops_params["character_dict_path"] = args.rec_char_dict_path
char_ops_params['loss_type'] = 'ctc'
self.char_ops = CharacterOps(char_ops_params)
def resize_norm_img(self, img):
imgC, imgH, imgW = self.rec_image_shape
h = img.shape[0]
w = img.shape[1]
ratio = w / float(h)
if math.ceil(imgH * ratio) > imgW:
resized_w = imgW
else:
resized_w = int(math.ceil(imgH * ratio))
resized_image = cv2.resize(img, (resized_w, imgH))
resized_image = resized_image.astype('float32')
resized_image = resized_image.transpose((2, 0, 1)) / 255
resized_image -= 0.5
resized_image /= 0.5
padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
padding_im[:, :, 0:resized_w] = resized_image
return padding_im
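    # Note: the crop is resized to height imgH with its aspect ratio preserved
    # (width capped at imgW), scaled to [0, 1], normalized to [-1, 1] via
    # (x / 255 - 0.5) / 0.5, and right-padded with zeros to a fixed
    # (imgC, imgH, imgW) tensor so that variable-width crops can be batched.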
def __call__(self, img_list):
img_num = len(img_list)
batch_num = 15
rec_res = []
predict_time = 0
for beg_img_no in range(0, img_num, batch_num):
end_img_no = min(img_num, beg_img_no + batch_num)
norm_img_batch = []
for ino in range(beg_img_no, end_img_no):
norm_img = self.resize_norm_img(img_list[ino])
norm_img = norm_img[np.newaxis, :]
norm_img_batch.append(norm_img)
norm_img_batch = np.concatenate(norm_img_batch)
norm_img_batch = norm_img_batch.copy()
starttime = time.time()
self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.zero_copy_run()
rec_idx_batch = self.output_tensors[0].copy_to_cpu()
rec_idx_lod = self.output_tensors[0].lod()[0]
predict_batch = self.output_tensors[1].copy_to_cpu()
predict_lod = self.output_tensors[1].lod()[0]
elapse = time.time() - starttime
predict_time += elapse
starttime = time.time()
for rno in range(len(rec_idx_lod) - 1):
beg = rec_idx_lod[rno]
end = rec_idx_lod[rno + 1]
rec_idx_tmp = rec_idx_batch[beg:end, 0]
preds_text = self.char_ops.decode(rec_idx_tmp)
beg = predict_lod[rno]
end = predict_lod[rno + 1]
probs = predict_batch[beg:end, :]
ind = np.argmax(probs, axis=1)
blank = probs.shape[1]
valid_ind = np.where(ind != (blank - 1))[0]
score = np.mean(probs[valid_ind, ind[valid_ind]])
rec_res.append([preds_text, score])
return rec_res, predict_time
if __name__ == "__main__":
args = utility.parse_args()
image_file_list = utility.get_image_file_list(args.image_dir)
text_recognizer = TextRecognizer(args)
valid_image_file_list = []
img_list = []
for image_file in image_file_list:
img = cv2.imread(image_file)
if img is None:
logger.info("error in loading image:{}".format(image_file))
continue
valid_image_file_list.append(image_file)
img_list.append(img)
    # inference is run twice; the first call's results are overwritten, so it
    # effectively serves as a warm-up before the second, reported run
    rec_res, predict_time = text_recognizer(img_list)
    rec_res, predict_time = text_recognizer(img_list)
for ino in range(len(img_list)):
print("Predicts of %s:%s" % (valid_image_file_list[ino], rec_res[ino]))
print("Total predict time for %d images:%.3f" %
(len(img_list), predict_time))
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import utility
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import cv2
import predict_det
import predict_rec
import copy
import numpy as np
import math
import time
class TextSystem(object):
def __init__(self, args):
self.text_detector = predict_det.TextDetector(args)
self.text_recognizer = predict_rec.TextRecognizer(args)
def get_rotate_crop_image(self, img, points):
img_height, img_width = img.shape[0:2]
left = int(np.min(points[:, 0]))
right = int(np.max(points[:, 0]))
top = int(np.min(points[:, 1]))
bottom = int(np.max(points[:, 1]))
img_crop = img[top:bottom, left:right, :].copy()
points[:, 0] = points[:, 0] - left
points[:, 1] = points[:, 1] - top
img_crop_width = int(np.linalg.norm(points[0] - points[1]))
img_crop_height = int(np.linalg.norm(points[0] - points[3]))
pts_std = np.float32([[0, 0], [img_crop_width, 0],\
[img_crop_width, img_crop_height], [0, img_crop_height]])
M = cv2.getPerspectiveTransform(points, pts_std)
dst_img = cv2.warpPerspective(
img_crop,
M, (img_crop_width, img_crop_height),
borderMode=cv2.BORDER_REPLICATE)
dst_img_height, dst_img_width = dst_img.shape[0:2]
if dst_img_height * 1.0 / dst_img_width >= 1.5:
dst_img = np.rot90(dst_img)
return dst_img
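    # Note: the quadrilateral is warped to an axis-aligned rectangle with a
    # perspective transform; if the result is much taller than wide
    # (height / width >= 1.5) it is rotated 90 degrees on the assumption that
    # the text line is vertical.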
def print_draw_crop_rec_res(self, img_crop_list, rec_res):
bbox_num = len(img_crop_list)
for bno in range(bbox_num):
cv2.imwrite("./output/img_crop_%d.jpg" % bno, img_crop_list[bno])
print(bno, rec_res[bno])
def __call__(self, img):
ori_im = img.copy()
dt_boxes, elapse = self.text_detector(img)
if dt_boxes is None:
return None, None
img_crop_list = []
for bno in range(len(dt_boxes)):
tmp_box = copy.deepcopy(dt_boxes[bno])
img_crop = self.get_rotate_crop_image(ori_im, tmp_box)
img_crop_list.append(img_crop)
rec_res, elapse = self.text_recognizer(img_crop_list)
# self.print_draw_crop_rec_res(img_crop_list, rec_res)
return dt_boxes, rec_res
if __name__ == "__main__":
args = utility.parse_args()
image_file_list = utility.get_image_file_list(args.image_dir)
text_sys = TextSystem(args)
for image_file in image_file_list:
img = cv2.imread(image_file)
if img is None:
logger.info("error in loading image:{}".format(image_file))
continue
starttime = time.time()
        dt_boxes, rec_res = text_sys(img)
        if dt_boxes is None:
            continue
        elapse = time.time() - starttime
        print("Predict time of %s: %.3fs" % (image_file, elapse))
        dt_num = len(dt_boxes)
dt_boxes_final = []
for dno in range(dt_num):
text, score = rec_res[dno]
if score >= 0:
text_str = "%s, %.3f" % (text, score)
print(text_str)
dt_boxes_final.append(dt_boxes[dno])
utility.draw_text_det_res(dt_boxes_final, image_file)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os, sys
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from paddle.fluid.core import PaddleTensor
from paddle.fluid.core import AnalysisConfig
from paddle.fluid.core import create_paddle_predictor
import cv2
import numpy as np
def parse_args():
def str2bool(v):
return v.lower() in ("true", "t", "1")
parser = argparse.ArgumentParser()
#params for prediction engine
parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--gpu_mem", type=int, default=8000)
#params for text detector
parser.add_argument("--image_dir", type=str)
parser.add_argument("--det_algorithm", type=str, default='DB')
parser.add_argument("--det_model_dir", type=str)
parser.add_argument("--det_max_side_len", type=float, default=960)
    #DB params
parser.add_argument("--det_db_thresh", type=float, default=0.3)
parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
    #EAST params
parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)
#params for text recognizer
parser.add_argument("--rec_algorithm", type=str, default='CRNN')
parser.add_argument("--rec_model_dir", type=str)
parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
parser.add_argument("--rec_char_type", type=str, default='ch')
parser.add_argument(
"--rec_char_dict_path",
type=str,
default="./ppocr/utils/ppocr_keys_v1.txt")
return parser.parse_args()
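# Example invocation (paths are placeholders; the predict scripts live
# alongside this utility module):
#   python predict_system.py --image_dir ./test_imgs \
#       --det_model_dir ./models/det --rec_model_dir ./models/rec
# Flags that are not supplied fall back to the defaults above (DB detection,
# CRNN recognition, GPU enabled).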
def get_image_file_list(image_dir):
image_file_list = []
if image_dir is None:
return image_file_list
if os.path.isfile(image_dir):
image_file_list = [image_dir]
elif os.path.isdir(image_dir):
for single_file in os.listdir(image_dir):
image_file_list.append(os.path.join(image_dir, single_file))
return image_file_list
def create_predictor(args, mode):
if mode == "det":
model_dir = args.det_model_dir
else:
model_dir = args.rec_model_dir
if model_dir is None:
        logger.info("cannot find the {} model dir: {}".format(mode, model_dir))
sys.exit(0)
model_file_path = model_dir + "/model"
params_file_path = model_dir + "/params"
if not os.path.exists(model_file_path):
        logger.info("cannot find the model file: {}".format(model_file_path))
sys.exit(0)
if not os.path.exists(params_file_path):
        logger.info("cannot find the params file: {}".format(params_file_path))
sys.exit(0)
config = AnalysisConfig(model_file_path, params_file_path)
if args.use_gpu:
config.enable_use_gpu(args.gpu_mem, 0)
else:
config.disable_gpu()
config.disable_glog_info()
config.switch_ir_optim(args.ir_optim)
# if args.use_tensorrt:
# config.enable_tensorrt_engine(
# precision_mode=AnalysisConfig.Precision.Half
# if args.use_fp16 else AnalysisConfig.Precision.Float32,
# max_batch_size=args.batch_size)
config.enable_memory_optim()
# use zero copy
config.switch_use_feed_fetch_ops(False)
predictor = create_paddle_predictor(config)
input_names = predictor.get_input_names()
input_tensor = predictor.get_input_tensor(input_names[0])
output_names = predictor.get_output_names()
output_tensors = []
for output_name in output_names:
output_tensor = predictor.get_output_tensor(output_name)
output_tensors.append(output_tensor)
return predictor, input_tensor, output_tensors
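# Typical zero-copy inference loop built on the returned handles (this mirrors
# how predict_det.py and predict_rec.py use them; `batch` is a placeholder
# numpy array shaped like the model input):
#   input_tensor.copy_from_cpu(batch)
#   predictor.zero_copy_run()
#   output = output_tensors[0].copy_to_cpu()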
def draw_text_det_res(dt_boxes, img_path):
src_im = cv2.imread(img_path)
for box in dt_boxes:
box = np.array(box).astype(np.int32).reshape(-1, 2)
cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
img_name_pure = img_path.split("/")[-1]
cv2.imwrite("./output/%s" % img_name_pure, src_im)
if __name__ == '__main__':
args = parse_args()
args.use_gpu = False
root_path = "/Users/liuweiwei06/Desktop/TEST_CODES/icode/baidu/personal-code/PaddleOCR/"
args.det_model_dir = root_path + "test_models/public_v1/ch_det_mv3_db"
predictor, input_tensor, output_tensors = create_predictor(args, mode='det')
print(predictor.get_input_names())
print(predictor.get_output_names())
print(predictor.program(), file=open("det_program.txt", 'w'))
args.rec_model_dir = root_path + "test_models/public_v1/ch_rec_mv3_crnn/"
rec_predictor, input_tensor, output_tensors = create_predictor(
args, mode='rec')
print(rec_predictor.get_input_names())
print(rec_predictor.get_output_names())
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import multiprocessing
import numpy as np
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, they
# would not take effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
from paddle import fluid
import program
from ppocr.utils.utility import initial_logger
from ppocr.data.reader_main import reader_main
from ppocr.utils.save_load import init_model
from ppocr.utils.character import CharacterOps
from ppocr.utils.utility import create_module
logger = initial_logger()
def main():
config = program.load_config(FLAGS.config)
program.merge_config(FLAGS.opt)
logger.info(config)
char_ops = CharacterOps(config['Global'])
config['Global']['char_ops'] = char_ops
# check if set use_gpu=True in paddlepaddle cpu version
use_gpu = config['Global']['use_gpu']
# check_gpu(use_gpu)
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
rec_model = create_module(config['Architecture']['function'])(params=config)
startup_prog = fluid.Program()
eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog):
with fluid.unique_name.guard():
_, outputs = rec_model(mode="test")
fetch_name_list = list(outputs.keys())
fetch_varname_list = [outputs[v].name for v in fetch_name_list]
eval_prog = eval_prog.clone(for_test=True)
exe.run(startup_prog)
init_model(config, eval_prog, exe)
blobs = reader_main(config, 'test')
imgs = next(blobs())
for img in imgs:
predict = exe.run(program=eval_prog,
feed={"image": img},
fetch_list=fetch_varname_list,
return_numpy=False)
preds = np.array(predict[0])
if preds.shape[1] == 1:
preds = preds.reshape(-1)
preds_lod = predict[0].lod()[0]
preds_text = char_ops.decode(preds)
else:
end_pos = np.where(preds[0, :] == 1)[0]
if len(end_pos) <= 1:
preds_text = preds[0, 1:]
else:
preds_text = preds[0, 1:end_pos[1]]
preds_text = preds_text.reshape(-1)
preds_text = char_ops.decode(preds_text)
print(preds)
print(preds_text)
# save for inference model
target_var = []
for key, values in outputs.items():
target_var.append(values)
fluid.io.save_inference_model(
"./output/",
feeded_var_names=['image'],
target_vars=target_var,
executor=exe,
main_program=eval_prog,
model_filename="model",
params_filename="params")
if __name__ == '__main__':
parser = program.ArgsParser()
FLAGS = parser.parse_args()
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import sys
import yaml
import os
from ppocr.utils.utility import create_module
from ppocr.utils.utility import initial_logger
logger = initial_logger()
import paddle.fluid as fluid
import time
from ppocr.utils.stats import TrainingStats
from eval_utils.eval_det_utils import eval_det_run
from eval_utils.eval_rec_utils import eval_rec_run
from ppocr.utils.save_load import save_model
import numpy as np
from ppocr.utils.character import cal_predicts_accuracy
class ArgsParser(ArgumentParser):
def __init__(self):
super(ArgsParser, self).__init__(
formatter_class=RawDescriptionHelpFormatter)
self.add_argument("-c", "--config", help="configuration file to use")
self.add_argument(
"-o", "--opt", nargs='+', help="set configuration options")
def parse_args(self, argv=None):
args = super(ArgsParser, self).parse_args(argv)
assert args.config is not None, \
"Please specify --config=configure_file_path."
args.opt = self._parse_opt(args.opt)
return args
def _parse_opt(self, opts):
config = {}
if not opts:
return config
for s in opts:
s = s.strip()
k, v = s.split('=')
config[k] = yaml.load(v, Loader=yaml.Loader)
return config
class AttrDict(dict):
"""Single level attribute dict, NOT recursive"""
def __init__(self, **kwargs):
super(AttrDict, self).__init__()
super(AttrDict, self).update(kwargs)
def __getattr__(self, key):
if key in self:
return self[key]
raise AttributeError("object has no attribute '{}'".format(key))
global_config = AttrDict()
def load_config(file_path):
"""
Load config from yml/yaml file.
Args:
file_path (str): Path of the config file to be loaded.
Returns: global config
"""
_, ext = os.path.splitext(file_path)
assert ext in ['.yml', '.yaml'], "only support yaml files for now"
merge_config(yaml.load(open(file_path), Loader=yaml.Loader))
    assert "reader_yml" in global_config['Global'],\
        "reader_yml is missing from the Global section of the config"
reader_file_path = global_config['Global']['reader_yml']
_, ext = os.path.splitext(reader_file_path)
assert ext in ['.yml', '.yaml'], "only support yaml files for reader"
merge_config(yaml.load(open(reader_file_path), Loader=yaml.Loader))
return global_config
def merge_config(config):
"""
Merge config into global config.
Args:
config (dict): Config to be merged.
Returns: global config
"""
for key, value in config.items():
if "." not in key:
if isinstance(value, dict) and key in global_config:
global_config[key].update(value)
else:
global_config[key] = value
else:
sub_keys = key.split('.')
assert (sub_keys[0] in global_config)
cur = global_config[sub_keys[0]]
for idx, sub_key in enumerate(sub_keys[1:]):
assert (sub_key in cur)
if idx == len(sub_keys) - 2:
cur[sub_key] = value
else:
cur = cur[sub_key]
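# Example of the dotted-key override handled above: passing
# `-o Global.epoch_num=300` on the command line yields
# {"Global.epoch_num": 300} from ArgsParser._parse_opt; merge_config then walks
# the dotted key and overwrites global_config['Global']['epoch_num'] in place.
# (The key name is illustrative.)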
def check_gpu(use_gpu):
"""
Log error and exit when set use_gpu=true in paddlepaddle
cpu version.
"""
err = "Config use_gpu cannot be set as true while you are " \
"using paddlepaddle cpu version ! \nPlease try: \n" \
"\t1. Install paddlepaddle-gpu to run model on GPU \n" \
"\t2. Set use_gpu as false in config file to run " \
"model on CPU"
try:
if use_gpu and not fluid.is_compiled_with_cuda():
logger.error(err)
sys.exit(1)
except Exception as e:
pass
def build(config, main_prog, startup_prog, mode):
"""
Build a program using a model and an optimizer
1. create feeds
2. create a dataloader
3. create a model
4. create fetchs
5. create an optimizer
Args:
config(dict): config
main_prog(): main program
startup_prog(): startup program
        mode(str): build mode, e.g. 'train' or 'test'
    Returns:
        dataloader: a bridge between the model and the data
        fetch_name_list(list): names of the model outputs to fetch
        fetch_varname_list(list): variable names of the model outputs
        opt_loss_name(str): name of the training loss (None unless mode is 'train')
"""
with fluid.program_guard(main_prog, startup_prog):
with fluid.unique_name.guard():
func_infor = config['Architecture']['function']
model = create_module(func_infor)(params=config)
dataloader, outputs = model(mode=mode)
fetch_name_list = list(outputs.keys())
fetch_varname_list = [outputs[v].name for v in fetch_name_list]
opt_loss_name = None
if mode == "train":
opt_loss = outputs['total_loss']
opt_params = config['Optimizer']
optimizer = create_module(opt_params['function'])(opt_params)
optimizer.minimize(opt_loss)
opt_loss_name = opt_loss.name
global_lr = optimizer._global_learning_rate()
global_lr.persistable = True
fetch_name_list.insert(0, "lr")
fetch_varname_list.insert(0, global_lr.name)
return (dataloader, fetch_name_list, fetch_varname_list, opt_loss_name)
def build_export(config, main_prog, startup_prog):
"""
    Build an inference (export) program from a model; no dataloader or
    optimizer is created here.
    Args:
        config(dict): config
        main_prog(): main program
        startup_prog(): startup program
    Returns:
        feeded_var_names(list): names of the input variables
        target_vars(list): output variables of the exported graph
        fetches_var_name(list): names of the output variables
"""
with fluid.program_guard(main_prog, startup_prog):
with fluid.unique_name.guard():
func_infor = config['Architecture']['function']
model = create_module(func_infor)(params=config)
image, outputs = model(mode='export')
fetches_var = [outputs[name] for name in outputs]
fetches_var_name = [name for name in outputs]
feeded_var_names = [image.name]
target_vars = fetches_var
return feeded_var_names, target_vars, fetches_var_name
def create_multi_devices_program(program, loss_var_name):
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = True
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_iteration_per_drop_scope = 1
compile_program = fluid.CompiledProgram(program).with_data_parallel(
loss_name=loss_var_name,
build_strategy=build_strategy,
exec_strategy=exec_strategy)
return compile_program
def train_eval_det_run(config, exe, train_info_dict, eval_info_dict):
train_batch_id = 0
log_smooth_window = config['Global']['log_smooth_window']
epoch_num = config['Global']['epoch_num']
print_batch_step = config['Global']['print_batch_step']
eval_batch_step = config['Global']['eval_batch_step']
save_epoch_step = config['Global']['save_epoch_step']
save_model_dir = config['Global']['save_model_dir']
train_stats = TrainingStats(log_smooth_window,
train_info_dict['fetch_name_list'])
best_eval_hmean = -1
best_batch_id = 0
best_epoch = 0
train_loader = train_info_dict['reader']
for epoch in range(epoch_num):
train_loader.start()
try:
while True:
t1 = time.time()
train_outs = exe.run(
program=train_info_dict['compile_program'],
fetch_list=train_info_dict['fetch_varname_list'],
return_numpy=False)
stats = {}
for tno in range(len(train_outs)):
fetch_name = train_info_dict['fetch_name_list'][tno]
fetch_value = np.mean(np.array(train_outs[tno]))
stats[fetch_name] = fetch_value
t2 = time.time()
train_batch_elapse = t2 - t1
train_stats.update(stats)
if train_batch_id > 0 and train_batch_id \
% print_batch_step == 0:
logs = train_stats.log()
strs = 'epoch: {}, iter: {}, {}, time: {:.3f}'.format(
epoch, train_batch_id, logs, train_batch_elapse)
logger.info(strs)
if train_batch_id > 0 and\
train_batch_id % eval_batch_step == 0:
metrics = eval_det_run(exe, config, eval_info_dict, "eval")
hmean = metrics['hmean']
if hmean >= best_eval_hmean:
best_eval_hmean = hmean
best_batch_id = train_batch_id
best_epoch = epoch
save_path = save_model_dir + "/best_accuracy"
save_model(train_info_dict['train_program'], save_path)
strs = 'Test iter: {}, metrics:{}, best_hmean:{:.6f}, best_epoch:{}, best_batch_id:{}'.format(
train_batch_id, metrics, best_eval_hmean, best_epoch,
best_batch_id)
logger.info(strs)
train_batch_id += 1
except fluid.core.EOFException:
train_loader.reset()
if epoch > 0 and epoch % save_epoch_step == 0:
save_path = save_model_dir + "/iter_epoch_%d" % (epoch)
save_model(train_info_dict['train_program'], save_path)
return
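# Structure note (inferred from the loop above, not a formal contract):
# train_info_dict is expected to provide at least
#   'reader'             -> a dataloader exposing start()/reset()
#   'compile_program'    -> the compiled (data-parallel) program passed to exe.run
#   'train_program'      -> the plain program handed to save_model for checkpoints
#   'fetch_name_list'    -> readable names aligned one-to-one with
#   'fetch_varname_list' -> the variable names actually fetched
# eval_info_dict is whatever eval_det_run consumes; its returned metrics dict
# must contain an 'hmean' entry.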
def train_eval_rec_run(config, exe, train_info_dict, eval_info_dict):
train_batch_id = 0
log_smooth_window = config['Global']['log_smooth_window']
epoch_num = config['Global']['epoch_num']
print_batch_step = config['Global']['print_batch_step']
eval_batch_step = config['Global']['eval_batch_step']
save_epoch_step = config['Global']['save_epoch_step']
save_model_dir = config['Global']['save_model_dir']
train_stats = TrainingStats(log_smooth_window, ['loss', 'acc'])
best_eval_acc = -1
best_batch_id = 0
best_epoch = 0
train_loader = train_info_dict['reader']
for epoch in range(epoch_num):
train_loader.start()
try:
while True:
t1 = time.time()
train_outs = exe.run(
program=train_info_dict['compile_program'],
fetch_list=train_info_dict['fetch_varname_list'],
return_numpy=False)
fetch_map = dict(
zip(train_info_dict['fetch_name_list'],
range(len(train_outs))))
loss = np.mean(np.array(train_outs[fetch_map['total_loss']]))
lr = np.mean(np.array(train_outs[fetch_map['lr']]))
preds_idx = fetch_map['decoded_out']
preds = np.array(train_outs[preds_idx])
preds_lod = train_outs[preds_idx].lod()[0]
labels_idx = fetch_map['label']
labels = np.array(train_outs[labels_idx])
labels_lod = train_outs[labels_idx].lod()[0]
acc, acc_num, img_num = cal_predicts_accuracy(
config['Global']['char_ops'], preds, preds_lod, labels,
labels_lod)
t2 = time.time()
train_batch_elapse = t2 - t1
stats = {'loss': loss, 'acc': acc}
train_stats.update(stats)
if train_batch_id > 0 and train_batch_id \
% print_batch_step == 0:
logs = train_stats.log()
strs = 'epoch: {}, iter: {}, lr: {:.6f}, {}, time: {:.3f}'.format(
epoch, train_batch_id, lr, logs, train_batch_elapse)
logger.info(strs)
if train_batch_id > 0 and\
train_batch_id % eval_batch_step == 0:
metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
eval_acc = metrics['avg_acc']
eval_sample_num = metrics['total_sample_num']
if eval_acc > best_eval_acc:
best_eval_acc = eval_acc
best_batch_id = train_batch_id
best_epoch = epoch
save_path = save_model_dir + "/best_accuracy"
save_model(train_info_dict['train_program'], save_path)
strs = 'Test iter: {}, acc:{:.6f}, best_acc:{:.6f}, best_epoch:{}, best_batch_id:{}, eval_sample_num:{}'.format(
train_batch_id, eval_acc, best_eval_acc, best_epoch,
best_batch_id, eval_sample_num)
logger.info(strs)
train_batch_id += 1
except fluid.core.EOFException:
train_loader.reset()
if epoch > 0 and epoch % save_epoch_step == 0:
save_path = save_model_dir + "/iter_epoch_%d" % (epoch)
save_model(train_info_dict['train_program'], save_path)
return
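# Structure note (inferred from the loop above): the recognition trainer relies
# on the fetch list containing 'total_loss', 'lr', 'decoded_out' and 'label'.
# The decoded predictions and the labels come back as LoD tensors, so their
# lod()[0] offsets are forwarded to cal_predicts_accuracy to split per-sample
# sequences; eval_rec_run is expected to report 'avg_acc' and
# 'total_sample_num'.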
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import time
import numpy as np
from copy import deepcopy
import json
# from paddle.fluid.contrib.model_stat import summary
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, they
# will have no effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
from paddle import fluid
from ppocr.utils.utility import create_module
from ppocr.utils.utility import load_config, merge_config
import ppocr.data.det.reader_main as reader
from ppocr.utils.utility import ArgsParser
from ppocr.utils.check import check_gpu
from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.eval_utils import eval_det_run
def draw_det_res(dt_boxes, config, img_name, ino):
if len(dt_boxes) > 0:
img_set_path = config['TestReader']['img_set_dir']
img_path = img_set_path + img_name
import cv2
src_im = cv2.imread(img_path)
for box in dt_boxes:
box = box.astype(np.int32).reshape((-1, 1, 2))
cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
cv2.imwrite("tmp%d.jpg" % ino, src_im)
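# Example call (illustrative values): given detected polygons, typically shaped
# [N, 4, 2], and an image file name relative to TestReader.img_set_dir,
#   draw_det_res(dt_boxes, config, 'img_10.jpg', 0)
# writes the annotated image to tmp0.jpg in the working directory.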
def main():
config = load_config(FLAGS.config)
merge_config(FLAGS.opt)
print(config)
    # check whether use_gpu=True is set when running the CPU build of PaddlePaddle
use_gpu = config['Global']['use_gpu']
check_gpu(use_gpu)
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
det_model = create_module(config['Architecture']['function'])(params=config)
startup_prog = fluid.Program()
eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog):
with fluid.unique_name.guard():
eval_loader, eval_outputs = det_model(mode="test")
eval_fetch_list = [v.name for v in eval_outputs]
eval_prog = eval_prog.clone(for_test=True)
exe.run(startup_prog)
pretrain_weights = config['Global']['pretrain_weights']
if pretrain_weights is not None:
load_pretrain(exe, eval_prog, pretrain_weights)
else:
logger.info("Not find pretrain_weights:%s" % pretrain_weights)
sys.exit(0)
metrics = eval_det_run(exe, eval_prog, eval_fetch_list, config, "test")
logger.info("metrics:{}".format(metrics))
logger.info("success!")
def test_reader():
config = load_config(FLAGS.config)
merge_config(FLAGS.opt)
print(config)
tmp_reader = reader.test_reader(config=config)
count = 0
print_count = 0
import time
starttime = time.time()
for data in tmp_reader():
count += len(data)
print_count += 1
if print_count % 10 == 0:
batch_time = (time.time() - starttime) / print_count
print("reader:", count, len(data), batch_time)
print("finish reader:", count)
print("success")
if __name__ == '__main__':
parser = ArgsParser()
FLAGS = parser.parse_args()
main()
# test_reader()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import time
import numpy as np
from copy import deepcopy
import json
# from paddle.fluid.contrib.model_stat import summary
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, they
# will have no effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
from paddle import fluid
from ppocr.utils.utility import create_module
from ppocr.utils.utility import load_config, merge_config
import ppocr.data.det.reader_main as reader
from ppocr.utils.utility import ArgsParser
from ppocr.utils.check import check_gpu
from ppocr.utils.checkpoint import load_pretrain, load_checkpoint, save, save_model
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from ppocr.utils.eval_utils import eval_det_run
def draw_det_res(dt_boxes, config, img_name, ino):
if len(dt_boxes) > 0:
img_set_path = config['TestReader']['img_set_dir']
img_path = img_set_path + img_name
import cv2
src_im = cv2.imread(img_path)
for box in dt_boxes:
box = box.astype(np.int32).reshape((-1, 1, 2))
cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
cv2.imwrite("tmp%d.jpg" % ino, src_im)
def main():
config = load_config(FLAGS.config)
merge_config(FLAGS.opt)
print(config)
    # check whether use_gpu=True is set when running the CPU build of PaddlePaddle
use_gpu = config['Global']['use_gpu']
check_gpu(use_gpu)
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
det_model = create_module(config['Architecture']['function'])(params=config)
startup_prog = fluid.Program()
eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog):
with fluid.unique_name.guard():
eval_outputs = det_model(mode="test")
eval_fetch_list = [v.name for v in eval_outputs]
eval_prog = eval_prog.clone(for_test=True)
exe.run(startup_prog)
pretrain_weights = config['Global']['pretrain_weights']
if pretrain_weights is not None:
fluid.load(eval_prog, pretrain_weights)
else:
logger.info("Not find pretrain_weights:%s" % pretrain_weights)
sys.exit(0)
save_res_path = config['Global']['save_res_path']
with open(save_res_path, "wb") as fout:
test_reader = reader.test_reader(config=config)
tackling_num = 0
for data in test_reader():
img_num = len(data)
tackling_num = tackling_num + img_num
logger.info("tackling_num:%d", tackling_num)
img_list = []
ratio_list = []
img_name_list = []
for ino in range(img_num):
img_list.append(data[ino][0])
ratio_list.append(data[ino][1])
img_name_list.append(data[ino][2])
img_list = np.concatenate(img_list, axis=0)
outs = exe.run(eval_prog,\
feed={'image': img_list},\
fetch_list=eval_fetch_list)
global_params = config['Global']
postprocess_params = deepcopy(config["PostProcess"])
postprocess_params.update(global_params)
postprocess = create_module(postprocess_params['function'])\
(params=postprocess_params)
dt_boxes_list = postprocess(outs, ratio_list)
for ino in range(img_num):
dt_boxes = dt_boxes_list[ino]
img_name = img_name_list[ino]
dt_boxes_json = []
for box in dt_boxes:
tmp_json = {"transcription": ""}
tmp_json['points'] = box.tolist()
dt_boxes_json.append(tmp_json)
otstr = img_name + "\t" + json.dumps(dt_boxes_json) + "\n"
fout.write(otstr.encode())
#draw_det_res(dt_boxes, config, img_name, ino)
logger.info("success!")
def test_reader():
config = load_config(FLAGS.config)
merge_config(FLAGS.opt)
print(config)
tmp_reader = reader.test_reader(config=config)
count = 0
print_count = 0
import time
starttime = time.time()
for data in tmp_reader():
count += len(data)
print_count += 1
if print_count % 10 == 0:
batch_time = (time.time() - starttime) / print_count
print("reader:", count, len(data), batch_time)
print("finish reader:", count)
print("success")
if __name__ == '__main__':
parser = ArgsParser()
FLAGS = parser.parse_args()
main()
# test_reader()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import multiprocessing
import numpy as np
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be
# set before `import paddle`. Otherwise, they
# will have no effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
from paddle import fluid
from ppocr.utils.utility import load_config, merge_config
from ppocr.data.rec.reader_main import test_reader
from ppocr.utils.utility import ArgsParser
from ppocr.utils.character import CharacterOps, cal_predicts_accuracy
from ppocr.utils.check import check_gpu
from ppocr.utils.utility import create_module
from ppocr.utils.utility import initial_logger
logger = initial_logger()
def main():
config = load_config(FLAGS.config)
merge_config(FLAGS.opt)
char_ops = CharacterOps(config['Global'])
config['Global']['char_num'] = char_ops.get_char_num()
    # check whether use_gpu=True is set when running the CPU build of PaddlePaddle
use_gpu = config['Global']['use_gpu']
check_gpu(use_gpu)
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
rec_model = create_module(config['Architecture']['function'])(params=config)
startup_prog = fluid.Program()
eval_prog = fluid.Program()
with fluid.program_guard(eval_prog, startup_prog):
with fluid.unique_name.guard():
eval_outputs = rec_model(mode="test")
eval_fetch_list = [v.name for v in eval_outputs]
eval_prog = eval_prog.clone(for_test=True)
exe.run(startup_prog)
pretrain_weights = config['Global']['pretrain_weights']
if pretrain_weights is not None:
fluid.load(eval_prog, pretrain_weights)
test_img_path = config['test_img_path']
image_shape = config['Global']['image_shape']
blobs = test_reader(image_shape, test_img_path)
predict = exe.run(program=eval_prog,
feed={"image": blobs},
fetch_list=eval_fetch_list,
return_numpy=False)
preds = np.array(predict[0])
if preds.shape[1] == 1:
preds = preds.reshape(-1)
preds_lod = predict[0].lod()[0]
preds_text = char_ops.decode(preds)
else:
end_pos = np.where(preds[0, :] == 1)[0]
if len(end_pos) <= 1:
preds_text = preds[0, 1:]
else:
preds_text = preds[0, 1:end_pos[1]]
preds_text = preds_text.reshape(-1)
preds_text = char_ops.decode(preds_text)
fluid.io.save_inference_model(
"./output/",
feeded_var_names=['image'],
target_vars=eval_outputs,
executor=exe,
main_program=eval_prog,
model_filename="model",
params_filename="params")
print(preds)
print(preds_text)
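# Decoding note (describing the branch above): a CTC-style head yields a LoD
# tensor with a single column, so the flattened prediction is decoded directly
# with char_ops.decode together with its lod offsets; an attention-style head
# yields dense [batch, max_len] indices, where the first position is skipped
# and the sequence is truncated at the second occurrence of token id 1, which
# this script treats as the end-of-sequence marker.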
if __name__ == '__main__':
parser = ArgsParser()
FLAGS = parser.parse_args()
main()