Commit d9c5148f authored by Leif's avatar Leif
Browse files

Merge remote-tracking branch 'origin/dygraph' into dygraph

parents 3998e131 b7c8bfb4
......@@ -31,8 +31,6 @@ from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.data import create_operators, transform
from ppocr.postprocess import build_post_process
# import tools.infer.benchmark_utils as benchmark_utils
logger = get_logger()
......@@ -100,6 +98,24 @@ class TextDetector(object):
self.predictor, self.input_tensor, self.output_tensors, self.config = utility.create_predictor(
args, 'det', logger)
if args.benchmark:
import auto_log
pid = os.getpid()
self.autolog = auto_log.AutoLogger(
model_name="det",
model_precision=args.precision,
batch_size=1,
data_shape="dynamic",
save_path=args.save_log_path,
inference_config=self.config,
pids=pid,
process_name=None,
gpu_ids=0,
time_keys=[
'preprocess_time', 'inference_time', 'postprocess_time'
],
warmup=10)
def order_points_clockwise(self, pts):
"""
reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
......@@ -158,6 +174,10 @@ class TextDetector(object):
data = {'image': img}
st = time.time()
if args.benchmark:
self.autolog.times.start()
data = transform(data, self.preprocess_op)
img, shape_list = data
if img is None:
......@@ -166,12 +186,17 @@ class TextDetector(object):
shape_list = np.expand_dims(shape_list, axis=0)
img = img.copy()
if args.benchmark:
self.autolog.times.stamp()
self.input_tensor.copy_from_cpu(img)
self.predictor.run()
outputs = []
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
outputs.append(output)
if args.benchmark:
self.autolog.times.stamp()
preds = {}
if self.det_algorithm == "EAST":
......@@ -187,7 +212,7 @@ class TextDetector(object):
else:
raise NotImplementedError
self.predictor.try_shrink_memory()
#self.predictor.try_shrink_memory()
post_result = self.postprocess_op(preds, shape_list)
dt_boxes = post_result[0]['points']
if self.det_algorithm == "SAST" and self.det_sast_polygon:
......@@ -195,6 +220,8 @@ class TextDetector(object):
else:
dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape)
if args.benchmark:
self.autolog.times.end(stamp=True)
et = time.time()
return dt_boxes, et - st
......@@ -212,8 +239,6 @@ if __name__ == "__main__":
for i in range(10):
res = text_detector(img)
cpu_mem, gpu_mem, gpu_util = 0, 0, 0
if not os.path.exists(draw_img_save):
os.makedirs(draw_img_save)
for image_file in image_file_list:
......@@ -237,3 +262,6 @@ if __name__ == "__main__":
"det_res_{}".format(img_name_pure))
cv2.imwrite(img_path, src_im)
logger.info("The visualized image saved in {}".format(img_path))
if args.benchmark:
text_detector.autolog.report()
......@@ -28,7 +28,6 @@ import traceback
import paddle
import tools.infer.utility as utility
import tools.infer.benchmark_utils as benchmark_utils
from ppocr.postprocess import build_post_process
from ppocr.utils.logging import get_logger
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
......@@ -66,8 +65,6 @@ class TextRecognizer(object):
self.predictor, self.input_tensor, self.output_tensors, self.config = \
utility.create_predictor(args, 'rec', logger)
self.rec_times = utility.Timer()
def resize_norm_img(self, img, max_wh_ratio):
imgC, imgH, imgW = self.rec_image_shape
assert imgC == img.shape[2]
......@@ -168,14 +165,13 @@ class TextRecognizer(object):
width_list.append(img.shape[1] / float(img.shape[0]))
# Sorting can speed up the recognition process
indices = np.argsort(np.array(width_list))
self.rec_times.total_time.start()
rec_res = [['', 0.0]] * img_num
batch_num = self.rec_batch_num
st = time.time()
for beg_img_no in range(0, img_num, batch_num):
end_img_no = min(img_num, beg_img_no + batch_num)
norm_img_batch = []
max_wh_ratio = 0
self.rec_times.preprocess_time.start()
for ino in range(beg_img_no, end_img_no):
h, w = img_list[indices[ino]].shape[0:2]
wh_ratio = w * 1.0 / h
......@@ -216,23 +212,18 @@ class TextRecognizer(object):
gsrm_slf_attn_bias1_list,
gsrm_slf_attn_bias2_list,
]
self.rec_times.preprocess_time.end()
self.rec_times.inference_time.start()
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[
i])
input_tensor.copy_from_cpu(inputs[i])
self.predictor.run()
self.rec_times.inference_time.end()
outputs = []
for output_tensor in self.output_tensors:
output = output_tensor.copy_to_cpu()
outputs.append(output)
preds = {"predict": outputs[2]}
else:
self.rec_times.preprocess_time.end()
self.rec_times.inference_time.start()
self.input_tensor.copy_from_cpu(norm_img_batch)
self.predictor.run()
......@@ -241,15 +232,11 @@ class TextRecognizer(object):
output = output_tensor.copy_to_cpu()
outputs.append(output)
preds = outputs[0]
self.rec_times.inference_time.end()
self.rec_times.postprocess_time.start()
rec_result = self.postprocess_op(preds)
for rno in range(len(rec_result)):
rec_res[indices[beg_img_no + rno]] = rec_result[rno]
self.rec_times.postprocess_time.end()
self.rec_times.img_num += int(norm_img_batch.shape[0])
self.rec_times.total_time.end()
return rec_res, self.rec_times.total_time.value()
return rec_res, time.time() - st
def main(args):
......@@ -278,12 +265,6 @@ def main(args):
img_list.append(img)
try:
rec_res, _ = text_recognizer(img_list)
if args.benchmark:
cm, gm, gu = utility.get_current_memory_mb(0)
cpu_mem += cm
gpu_mem += gm
gpu_util += gu
count += 1
except Exception as E:
logger.info(traceback.format_exc())
......@@ -292,38 +273,6 @@ def main(args):
for ino in range(len(img_list)):
logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
rec_res[ino]))
if args.benchmark:
mems = {
'cpu_rss_mb': cpu_mem / count,
'gpu_rss_mb': gpu_mem / count,
'gpu_util': gpu_util * 100 / count
}
else:
mems = None
logger.info("The predict time about recognizer module is as follows: ")
rec_time_dict = text_recognizer.rec_times.report(average=True)
rec_model_name = args.rec_model_dir
if args.benchmark:
# construct log information
model_info = {
'model_name': args.rec_model_dir.split('/')[-1],
'precision': args.precision
}
data_info = {
'batch_size': args.rec_batch_num,
'shape': 'dynamic_shape',
'data_num': rec_time_dict['img_num']
}
perf_info = {
'preprocess_time_s': rec_time_dict['preprocess_time'],
'inference_time_s': rec_time_dict['inference_time'],
'postprocess_time_s': rec_time_dict['postprocess_time'],
'total_time_s': rec_time_dict['total_time']
}
benchmark_log = benchmark_utils.PaddleInferBenchmark(
text_recognizer.config, model_info, data_info, perf_info, mems)
benchmark_log("Rec")
if __name__ == "__main__":
......
......@@ -33,8 +33,7 @@ import tools.infer.predict_det as predict_det
import tools.infer.predict_cls as predict_cls
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.utils.logging import get_logger
from tools.infer.utility import draw_ocr_box_txt, get_current_memory_mb
import tools.infer.benchmark_utils as benchmark_utils
from tools.infer.utility import draw_ocr_box_txt, get_rotate_crop_image
logger = get_logger()
......@@ -50,39 +49,6 @@ class TextSystem(object):
if self.use_angle_cls:
self.text_classifier = predict_cls.TextClassifier(args)
def get_rotate_crop_image(self, img, points):
'''
img_height, img_width = img.shape[0:2]
left = int(np.min(points[:, 0]))
right = int(np.max(points[:, 0]))
top = int(np.min(points[:, 1]))
bottom = int(np.max(points[:, 1]))
img_crop = img[top:bottom, left:right, :].copy()
points[:, 0] = points[:, 0] - left
points[:, 1] = points[:, 1] - top
'''
img_crop_width = int(
max(
np.linalg.norm(points[0] - points[1]),
np.linalg.norm(points[2] - points[3])))
img_crop_height = int(
max(
np.linalg.norm(points[0] - points[3]),
np.linalg.norm(points[1] - points[2])))
pts_std = np.float32([[0, 0], [img_crop_width, 0],
[img_crop_width, img_crop_height],
[0, img_crop_height]])
M = cv2.getPerspectiveTransform(points, pts_std)
dst_img = cv2.warpPerspective(
img,
M, (img_crop_width, img_crop_height),
borderMode=cv2.BORDER_REPLICATE,
flags=cv2.INTER_CUBIC)
dst_img_height, dst_img_width = dst_img.shape[0:2]
if dst_img_height * 1.0 / dst_img_width >= 1.5:
dst_img = np.rot90(dst_img)
return dst_img
def print_draw_crop_rec_res(self, img_crop_list, rec_res):
bbox_num = len(img_crop_list)
for bno in range(bbox_num):
......@@ -103,7 +69,7 @@ class TextSystem(object):
for bno in range(len(dt_boxes)):
tmp_box = copy.deepcopy(dt_boxes[bno])
img_crop = self.get_rotate_crop_image(ori_im, tmp_box)
img_crop = get_rotate_crop_image(ori_im, tmp_box)
img_crop_list.append(img_crop)
if self.use_angle_cls and cls:
img_crop_list, angle_list, elapse = self.text_classifier(
......@@ -158,7 +124,7 @@ def main(args):
img = np.random.uniform(0, 255, [640, 640, 3]).astype(np.uint8)
for i in range(10):
res = text_sys(img)
total_time = 0
cpu_mem, gpu_mem, gpu_util = 0, 0, 0
_st = time.time()
......@@ -175,12 +141,6 @@ def main(args):
dt_boxes, rec_res = text_sys(img)
elapse = time.time() - starttime
total_time += elapse
if args.benchmark and idx % 20 == 0:
cm, gm, gu = get_current_memory_mb(0)
cpu_mem += cm
gpu_mem += gm
gpu_util += gu
count += 1
logger.info(
str(idx) + " Predict time of %s: %.3fs" % (image_file, elapse))
......@@ -215,61 +175,6 @@ def main(args):
logger.info("\nThe predict total time is {}".format(total_time))
img_num = text_sys.text_detector.det_times.img_num
if args.benchmark:
mems = {
'cpu_rss_mb': cpu_mem / count,
'gpu_rss_mb': gpu_mem / count,
'gpu_util': gpu_util * 100 / count
}
else:
mems = None
det_time_dict = text_sys.text_detector.det_times.report(average=True)
rec_time_dict = text_sys.text_recognizer.rec_times.report(average=True)
det_model_name = args.det_model_dir
rec_model_name = args.rec_model_dir
# construct det log information
model_info = {
'model_name': args.det_model_dir.split('/')[-1],
'precision': args.precision
}
data_info = {
'batch_size': 1,
'shape': 'dynamic_shape',
'data_num': det_time_dict['img_num']
}
perf_info = {
'preprocess_time_s': det_time_dict['preprocess_time'],
'inference_time_s': det_time_dict['inference_time'],
'postprocess_time_s': det_time_dict['postprocess_time'],
'total_time_s': det_time_dict['total_time']
}
benchmark_log = benchmark_utils.PaddleInferBenchmark(
text_sys.text_detector.config, model_info, data_info, perf_info, mems,
args.save_log_path)
benchmark_log("Det")
# construct rec log information
model_info = {
'model_name': args.rec_model_dir.split('/')[-1],
'precision': args.precision
}
data_info = {
'batch_size': args.rec_batch_num,
'shape': 'dynamic_shape',
'data_num': rec_time_dict['img_num']
}
perf_info = {
'preprocess_time_s': rec_time_dict['preprocess_time'],
'inference_time_s': rec_time_dict['inference_time'],
'postprocess_time_s': rec_time_dict['postprocess_time'],
'total_time_s': rec_time_dict['total_time']
}
benchmark_log = benchmark_utils.PaddleInferBenchmark(
text_sys.text_recognizer.config, model_info, data_info, perf_info, mems,
args.save_log_path)
benchmark_log("Rec")
if __name__ == "__main__":
......
......@@ -37,6 +37,7 @@ def init_args():
parser.add_argument("--use_gpu", type=str2bool, default=True)
parser.add_argument("--ir_optim", type=str2bool, default=True)
parser.add_argument("--use_tensorrt", type=str2bool, default=False)
parser.add_argument("--min_subgraph_size", type=int, default=3)
parser.add_argument("--precision", type=str, default="fp32")
parser.add_argument("--gpu_mem", type=int, default=500)
......@@ -124,76 +125,6 @@ def parse_args():
return parser.parse_args()
class Times(object):
def __init__(self):
self.time = 0.
self.st = 0.
self.et = 0.
def start(self):
self.st = time.time()
def end(self, accumulative=True):
self.et = time.time()
if accumulative:
self.time += self.et - self.st
else:
self.time = self.et - self.st
def reset(self):
self.time = 0.
self.st = 0.
self.et = 0.
def value(self):
return round(self.time, 4)
class Timer(Times):
def __init__(self):
super(Timer, self).__init__()
self.total_time = Times()
self.preprocess_time = Times()
self.inference_time = Times()
self.postprocess_time = Times()
self.img_num = 0
def info(self, average=False):
logger.info("----------------------- Perf info -----------------------")
logger.info("total_time: {}, img_num: {}".format(self.total_time.value(
), self.img_num))
preprocess_time = round(self.preprocess_time.value() / self.img_num,
4) if average else self.preprocess_time.value()
postprocess_time = round(
self.postprocess_time.value() / self.img_num,
4) if average else self.postprocess_time.value()
inference_time = round(self.inference_time.value() / self.img_num,
4) if average else self.inference_time.value()
average_latency = self.total_time.value() / self.img_num
logger.info("average_latency(ms): {:.2f}, QPS: {:2f}".format(
average_latency * 1000, 1 / average_latency))
logger.info(
"preprocess_latency(ms): {:.2f}, inference_latency(ms): {:.2f}, postprocess_latency(ms): {:.2f}".
format(preprocess_time * 1000, inference_time * 1000,
postprocess_time * 1000))
def report(self, average=False):
dic = {}
dic['preprocess_time'] = round(
self.preprocess_time.value() / self.img_num,
4) if average else self.preprocess_time.value()
dic['postprocess_time'] = round(
self.postprocess_time.value() / self.img_num,
4) if average else self.postprocess_time.value()
dic['inference_time'] = round(
self.inference_time.value() / self.img_num,
4) if average else self.inference_time.value()
dic['img_num'] = self.img_num
dic['total_time'] = round(self.total_time.value(), 4)
return dic
def create_predictor(args, mode, logger):
if mode == "det":
model_dir = args.det_model_dir
......@@ -212,11 +143,10 @@ def create_predictor(args, mode, logger):
model_file_path = model_dir + "/inference.pdmodel"
params_file_path = model_dir + "/inference.pdiparams"
if not os.path.exists(model_file_path):
logger.info("not find model file path {}".format(model_file_path))
sys.exit(0)
raise ValueError("not find model file path {}".format(model_file_path))
if not os.path.exists(params_file_path):
logger.info("not find params file path {}".format(params_file_path))
sys.exit(0)
raise ValueError("not find params file path {}".format(
params_file_path))
config = inference.Config(model_file_path, params_file_path)
......@@ -236,14 +166,17 @@ def create_predictor(args, mode, logger):
config.enable_tensorrt_engine(
precision_mode=inference.PrecisionType.Float32,
max_batch_size=args.max_batch_size,
min_subgraph_size=3) # skip the minmum trt subgraph
if mode == "det" and "mobile" in model_file_path:
min_subgraph_size=args.min_subgraph_size)
# skip the minmum trt subgraph
if mode == "det":
min_input_shape = {
"x": [1, 3, 50, 50],
"conv2d_92.tmp_0": [1, 96, 20, 20],
"conv2d_91.tmp_0": [1, 96, 10, 10],
"conv2d_59.tmp_0": [1, 96, 20, 20],
"nearest_interp_v2_1.tmp_0": [1, 96, 10, 10],
"nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
"conv2d_124.tmp_0": [1, 96, 20, 20],
"nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
"nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
"nearest_interp_v2_5.tmp_0": [1, 24, 20, 20],
......@@ -254,7 +187,9 @@ def create_predictor(args, mode, logger):
"x": [1, 3, 2000, 2000],
"conv2d_92.tmp_0": [1, 96, 400, 400],
"conv2d_91.tmp_0": [1, 96, 200, 200],
"conv2d_59.tmp_0": [1, 96, 400, 400],
"nearest_interp_v2_1.tmp_0": [1, 96, 200, 200],
"conv2d_124.tmp_0": [1, 256, 400, 400],
"nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
"nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
"nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
......@@ -266,39 +201,16 @@ def create_predictor(args, mode, logger):
"x": [1, 3, 640, 640],
"conv2d_92.tmp_0": [1, 96, 160, 160],
"conv2d_91.tmp_0": [1, 96, 80, 80],
"conv2d_59.tmp_0": [1, 96, 160, 160],
"nearest_interp_v2_1.tmp_0": [1, 96, 80, 80],
"nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
"conv2d_124.tmp_0": [1, 256, 160, 160],
"nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
"nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
"nearest_interp_v2_5.tmp_0": [1, 24, 160, 160],
"elementwise_add_7": [1, 56, 40, 40],
"nearest_interp_v2_0.tmp_0": [1, 96, 40, 40]
}
if mode == "det" and "server" in model_file_path:
min_input_shape = {
"x": [1, 3, 50, 50],
"conv2d_59.tmp_0": [1, 96, 20, 20],
"nearest_interp_v2_2.tmp_0": [1, 96, 20, 20],
"nearest_interp_v2_3.tmp_0": [1, 24, 20, 20],
"nearest_interp_v2_4.tmp_0": [1, 24, 20, 20],
"nearest_interp_v2_5.tmp_0": [1, 24, 20, 20]
}
max_input_shape = {
"x": [1, 3, 2000, 2000],
"conv2d_59.tmp_0": [1, 96, 400, 400],
"nearest_interp_v2_2.tmp_0": [1, 96, 400, 400],
"nearest_interp_v2_3.tmp_0": [1, 24, 400, 400],
"nearest_interp_v2_4.tmp_0": [1, 24, 400, 400],
"nearest_interp_v2_5.tmp_0": [1, 24, 400, 400]
}
opt_input_shape = {
"x": [1, 3, 640, 640],
"conv2d_59.tmp_0": [1, 96, 160, 160],
"nearest_interp_v2_2.tmp_0": [1, 96, 160, 160],
"nearest_interp_v2_3.tmp_0": [1, 24, 160, 160],
"nearest_interp_v2_4.tmp_0": [1, 24, 160, 160],
"nearest_interp_v2_5.tmp_0": [1, 24, 160, 160]
}
elif mode == "rec":
min_input_shape = {"x": [args.rec_batch_num, 3, 32, 10]}
max_input_shape = {"x": [args.rec_batch_num, 3, 32, 2000]}
......@@ -328,11 +240,11 @@ def create_predictor(args, mode, logger):
# enable memory optim
config.enable_memory_optim()
config.disable_glog_info()
#config.disable_glog_info()
config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
if mode == 'table':
config.delete_pass("fc_fuse_pass") # not supported for table
config.delete_pass("fc_fuse_pass") # not supported for table
config.switch_use_feed_fetch_ops(False)
config.switch_ir_optim(True)
......@@ -597,29 +509,39 @@ def draw_boxes(image, boxes, scores=None, drop_score=0.5):
return image
def get_current_memory_mb(gpu_id=None):
"""
It is used to Obtain the memory usage of the CPU and GPU during the running of the program.
And this function Current program is time-consuming.
"""
import pynvml
import psutil
import GPUtil
pid = os.getpid()
p = psutil.Process(pid)
info = p.memory_full_info()
cpu_mem = info.uss / 1024. / 1024.
gpu_mem = 0
gpu_percent = 0
if gpu_id is not None:
GPUs = GPUtil.getGPUs()
gpu_load = GPUs[gpu_id].load
gpu_percent = gpu_load
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
gpu_mem = meminfo.used / 1024. / 1024.
return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4)
def get_rotate_crop_image(img, points):
'''
img_height, img_width = img.shape[0:2]
left = int(np.min(points[:, 0]))
right = int(np.max(points[:, 0]))
top = int(np.min(points[:, 1]))
bottom = int(np.max(points[:, 1]))
img_crop = img[top:bottom, left:right, :].copy()
points[:, 0] = points[:, 0] - left
points[:, 1] = points[:, 1] - top
'''
assert len(points) == 4, "shape of points must be 4*2"
img_crop_width = int(
max(
np.linalg.norm(points[0] - points[1]),
np.linalg.norm(points[2] - points[3])))
img_crop_height = int(
max(
np.linalg.norm(points[0] - points[3]),
np.linalg.norm(points[1] - points[2])))
pts_std = np.float32([[0, 0], [img_crop_width, 0],
[img_crop_width, img_crop_height],
[0, img_crop_height]])
M = cv2.getPerspectiveTransform(points, pts_std)
dst_img = cv2.warpPerspective(
img,
M, (img_crop_width, img_crop_height),
borderMode=cv2.BORDER_REPLICATE,
flags=cv2.INTER_CUBIC)
dst_img_height, dst_img_width = dst_img.shape[0:2]
if dst_img_height * 1.0 / dst_img_width >= 1.5:
dst_img = np.rot90(dst_img)
return dst_img
if __name__ == '__main__':
......
......@@ -35,7 +35,7 @@ from ppocr.losses import build_loss
from ppocr.optimizer import build_optimizer
from ppocr.postprocess import build_post_process
from ppocr.metrics import build_metric
from ppocr.utils.save_load import init_model
from ppocr.utils.save_load import init_model, load_dygraph_params
import tools.program as program
dist.get_world_size()
......@@ -97,7 +97,7 @@ def main(config, device, logger, vdl_writer):
# build metric
eval_class = build_metric(config['Metric'])
# load pretrain model
pre_best_model_dict = init_model(config, model, optimizer)
pre_best_model_dict = load_dygraph_params(config, model, logger, optimizer)
logger.info('train dataloader has {} iters'.format(len(train_dataloader)))
if valid_dataloader is not None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment