"megatron/data/ict_dataset.py" did not exist on "efcee15843f93e6cb9dd6451653c2dcabc1d7ae2"
Commit fccfdfa5 authored by dlyrm's avatar dlyrm
Browse files

update code

parent dcc7bf4f
Pipeline #681 canceled with stages
metric: COCO
num_classes: 80
# Datset configuration
TrainDataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco/
EvalDataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco/
worker_num: 0
# preprocess reader in test
TestReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [640, 640], keep_ratio: True, interp: 1}
- Pad: {size: [640, 640], fill_value: [114., 114., 114.]}
- Permute: {}
batch_size: 1
Global:
reader_config: configs/yolov5_reader.yml
include_nms: True
Evaluation: True
model_dir: ./yolov5_s_300e_coco
model_filename: model.pdmodel
params_filename: model.pdiparams
Distillation:
alpha: 1.0
loss: soft_label
QuantAware:
use_pact: true
activation_quantize_type: 'moving_average_abs_max'
quantize_op_types:
- conv2d
- depthwise_conv2d
TrainConfig:
train_iter: 3000
eval_iter: 1000
learning_rate: 0.00001
optimizer_builder:
optimizer:
type: SGD
weight_decay: 4.0e-05
target_metric: 0.365
Global:
reader_config: configs/yolov5_reader.yml
include_nms: True
Evaluation: True
model_dir: ./yolov6mt_s_400e_coco
model_filename: model.pdmodel
params_filename: model.pdiparams
Distillation:
alpha: 1.0
loss: soft_label
QuantAware:
activation_quantize_type: 'moving_average_abs_max'
quantize_op_types:
- conv2d
- depthwise_conv2d
TrainConfig:
train_iter: 8000
eval_iter: 1000
learning_rate:
type: CosineAnnealingDecay
learning_rate: 0.00003
T_max: 8000
optimizer_builder:
optimizer:
type: SGD
weight_decay: 0.00004
Global:
reader_config: configs/yolov5_reader.yml
include_nms: True
Evaluation: True
model_dir: ./yolov7_l_300e_coco
model_filename: model.pdmodel
params_filename: model.pdiparams
Distillation:
alpha: 1.0
loss: soft_label
QuantAware:
activation_quantize_type: 'moving_average_abs_max'
quantize_op_types:
- conv2d
- depthwise_conv2d
TrainConfig:
train_iter: 8000
eval_iter: 1000
learning_rate:
type: CosineAnnealingDecay
learning_rate: 0.00003
T_max: 8000
optimizer_builder:
optimizer:
type: SGD
weight_decay: 0.00004
\ No newline at end of file
metric: COCO
num_classes: 80
# Dataset configuration
TrainDataset:
!COCODataSet
image_dir: train2017
anno_path: annotations/instances_train2017.json
dataset_dir: dataset/coco/
EvalDataset:
!COCODataSet
image_dir: val2017
anno_path: annotations/instances_val2017.json
dataset_dir: dataset/coco/
worker_num: 0
# preprocess reader in test
EvalReader:
sample_transforms:
- Decode: {}
- Resize: {target_size: [640, 640], keep_ratio: True, interp: 1}
- Pad: {size: [640, 640], fill_value: [114., 114., 114.]}
- NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
- Permute: {}
batch_size: 4
Global:
reader_config: configs/yolov8_reader.yml
include_nms: True
Evaluation: True
model_dir: ./yolov8_s_500e_coco_trt_nms/
model_filename: model.pdmodel
params_filename: model.pdiparams
Distillation:
alpha: 1.0
loss: soft_label
QuantAware:
onnx_format: true
activation_quantize_type: 'moving_average_abs_max'
quantize_op_types:
- conv2d
- depthwise_conv2d
TrainConfig:
train_iter: 8000
eval_iter: 1000
learning_rate:
type: CosineAnnealingDecay
learning_rate: 0.00003
T_max: 10000
optimizer_builder:
optimizer:
type: SGD
weight_decay: 4.0e-05
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import numpy as np
import argparse
import paddle
from ppdet.core.workspace import load_config, merge_config
from ppdet.core.workspace import create
from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval
from paddleslim.auto_compression.config_helpers import load_config as load_slim_config
from post_process import PPYOLOEPostProcess
def argsparser():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'--config_path',
type=str,
default=None,
help="path of compression strategy config.",
required=True)
parser.add_argument(
'--devices',
type=str,
default='gpu',
help="which device used to compress.")
return parser
def reader_wrapper(reader, input_list):
def gen():
for data in reader:
in_dict = {}
if isinstance(input_list, list):
for input_name in input_list:
in_dict[input_name] = data[input_name]
elif isinstance(input_list, dict):
for input_name in input_list.keys():
in_dict[input_list[input_name]] = data[input_name]
yield in_dict
return gen
def convert_numpy_data(data, metric):
data_all = {}
data_all = {k: np.array(v) for k, v in data.items()}
if isinstance(metric, VOCMetric):
for k, v in data_all.items():
if not isinstance(v[0], np.ndarray):
tmp_list = []
for t in v:
tmp_list.append(np.array(t))
data_all[k] = np.array(tmp_list)
else:
data_all = {k: np.array(v) for k, v in data.items()}
return data_all
def eval():
place = paddle.CUDAPlace(0) if FLAGS.devices == 'gpu' else paddle.CPUPlace()
exe = paddle.static.Executor(place)
val_program, feed_target_names, fetch_targets = paddle.static.load_inference_model(
global_config["model_dir"].rstrip('/'),
exe,
model_filename=global_config["model_filename"],
params_filename=global_config["params_filename"])
print('Loaded model from: {}'.format(global_config["model_dir"]))
metric = global_config['metric']
for batch_id, data in enumerate(val_loader):
data_all = convert_numpy_data(data, metric)
data_input = {}
for k, v in data.items():
if isinstance(global_config['input_list'], list):
if k in global_config['input_list']:
data_input[k] = np.array(v)
elif isinstance(global_config['input_list'], dict):
if k in global_config['input_list'].keys():
data_input[global_config['input_list'][k]] = np.array(v)
outs = exe.run(val_program,
feed=data_input,
fetch_list=fetch_targets,
return_numpy=False)
res = {}
if 'arch' in global_config and global_config['arch'] == 'PPYOLOE':
postprocess = PPYOLOEPostProcess(
score_threshold=0.01, nms_threshold=0.6)
res = postprocess(np.array(outs[0]), data_all['scale_factor'])
else:
for out in outs:
v = np.array(out)
if len(v.shape) > 1:
res['bbox'] = v
else:
res['bbox_num'] = v
metric.update(data_all, res)
if batch_id % 100 == 0:
print('Eval iter:', batch_id)
metric.accumulate()
metric.log()
metric.reset()
def main():
global global_config
all_config = load_slim_config(FLAGS.config_path)
assert "Global" in all_config, "Key 'Global' not found in config file."
global_config = all_config["Global"]
reader_cfg = load_config(global_config['reader_config'])
dataset = reader_cfg['EvalDataset']
global val_loader
val_loader = create('EvalReader')(reader_cfg['EvalDataset'],
reader_cfg['worker_num'],
return_list=True)
metric = None
if reader_cfg['metric'] == 'COCO':
clsid2catid = {v: k for k, v in dataset.catid2clsid.items()}
anno_file = dataset.get_anno()
metric = COCOMetric(
anno_file=anno_file, clsid2catid=clsid2catid, IouType='bbox')
elif reader_cfg['metric'] == 'VOC':
metric = VOCMetric(
label_list=dataset.get_label_list(),
class_num=reader_cfg['num_classes'],
map_type=reader_cfg['map_type'])
elif reader_cfg['metric'] == 'KeyPointTopDownCOCOEval':
anno_file = dataset.get_anno()
metric = KeyPointTopDownCOCOEval(anno_file,
len(dataset), 17, 'output_eval')
else:
raise ValueError("metric currently only supports COCO and VOC.")
global_config['metric'] = metric
eval()
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu']
paddle.set_device(FLAGS.devices)
main()
#opyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import argparse
import time
import sys
import cv2
import numpy as np
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from ppdet.core.workspace import load_config, create
from ppdet.metrics import COCOMetric
from post_process import PPYOLOEPostProcess
def argsparser():
"""
argsparser func
"""
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_path", type=str, help="inference model filepath")
parser.add_argument(
"--image_file",
type=str,
default=None,
help="image path, if set image_file, it will not eval coco.")
parser.add_argument(
"--reader_config",
type=str,
default=None,
help="path of datset and reader config.")
parser.add_argument(
"--benchmark",
type=bool,
default=False,
help="Whether run benchmark or not.")
parser.add_argument(
"--use_trt",
type=bool,
default=False,
help="Whether use TensorRT or not.")
parser.add_argument(
"--precision",
type=str,
default="paddle",
help="mode of running(fp32/fp16/int8)")
parser.add_argument(
"--device",
type=str,
default="GPU",
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is GPU",
)
parser.add_argument(
"--use_dynamic_shape",
type=bool,
default=True,
help="Whether use dynamic shape or not.")
parser.add_argument(
"--use_mkldnn",
type=bool,
default=False,
help="Whether use mkldnn or not.")
parser.add_argument(
"--cpu_threads", type=int, default=10, help="Num of cpu threads.")
parser.add_argument("--img_shape", type=int, default=640, help="input_size")
parser.add_argument(
'--include_nms',
type=bool,
default=True,
help="Whether include nms or not.")
return parser
CLASS_LABEL = [
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush'
]
def generate_scale(im, target_shape, keep_ratio=True):
"""
Args:
im (np.ndarray): image (np.ndarray)
Returns:
im_scale_x: the resize ratio of X
im_scale_y: the resize ratio of Y
"""
origin_shape = im.shape[:2]
if keep_ratio:
im_size_min = np.min(origin_shape)
im_size_max = np.max(origin_shape)
target_size_min = np.min(target_shape)
target_size_max = np.max(target_shape)
im_scale = float(target_size_min) / float(im_size_min)
if np.round(im_scale * im_size_max) > target_size_max:
im_scale = float(target_size_max) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
else:
resize_h, resize_w = target_shape
im_scale_y = resize_h / float(origin_shape[0])
im_scale_x = resize_w / float(origin_shape[1])
return im_scale_y, im_scale_x
def image_preprocess(img_path, target_shape):
"""
image_preprocess func
"""
img = cv2.imread(img_path)
im_scale_y, im_scale_x = generate_scale(img, target_shape, keep_ratio=False)
img = cv2.resize(
img, (target_shape[0], target_shape[0]),
interpolation=cv2.INTER_LANCZOS4)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.transpose(img, [2, 0, 1]) / 255
img = np.expand_dims(img, 0)
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
img -= img_mean
img /= img_std
scale_factor = np.array([[im_scale_y, im_scale_x]])
return img.astype(np.float32), scale_factor.astype(np.float32)
def get_color_map_list(num_classes):
"""
get_color_map_list func
"""
color_map = num_classes * [0, 0, 0]
for i in range(0, num_classes):
j = 0
lab = i
while lab:
color_map[i * 3] |= ((lab >> 0) & 1) << (7 - j)
color_map[i * 3 + 1] |= ((lab >> 1) & 1) << (7 - j)
color_map[i * 3 + 2] |= ((lab >> 2) & 1) << (7 - j)
j += 1
lab >>= 3
color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
return color_map
def draw_box(image_file, results, class_label, threshold=0.5):
"""
draw_box func
"""
srcimg = cv2.imread(image_file, 1)
for i in range(len(results)):
color_list = get_color_map_list(len(class_label))
clsid2color = {}
classid, conf = int(results[i, 0]), results[i, 1]
if conf < threshold:
continue
xmin, ymin, xmax, ymax = int(results[i, 2]), int(results[i, 3]), int(
results[i, 4]), int(results[i, 5])
if classid not in clsid2color:
clsid2color[classid] = color_list[classid]
color = tuple(clsid2color[classid])
cv2.rectangle(srcimg, (xmin, ymin), (xmax, ymax), color, thickness=2)
print(class_label[classid] + ": " + str(round(conf, 3)))
cv2.putText(
srcimg,
class_label[classid] + ":" + str(round(conf, 3)),
(xmin, ymin - 10),
cv2.FONT_HERSHEY_SIMPLEX,
0.8,
(0, 255, 0),
thickness=2, )
return srcimg
def load_predictor(
model_dir,
precision="fp32",
use_trt=False,
use_mkldnn=False,
batch_size=1,
device="CPU",
min_subgraph_size=3,
use_dynamic_shape=False,
trt_min_shape=1,
trt_max_shape=1280,
trt_opt_shape=640,
cpu_threads=1, ):
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
precision (str): mode of running(fp32/fp16/int8)
use_trt (bool): whether use TensorRT or not.
use_mkldnn (bool): whether use MKLDNN or not in CPU.
device (str): Choose the device you want to run, it can be: CPU/GPU, default is CPU
use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
Returns:
predictor (PaddlePredictor): AnalysisPredictor
Raises:
ValueError: predict by TensorRT need device == 'GPU'.
"""
rerun_flag = False
if device != "GPU" and use_trt:
raise ValueError(
"Predict by TensorRT mode: {}, expect device=='GPU', but device == {}".
format(precision, device))
config = Config(
os.path.join(model_dir, "model.pdmodel"),
os.path.join(model_dir, "model.pdiparams"))
if device == "GPU":
# initial GPU memory(M), device ID
config.enable_use_gpu(200, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads)
config.switch_ir_optim()
if use_mkldnn:
config.enable_mkldnn()
if precision == "int8":
config.enable_mkldnn_int8(
{"conv2d", "depthwise_conv2d", "transpose2", "pool2d"})
precision_map = {
"int8": Config.Precision.Int8,
"fp32": Config.Precision.Float32,
"fp16": Config.Precision.Half,
}
if precision in precision_map.keys() and use_trt:
config.enable_tensorrt_engine(
workspace_size=(1 << 25) * batch_size,
max_batch_size=batch_size,
min_subgraph_size=min_subgraph_size,
precision_mode=precision_map[precision],
use_static=True,
use_calib_mode=False, )
if use_dynamic_shape:
dynamic_shape_file = os.path.join(FLAGS.model_path,
"dynamic_shape.txt")
if os.path.exists(dynamic_shape_file):
config.enable_tuned_tensorrt_dynamic_shape(dynamic_shape_file,
True)
print("trt set dynamic shape done!")
else:
config.collect_shape_range_info(dynamic_shape_file)
print("Start collect dynamic shape...")
rerun_flag = True
# enable shared memory
config.enable_memory_optim()
predictor = create_predictor(config)
return predictor, rerun_flag
def get_current_memory_mb():
"""
It is used to Obtain the memory usage of the CPU and GPU during the running of the program.
And this function Current program is time-consuming.
"""
try:
pkg.require('pynvml')
except:
from pip._internal import main
main(['install', 'pynvml'])
try:
pkg.require('psutil')
except:
from pip._internal import main
main(['install', 'psutil'])
try:
pkg.require('GPUtil')
except:
from pip._internal import main
main(['install', 'GPUtil'])
import pynvml
import psutil
import GPUtil
gpu_id = int(os.environ.get("CUDA_VISIBLE_DEVICES", 0))
pid = os.getpid()
p = psutil.Process(pid)
info = p.memory_full_info()
cpu_mem = info.uss / 1024.0 / 1024.0
gpu_mem = 0
gpu_percent = 0
gpus = GPUtil.getGPUs()
if gpu_id is not None and len(gpus) > 0:
gpu_percent = gpus[gpu_id].load
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
gpu_mem = meminfo.used / 1024.0 / 1024.0
return round(cpu_mem, 4), round(gpu_mem, 4)
def predict_image(predictor,
image_file,
image_shape=[640, 640],
warmup=1,
repeats=1,
threshold=0.5):
"""
predict image main func
"""
img, scale_factor = image_preprocess(image_file, image_shape)
inputs = {}
inputs["image"] = img
if FLAGS.include_nms:
inputs['scale_factor'] = scale_factor
input_names = predictor.get_input_names()
for i, _ in enumerate(input_names):
input_tensor = predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup):
predictor.run()
np_boxes, np_boxes_num = None, None
cpu_mems, gpu_mems = 0, 0
predict_time = 0.0
time_min = float("inf")
time_max = float("-inf")
for i in range(repeats):
start_time = time.time()
predictor.run()
output_names = predictor.get_output_names()
boxes_tensor = predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
if FLAGS.include_nms:
boxes_num = predictor.get_output_handle(output_names[1])
np_boxes_num = boxes_num.copy_to_cpu()
end_time = time.time()
timed = end_time - start_time
time_min = min(time_min, timed)
time_max = max(time_max, timed)
predict_time += timed
cpu_mem, gpu_mem = get_current_memory_mb()
cpu_mems += cpu_mem
gpu_mems += gpu_mem
time_avg = predict_time / repeats
print("[Benchmark]Avg cpu_mem:{} MB, avg gpu_mem: {} MB".format(
cpu_mems / repeats, gpu_mems / repeats))
print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
round(time_min * 1000, 2),
round(time_max * 1000, 1), round(time_avg * 1000, 1)))
if not FLAGS.include_nms:
postprocess = PPYOLOEPostProcess(score_threshold=0.3, nms_threshold=0.6)
res = postprocess(np_boxes, scale_factor)
else:
res = {'bbox': np_boxes, 'bbox_num': np_boxes_num}
res_img = draw_box(
image_file, res["bbox"], CLASS_LABEL, threshold=threshold)
cv2.imwrite("result.jpg", res_img)
def eval(predictor, val_loader, metric, rerun_flag=False):
"""
eval main func
"""
cpu_mems, gpu_mems = 0, 0
predict_time = 0.0
time_min = float("inf")
time_max = float("-inf")
sample_nums = len(val_loader)
input_names = predictor.get_input_names()
output_names = predictor.get_output_names()
boxes_tensor = predictor.get_output_handle(output_names[0])
if FLAGS.include_nms:
boxes_num = predictor.get_output_handle(output_names[1])
for batch_id, data in enumerate(val_loader):
data_all = {k: np.array(v) for k, v in data.items()}
for i, _ in enumerate(input_names):
input_tensor = predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(data_all[input_names[i]])
start_time = time.time()
predictor.run()
np_boxes = boxes_tensor.copy_to_cpu()
if FLAGS.include_nms:
np_boxes_num = boxes_num.copy_to_cpu()
if rerun_flag:
return
end_time = time.time()
timed = end_time - start_time
time_min = min(time_min, timed)
time_max = max(time_max, timed)
predict_time += timed
cpu_mem, gpu_mem = get_current_memory_mb()
cpu_mems += cpu_mem
gpu_mems += gpu_mem
if not FLAGS.include_nms:
postprocess = PPYOLOEPostProcess(
score_threshold=0.3, nms_threshold=0.6)
res = postprocess(np_boxes, data_all['scale_factor'])
else:
res = {'bbox': np_boxes, 'bbox_num': np_boxes_num}
metric.update(data_all, res)
if batch_id % 100 == 0:
print("Eval iter:", batch_id)
sys.stdout.flush()
metric.accumulate()
metric.log()
map_res = metric.get_results()
metric.reset()
time_avg = predict_time / sample_nums
print("[Benchmark]Avg cpu_mem:{} MB, avg gpu_mem: {} MB".format(
cpu_mems / sample_nums, gpu_mems / sample_nums))
print("[Benchmark]Inference time(ms): min={}, max={}, avg={}".format(
round(time_min * 1000, 2),
round(time_max * 1000, 1), round(time_avg * 1000, 1)))
print("[Benchmark] COCO mAP: {}".format(map_res["bbox"][0]))
sys.stdout.flush()
def main():
"""
main func
"""
predictor, rerun_flag = load_predictor(
FLAGS.model_path,
device=FLAGS.device,
use_trt=FLAGS.use_trt,
use_mkldnn=FLAGS.use_mkldnn,
precision=FLAGS.precision,
use_dynamic_shape=FLAGS.use_dynamic_shape,
cpu_threads=FLAGS.cpu_threads)
if FLAGS.image_file:
warmup, repeats = 1, 1
if FLAGS.benchmark:
warmup, repeats = 50, 100
predict_image(
predictor,
FLAGS.image_file,
image_shape=[FLAGS.img_shape, FLAGS.img_shape],
warmup=warmup,
repeats=repeats)
else:
reader_cfg = load_config(FLAGS.reader_config)
dataset = reader_cfg["EvalDataset"]
global val_loader
val_loader = create("EvalReader")(reader_cfg["EvalDataset"],
reader_cfg["worker_num"],
return_list=True)
clsid2catid = {v: k for k, v in dataset.catid2clsid.items()}
anno_file = dataset.get_anno()
metric = COCOMetric(
anno_file=anno_file, clsid2catid=clsid2catid, IouType="bbox")
eval(predictor, val_loader, metric, rerun_flag=rerun_flag)
if rerun_flag:
print(
"***** Collect dynamic shape done, Please rerun the program to get correct results. *****"
)
if __name__ == "__main__":
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
# DataLoader need run on cpu
paddle.set_device("cpu")
main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import cv2
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
"""
Args:
box_scores (N, 5): boxes in corner-form and probabilities.
iou_threshold: intersection over union threshold.
top_k: keep top_k results. If k <= 0, keep all the results.
candidate_size: only consider the candidates with the highest scores.
Returns:
picked: a list of indexes of the kept boxes
"""
scores = box_scores[:, -1]
boxes = box_scores[:, :-1]
picked = []
indexes = np.argsort(scores)
indexes = indexes[-candidate_size:]
while len(indexes) > 0:
current = indexes[-1]
picked.append(current)
if 0 < top_k == len(picked) or len(indexes) == 1:
break
current_box = boxes[current, :]
indexes = indexes[:-1]
rest_boxes = boxes[indexes, :]
iou = iou_of(
rest_boxes,
np.expand_dims(
current_box, axis=0), )
indexes = indexes[iou <= iou_threshold]
return box_scores[picked, :]
def iou_of(boxes0, boxes1, eps=1e-5):
"""Return intersection-over-union (Jaccard index) of boxes.
Args:
boxes0 (N, 4): ground truth boxes.
boxes1 (N or 1, 4): predicted boxes.
eps: a small number to avoid 0 as denominator.
Returns:
iou (N): IoU values.
"""
overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
overlap_area = area_of(overlap_left_top, overlap_right_bottom)
area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
return overlap_area / (area0 + area1 - overlap_area + eps)
def area_of(left_top, right_bottom):
"""Compute the areas of rectangles given two corners.
Args:
left_top (N, 2): left top corner.
right_bottom (N, 2): right bottom corner.
Returns:
area (N): return the area.
"""
hw = np.clip(right_bottom - left_top, 0.0, None)
return hw[..., 0] * hw[..., 1]
class PPYOLOEPostProcess(object):
"""
Args:
input_shape (int): network input image size
scale_factor (float): scale factor of ori image
"""
def __init__(self,
score_threshold=0.4,
nms_threshold=0.5,
nms_top_k=10000,
keep_top_k=300):
self.score_threshold = score_threshold
self.nms_threshold = nms_threshold
self.nms_top_k = nms_top_k
self.keep_top_k = keep_top_k
def _non_max_suppression(self, prediction, scale_factor):
batch_size = prediction.shape[0]
out_boxes_list = []
box_num_list = []
for batch_id in range(batch_size):
bboxes, confidences = prediction[batch_id][..., :4], prediction[
batch_id][..., 4:]
# nms
picked_box_probs = []
picked_labels = []
for class_index in range(0, confidences.shape[1]):
probs = confidences[:, class_index]
mask = probs > self.score_threshold
probs = probs[mask]
if probs.shape[0] == 0:
continue
subset_boxes = bboxes[mask, :]
box_probs = np.concatenate(
[subset_boxes, probs.reshape(-1, 1)], axis=1)
box_probs = hard_nms(
box_probs,
iou_threshold=self.nms_threshold,
top_k=self.nms_top_k)
picked_box_probs.append(box_probs)
picked_labels.extend([class_index] * box_probs.shape[0])
if len(picked_box_probs) == 0:
out_boxes_list.append(np.empty((0, 4)))
else:
picked_box_probs = np.concatenate(picked_box_probs)
# resize output boxes
picked_box_probs[:, 0] /= scale_factor[batch_id][1]
picked_box_probs[:, 2] /= scale_factor[batch_id][1]
picked_box_probs[:, 1] /= scale_factor[batch_id][0]
picked_box_probs[:, 3] /= scale_factor[batch_id][0]
# clas score box
out_box = np.concatenate(
[
np.expand_dims(
np.array(picked_labels), axis=-1), np.expand_dims(
picked_box_probs[:, 4], axis=-1),
picked_box_probs[:, :4]
],
axis=1)
if out_box.shape[0] > self.keep_top_k:
out_box = out_box[out_box[:, 1].argsort()[::-1]
[:self.keep_top_k]]
out_boxes_list.append(out_box)
box_num_list.append(out_box.shape[0])
out_boxes_list = np.concatenate(out_boxes_list, axis=0)
box_num_list = np.array(box_num_list)
return out_boxes_list, box_num_list
def __call__(self, outs, scale_factor):
out_boxes_list, box_num_list = self._non_max_suppression(outs,
scale_factor)
return {'bbox': out_boxes_list, 'bbox_num': box_num_list}
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import numpy as np
import argparse
import paddle
from ppdet.core.workspace import load_config, merge_config
from ppdet.core.workspace import create
from ppdet.metrics import COCOMetric, VOCMetric, KeyPointTopDownCOCOEval
from paddleslim.auto_compression.config_helpers import load_config as load_slim_config
from paddleslim.auto_compression import AutoCompression
from post_process import PPYOLOEPostProcess
from paddleslim.common.dataloader import get_feed_vars
def argsparser():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
'--config_path',
type=str,
default=None,
help="path of compression strategy config.",
required=True)
parser.add_argument(
'--save_dir',
type=str,
default='output',
help="directory to save compressed model.")
parser.add_argument(
'--devices',
type=str,
default='gpu',
help="which device used to compress.")
return parser
def reader_wrapper(reader, input_list):
def gen():
for data in reader:
in_dict = {}
if isinstance(input_list, list):
for input_name in input_list:
in_dict[input_name] = data[input_name]
elif isinstance(input_list, dict):
for input_name in input_list.keys():
in_dict[input_list[input_name]] = data[input_name]
yield in_dict
return gen
def convert_numpy_data(data, metric):
data_all = {}
data_all = {k: np.array(v) for k, v in data.items()}
if isinstance(metric, VOCMetric):
for k, v in data_all.items():
if not isinstance(v[0], np.ndarray):
tmp_list = []
for t in v:
tmp_list.append(np.array(t))
data_all[k] = np.array(tmp_list)
else:
data_all = {k: np.array(v) for k, v in data.items()}
return data_all
def eval_function(exe, compiled_test_program, test_feed_names, test_fetch_list):
metric = global_config['metric']
for batch_id, data in enumerate(val_loader):
data_all = convert_numpy_data(data, metric)
data_input = {}
for k, v in data.items():
if isinstance(global_config['input_list'], list):
if k in test_feed_names:
data_input[k] = np.array(v)
elif isinstance(global_config['input_list'], dict):
if k in global_config['input_list'].keys():
data_input[global_config['input_list'][k]] = np.array(v)
outs = exe.run(compiled_test_program,
feed=data_input,
fetch_list=test_fetch_list,
return_numpy=False)
res = {}
if 'include_nms' in global_config and not global_config['include_nms']:
if 'arch' in global_config and global_config['arch'] == 'PPYOLOE':
postprocess = PPYOLOEPostProcess(
score_threshold=0.01, nms_threshold=0.6)
else:
assert "Not support arch={} now.".format(global_config['arch'])
res = postprocess(np.array(outs[0]), data_all['scale_factor'])
else:
for out in outs:
v = np.array(out)
if len(v.shape) > 1:
res['bbox'] = v
else:
res['bbox_num'] = v
metric.update(data_all, res)
if batch_id % 100 == 0:
print('Eval iter:', batch_id)
metric.accumulate()
metric.log()
map_res = metric.get_results()
metric.reset()
map_key = 'keypoint' if 'arch' in global_config and global_config[
'arch'] == 'keypoint' else 'bbox'
return map_res[map_key][0]
def main():
global global_config
all_config = load_slim_config(FLAGS.config_path)
assert "Global" in all_config, "Key 'Global' not found in config file."
global_config = all_config["Global"]
reader_cfg = load_config(global_config['reader_config'])
train_loader = create('EvalReader')(reader_cfg['TrainDataset'],
reader_cfg['worker_num'],
return_list=True)
if global_config.get('input_list') is None:
global_config['input_list'] = get_feed_vars(
global_config['model_dir'], global_config['model_filename'],
global_config['params_filename'])
train_loader = reader_wrapper(train_loader, global_config['input_list'])
if 'Evaluation' in global_config.keys() and global_config[
'Evaluation'] and paddle.distributed.get_rank() == 0:
eval_func = eval_function
dataset = reader_cfg['EvalDataset']
global val_loader
_eval_batch_sampler = paddle.io.BatchSampler(
dataset, batch_size=reader_cfg['EvalReader']['batch_size'])
val_loader = create('EvalReader')(dataset,
reader_cfg['worker_num'],
batch_sampler=_eval_batch_sampler,
return_list=True)
metric = None
if reader_cfg['metric'] == 'COCO':
clsid2catid = {v: k for k, v in dataset.catid2clsid.items()}
anno_file = dataset.get_anno()
metric = COCOMetric(
anno_file=anno_file, clsid2catid=clsid2catid, IouType='bbox')
elif reader_cfg['metric'] == 'VOC':
metric = VOCMetric(
label_list=dataset.get_label_list(),
class_num=reader_cfg['num_classes'],
map_type=reader_cfg['map_type'])
elif reader_cfg['metric'] == 'KeyPointTopDownCOCOEval':
anno_file = dataset.get_anno()
metric = KeyPointTopDownCOCOEval(anno_file,
len(dataset), 17, 'output_eval')
else:
raise ValueError("metric currently only supports COCO and VOC.")
global_config['metric'] = metric
else:
eval_func = None
ac = AutoCompression(
model_dir=global_config["model_dir"],
model_filename=global_config["model_filename"],
params_filename=global_config["params_filename"],
save_dir=FLAGS.save_dir,
config=all_config,
train_dataloader=train_loader,
eval_callback=eval_func)
ac.compress()
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
assert FLAGS.devices in ['cpu', 'gpu', 'xpu', 'npu']
paddle.set_device(FLAGS.devices)
main()
# All rights `PaddleDetection` reserved
#!/bin/bash
model_dir=$1
model_name=$2
export img_dir="demo"
export log_path="output_pipeline"
echo "model_dir : ${model_dir}"
echo "img_dir: ${img_dir}"
# TODO: support batch size>1
for use_mkldnn in "True" "False"; do
for threads in "1" "6"; do
echo "${model_name} ${model_dir}, use_mkldnn: ${use_mkldnn} threads: ${threads}"
python deploy/python/infer.py \
--model_dir=${model_dir} \
--run_benchmark True \
--enable_mkldnn=${use_mkldnn} \
--device=CPU \
--cpu_threads=${threads} \
--image_dir=${img_dir} 2>&1 | tee ${log_path}/${model_name}_cpu_usemkldnn_${use_mkldnn}_cputhreads_${threads}_bs1_infer.log
done
done
for run_mode in "fluid" "trt_fp32" "trt_fp16"; do
echo "${model_name} ${model_dir}, run_mode: ${run_mode}"
python deploy/python/infer.py \
--model_dir=${model_dir} \
--run_benchmark=True \
--device=GPU \
--run_mode=${run_mode} \
--image_dir=${img_dir} 2>&1 | tee ${log_path}/${model_name}_gpu_runmode_${run_mode}_bs1_infer.log
done
# All rights `PaddleDetection` reserved
#!/bin/bash
model_dir=$1
model_name=$2
export img_dir="demo"
export log_path="output_pipeline"
echo "model_dir : ${model_dir}"
echo "img_dir: ${img_dir}"
# TODO: support batch size>1
for run_mode in "trt_int8"; do
echo "${model_name} ${model_dir}, run_mode: ${run_mode}"
python deploy/python/infer.py \
--model_dir=${model_dir} \
--run_benchmark=True \
--device=GPU \
--run_mode=${run_mode} \
--image_dir=${img_dir} 2>&1 | tee ${log_path}/${model_name}_gpu_runmode_${run_mode}_bs1_infer.log
done
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import argparse
import pandas as pd
def parse_args():
"""
parse input args
"""
parser = argparse.ArgumentParser()
parser.add_argument(
"--log_path",
type=str,
default="./output_pipeline",
help="benchmark log path")
parser.add_argument(
"--output_name",
type=str,
default="benchmark_excel.xlsx",
help="output excel file name")
parser.add_argument(
"--analysis_trt", dest="analysis_trt", action='store_true')
parser.add_argument(
"--analysis_mkl", dest="analysis_mkl", action='store_true')
return parser.parse_args()
def find_all_logs(path_walk):
"""
find all .log files from target dir
"""
for root, ds, files in os.walk(path_walk):
for file_name in files:
if re.match(r'.*.log', file_name):
full_path = os.path.join(root, file_name)
yield file_name, full_path
def process_log(file_name):
"""
process log to dict
"""
output_dict = {}
with open(file_name, 'r') as f:
for i, data in enumerate(f.readlines()):
if i == 0:
continue
line_lists = data.split(" ")
# conf info
if "runtime_device:" in line_lists:
pos_buf = line_lists.index("runtime_device:")
output_dict["runtime_device"] = line_lists[pos_buf + 1].strip()
if "ir_optim:" in line_lists:
pos_buf = line_lists.index("ir_optim:")
output_dict["ir_optim"] = line_lists[pos_buf + 1].strip()
if "enable_memory_optim:" in line_lists:
pos_buf = line_lists.index("enable_memory_optim:")
output_dict["enable_memory_optim"] = line_lists[pos_buf +
1].strip()
if "enable_tensorrt:" in line_lists:
pos_buf = line_lists.index("enable_tensorrt:")
output_dict["enable_tensorrt"] = line_lists[pos_buf + 1].strip()
if "precision:" in line_lists:
pos_buf = line_lists.index("precision:")
output_dict["precision"] = line_lists[pos_buf + 1].strip()
if "enable_mkldnn:" in line_lists:
pos_buf = line_lists.index("enable_mkldnn:")
output_dict["enable_mkldnn"] = line_lists[pos_buf + 1].strip()
if "cpu_math_library_num_threads:" in line_lists:
pos_buf = line_lists.index("cpu_math_library_num_threads:")
output_dict["cpu_math_library_num_threads"] = line_lists[
pos_buf + 1].strip()
# model info
if "model_name:" in line_lists:
pos_buf = line_lists.index("model_name:")
output_dict["model_name"] = list(
filter(None, line_lists[pos_buf + 1].strip().split('/')))[
-1]
# data info
if "batch_size:" in line_lists:
pos_buf = line_lists.index("batch_size:")
output_dict["batch_size"] = line_lists[pos_buf + 1].strip()
if "input_shape:" in line_lists:
pos_buf = line_lists.index("input_shape:")
output_dict["input_shape"] = line_lists[pos_buf + 1].strip()
# perf info
if "cpu_rss(MB):" in line_lists:
pos_buf = line_lists.index("cpu_rss(MB):")
output_dict["cpu_rss(MB)"] = line_lists[pos_buf + 1].strip(
).split(',')[0]
if "gpu_rss(MB):" in line_lists:
pos_buf = line_lists.index("gpu_rss(MB):")
output_dict["gpu_rss(MB)"] = line_lists[pos_buf + 1].strip(
).split(',')[0]
if "gpu_util:" in line_lists:
pos_buf = line_lists.index("gpu_util:")
output_dict["gpu_util"] = line_lists[pos_buf + 1].strip().split(
',')[0]
if "preproce_time(ms):" in line_lists:
pos_buf = line_lists.index("preproce_time(ms):")
output_dict["preproce_time(ms)"] = line_lists[
pos_buf + 1].strip().split(',')[0]
if "inference_time(ms):" in line_lists:
pos_buf = line_lists.index("inference_time(ms):")
output_dict["inference_time(ms)"] = line_lists[
pos_buf + 1].strip().split(',')[0]
if "postprocess_time(ms):" in line_lists:
pos_buf = line_lists.index("postprocess_time(ms):")
output_dict["postprocess_time(ms)"] = line_lists[
pos_buf + 1].strip().split(',')[0]
return output_dict
def filter_df_merge(cpu_df, filter_column=None):
"""
process cpu data frame, merge by 'model_name', 'batch_size'
Args:
cpu_df ([type]): [description]
"""
if not filter_column:
raise Exception(
"please assign filter_column for filter_df_merge function")
df_lists = []
filter_column_lists = []
for k, v in cpu_df.groupby(filter_column, dropna=True):
filter_column_lists.append(k)
df_lists.append(v)
final_output_df = df_lists[-1]
# merge same model
for i in range(len(df_lists) - 1):
left_suffix = cpu_df[filter_column].unique()[0]
right_suffix = df_lists[i][filter_column].unique()[0]
print(left_suffix, right_suffix)
if not pd.isnull(right_suffix):
final_output_df = pd.merge(
final_output_df,
df_lists[i],
how='left',
left_on=['model_name', 'batch_size'],
right_on=['model_name', 'batch_size'],
suffixes=('', '_{0}_{1}'.format(filter_column, right_suffix)))
# rename default df columns
origin_column_names = list(cpu_df.columns.values)
origin_column_names.remove(filter_column)
suffix = final_output_df[filter_column].unique()[0]
for name in origin_column_names:
final_output_df.rename(
columns={name: "{0}_{1}_{2}".format(name, filter_column, suffix)},
inplace=True)
final_output_df.rename(
columns={
filter_column: "{0}_{1}_{2}".format(filter_column, filter_column,
suffix)
},
inplace=True)
final_output_df.sort_values(
by=[
"model_name_{0}_{1}".format(filter_column, suffix),
"batch_size_{0}_{1}".format(filter_column, suffix)
],
inplace=True)
return final_output_df
def trt_perf_analysis(raw_df):
"""
sperate raw dataframe to a list of dataframe
compare tensorrt percision performance
"""
# filter df by gpu, compare tensorrt and gpu
# define default dataframe for gpu performance analysis
gpu_df = raw_df.loc[raw_df['runtime_device'] == 'gpu']
new_df = filter_df_merge(gpu_df, "precision")
# calculate qps diff percentile
infer_fp32 = "inference_time(ms)_precision_fp32"
infer_fp16 = "inference_time(ms)_precision_fp16"
infer_int8 = "inference_time(ms)_precision_int8"
new_df["fp32_fp16_diff"] = new_df[[infer_fp32, infer_fp16]].apply(
lambda x: (float(x[infer_fp16]) - float(x[infer_fp32])) / float(x[infer_fp32]),
axis=1)
new_df["fp32_gpu_diff"] = new_df[["inference_time(ms)", infer_fp32]].apply(
lambda x: (float(x[infer_fp32]) - float(x[infer_fp32])) / float(x["inference_time(ms)"]),
axis=1)
new_df["fp16_int8_diff"] = new_df[[infer_fp16, infer_int8]].apply(
lambda x: (float(x[infer_int8]) - float(x[infer_fp16])) / float(x[infer_fp16]),
axis=1)
return new_df
def mkl_perf_analysis(raw_df):
"""
sperate raw dataframe to a list of dataframe
compare mkldnn performance with not enable mkldnn
"""
# filter df by cpu, compare mkl and cpu
# define default dataframe for cpu mkldnn analysis
cpu_df = raw_df.loc[raw_df['runtime_device'] == 'cpu']
mkl_compare_df = cpu_df.loc[cpu_df['cpu_math_library_num_threads'] == '1']
thread_compare_df = cpu_df.loc[cpu_df['enable_mkldnn'] == 'True']
# define dataframe need to be analyzed
output_mkl_df = filter_df_merge(mkl_compare_df, 'enable_mkldnn')
output_thread_df = filter_df_merge(thread_compare_df,
'cpu_math_library_num_threads')
# calculate performance diff percentile
# compare mkl performance with cpu
enable_mkldnn = "inference_time(ms)_enable_mkldnn_True"
disable_mkldnn = "inference_time(ms)_enable_mkldnn_False"
output_mkl_df["mkl_infer_diff"] = output_mkl_df[[
enable_mkldnn, disable_mkldnn
]].apply(
lambda x: (float(x[enable_mkldnn]) - float(x[disable_mkldnn])) / float(x[disable_mkldnn]),
axis=1)
cpu_enable_mkldnn = "cpu_rss(MB)_enable_mkldnn_True"
cpu_disable_mkldnn = "cpu_rss(MB)_enable_mkldnn_False"
output_mkl_df["mkl_cpu_rss_diff"] = output_mkl_df[[
cpu_enable_mkldnn, cpu_disable_mkldnn
]].apply(
lambda x: (float(x[cpu_enable_mkldnn]) - float(x[cpu_disable_mkldnn])) / float(x[cpu_disable_mkldnn]),
axis=1)
# compare cpu_multi_thread performance with cpu
num_threads_1 = "inference_time(ms)_cpu_math_library_num_threads_1"
num_threads_6 = "inference_time(ms)_cpu_math_library_num_threads_6"
output_thread_df["mkl_infer_diff"] = output_thread_df[[
num_threads_6, num_threads_1
]].apply(
lambda x: (float(x[num_threads_6]) - float(x[num_threads_1])) / float(x[num_threads_1]),
axis=1)
cpu_num_threads_1 = "cpu_rss(MB)_cpu_math_library_num_threads_1"
cpu_num_threads_6 = "cpu_rss(MB)_cpu_math_library_num_threads_6"
output_thread_df["mkl_cpu_rss_diff"] = output_thread_df[[
cpu_num_threads_6, cpu_num_threads_1
]].apply(
lambda x: (float(x[cpu_num_threads_6]) - float(x[cpu_num_threads_1])) / float(x[cpu_num_threads_1]),
axis=1)
return output_mkl_df, output_thread_df
def main():
"""
main
"""
args = parse_args()
# create empty DataFrame
origin_df = pd.DataFrame(columns=[
"model_name", "batch_size", "input_shape", "runtime_device", "ir_optim",
"enable_memory_optim", "enable_tensorrt", "precision", "enable_mkldnn",
"cpu_math_library_num_threads", "preproce_time(ms)",
"inference_time(ms)", "postprocess_time(ms)", "cpu_rss(MB)",
"gpu_rss(MB)", "gpu_util"
])
for file_name, full_path in find_all_logs(args.log_path):
dict_log = process_log(full_path)
origin_df = origin_df.append(dict_log, ignore_index=True)
raw_df = origin_df.sort_values(by='model_name')
raw_df.sort_values(by=["model_name", "batch_size"], inplace=True)
raw_df.to_excel(args.output_name)
if args.analysis_trt:
trt_df = trt_perf_analysis(raw_df)
trt_df.to_excel("trt_analysis_{}".format(args.output_name))
if args.analysis_mkl:
mkl_df, thread_df = mkl_perf_analysis(raw_df)
mkl_df.to_excel("mkl_enable_analysis_{}".format(args.output_name))
thread_df.to_excel("mkl_threads_analysis_{}".format(args.output_name))
if __name__ == "__main__":
main()
cmake_minimum_required(VERSION 3.0)
project(PaddleObjectDetector CXX C)
option(WITH_MKL "Compile demo with MKL/OpenBlas support,defaultuseMKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." ON)
option(WITH_TENSORRT "Compile demo with TensorRT." OFF)
option(WITH_KEYPOINT "Whether to Compile KeyPoint detector" OFF)
option(WITH_MOT "Whether to Compile MOT detector" OFF)
SET(PADDLE_DIR "" CACHE PATH "Location of libraries")
SET(PADDLE_LIB_NAME "" CACHE STRING "libpaddle_inference")
SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
SET(CUDA_LIB "" CACHE PATH "Location of libraries")
SET(CUDNN_LIB "" CACHE PATH "Location of libraries")
SET(TENSORRT_INC_DIR "" CACHE PATH "Compile demo with TensorRT")
SET(TENSORRT_LIB_DIR "" CACHE PATH "Compile demo with TensorRT")
include(cmake/yaml-cpp.cmake)
include_directories("${CMAKE_SOURCE_DIR}/")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
if (WITH_KEYPOINT)
set(SRCS src/main_keypoint.cc src/preprocess_op.cc src/object_detector.cc src/picodet_postprocess.cc src/utils.cc src/keypoint_detector.cc src/keypoint_postprocess.cc)
elseif (WITH_MOT)
set(SRCS src/main_jde.cc src/preprocess_op.cc src/object_detector.cc src/jde_detector.cc src/tracker.cc src/trajectory.cc src/lapjv.cpp src/picodet_postprocess.cc src/utils.cc)
else ()
set(SRCS src/main.cc src/preprocess_op.cc src/object_detector.cc src/picodet_postprocess.cc src/utils.cc)
endif()
macro(safe_set_static_flag)
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/MD")
endforeach(flag_var)
endmacro()
if (WITH_MKL)
ADD_DEFINITIONS(-DUSE_MKL)
endif()
if (NOT DEFINED PADDLE_DIR OR ${PADDLE_DIR} STREQUAL "")
message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_influence_dir")
endif()
message("PADDLE_DIR IS:" ${PADDLE_DIR})
if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "")
message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
endif()
include_directories("${CMAKE_SOURCE_DIR}/")
include_directories("${PADDLE_DIR}/")
include_directories("${PADDLE_DIR}/third_party/install/protobuf/include")
include_directories("${PADDLE_DIR}/third_party/install/glog/include")
include_directories("${PADDLE_DIR}/third_party/install/gflags/include")
include_directories("${PADDLE_DIR}/third_party/install/xxhash/include")
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include")
include_directories("${PADDLE_DIR}/third_party/install/snappy/include")
endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
endif()
include_directories("${PADDLE_DIR}/third_party/boost")
include_directories("${PADDLE_DIR}/third_party/eigen3")
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
link_directories("${PADDLE_DIR}/third_party/install/snappy/lib")
endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
endif()
link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")
link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib")
link_directories("${PADDLE_DIR}/third_party/install/paddle2onnx/lib")
link_directories("${PADDLE_DIR}/third_party/install/onnxruntime/lib")
link_directories("${PADDLE_DIR}/paddle/lib/")
link_directories("${CMAKE_CURRENT_BINARY_DIR}")
if (WIN32)
include_directories("${PADDLE_DIR}/paddle/fluid/inference")
include_directories("${PADDLE_DIR}/paddle/include")
link_directories("${PADDLE_DIR}/paddle/fluid/inference")
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH)
else ()
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/share/OpenCV NO_DEFAULT_PATH)
include_directories("${PADDLE_DIR}/paddle/include")
link_directories("${PADDLE_DIR}/paddle/lib")
endif ()
include_directories(${OpenCV_INCLUDE_DIRS})
if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -o2 -fopenmp -std=c++11")
set(CMAKE_STATIC_LIBRARY_PREFIX "")
endif()
# TODO let users define cuda lib path
if (WITH_GPU)
if (NOT DEFINED CUDA_LIB OR ${CUDA_LIB} STREQUAL "")
message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda-8.0/lib64")
endif()
if (NOT WIN32)
if (NOT DEFINED CUDNN_LIB)
message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn_v7.4/cuda/lib64")
endif()
endif(NOT WIN32)
endif()
if (NOT WIN32)
if (WITH_TENSORRT AND WITH_GPU)
include_directories("${TENSORRT_INC_DIR}/")
link_directories("${TENSORRT_LIB_DIR}/")
endif()
endif(NOT WIN32)
if (NOT WIN32)
set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph")
if(EXISTS ${NGRAPH_PATH})
include(GNUInstallDirs)
include_directories("${NGRAPH_PATH}/include")
link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}")
set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
endif()
if(WITH_MKL)
include_directories("${PADDLE_DIR}/third_party/install/mklml/include")
if (WIN32)
set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib
${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib)
else ()
set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
execute_process(COMMAND cp -r ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} /usr/lib)
endif ()
set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn")
if(EXISTS ${MKLDNN_PATH})
include_directories("${MKLDNN_PATH}/include")
if (WIN32)
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
else ()
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
endif ()
endif()
else()
set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
if (WIN32)
if(EXISTS "${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(DEPS
${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
set(DEPS
${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
endif()
if (WIN32)
set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
message("PADDLE_LIB_NAME:" ${PADDLE_LIB_NAME})
message("DEPS:" $DEPS)
if (NOT WIN32)
set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
glog gflags protobuf z xxhash yaml-cpp
)
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
set(DEPS ${DEPS} snappystream)
endif()
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
set(DEPS ${DEPS} snappy)
endif()
else()
set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
glog gflags_static libprotobuf xxhash libyaml-cppmt)
set(DEPS ${DEPS} libcmt shlwapi)
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
set(DEPS ${DEPS} snappy)
endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
set(DEPS ${DEPS} snappystream)
endif()
endif(NOT WIN32)
if(WITH_GPU)
if(NOT WIN32)
if (WITH_TENSORRT)
set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
else()
set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDNN_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
endif()
if (NOT WIN32)
set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread")
set(DEPS ${DEPS} ${EXTERNAL_LIB})
endif()
set(DEPS ${DEPS} ${OpenCV_LIBS})
add_executable(main ${SRCS})
ADD_DEPENDENCIES(main ext-yaml-cpp)
message("DEPS:" $DEPS)
target_link_libraries(main ${DEPS})
if (WIN32 AND WITH_MKL)
add_custom_command(TARGET main POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll
)
endif()
if (WIN32 AND NOT WITH_MKL)
add_custom_command(TARGET main POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/openblas/lib/openblas.dll ./openblas.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/openblas/lib/openblas.dll ./release/openblas.dll
)
endif()
if (WIN32)
add_custom_command(TARGET main POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/onnxruntime/lib/onnxruntime.dll ./onnxruntime.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/paddle2onnx/lib/paddle2onnx.dll ./paddle2onnx.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/onnxruntime/lib/onnxruntime.dll ./release/onnxruntime.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/paddle2onnx/lib/paddle2onnx.dll ./release/paddle2onnx.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll
)
endif()
# C++端预测部署
## 各环境编译部署教程
- [Linux 编译部署](docs/linux_build.md)
- [Windows编译部署(使用Visual Studio 2019)](docs/windows_vs2019_build.md)
- [NV Jetson编译部署](docs/Jetson_build.md)
## C++部署总览
[1.说明](#1说明)
[2.主要目录和文件](#2主要目录和文件)
### 1.说明
本目录为用户提供一个跨平台的`C++`部署方案,让用户通过`PaddleDetection`训练的模型导出后,即可基于本项目快速运行,也可以快速集成代码结合到自己的项目实际应用中去。
主要设计的目标包括以下四点:
- 跨平台,支持在 `Windows``Linux` 完成编译、二次开发集成和部署运行
- 可扩展性,支持用户针对新模型开发自己特殊的数据预处理等逻辑
- 高性能,除了`PaddlePaddle`自身带来的性能优势,我们还针对图像检测的特点对关键步骤进行了性能优化
- 支持各种不同检测模型结构,包括`Yolov3`/`Faster_RCNN`/`SSD`
### 2.主要目录和文件
```bash
deploy/cpp
|
├── src
│ ├── main.cc # 集成代码示例, 程序入口
│ ├── object_detector.cc # 模型加载和预测主要逻辑封装类实现
│ └── preprocess_op.cc # 预处理相关主要逻辑封装实现
|
├── include
│ ├── config_parser.h # 导出模型配置yaml文件解析
│ ├── object_detector.h # 模型加载和预测主要逻辑封装类
│ └── preprocess_op.h # 预处理相关主要逻辑类封装
|
├── docs
│ ├── linux_build.md # Linux 编译指南
│ └── windows_vs2019_build.md # Windows VS2019编译指南
├── build.sh # 编译命令脚本
├── CMakeList.txt # cmake编译入口文件
|
├── CMakeSettings.json # Visual Studio 2019 CMake项目编译设置
└── cmake # 依赖的外部项目cmake(目前仅有yaml-cpp)
```
find_package(Git REQUIRED)
include(ExternalProject)
message("${CMAKE_BUILD_TYPE}")
ExternalProject_Add(
ext-yaml-cpp
URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip
URL_MD5 9542d6de397d1fbd649ed468cb5850e6
CMAKE_ARGS
-DYAML_CPP_BUILD_TESTS=OFF
-DYAML_CPP_BUILD_TOOLS=OFF
-DYAML_CPP_INSTALL=OFF
-DYAML_CPP_BUILD_CONTRIB=OFF
-DMSVC_SHARED_RT=OFF
-DBUILD_SHARED_LIBS=OFF
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp"
# Disable install step
INSTALL_COMMAND ""
LOG_DOWNLOAD ON
LOG_BUILD 1
)
# Jetson平台编译指南
## 说明
`NVIDIA Jetson`设备是具有`NVIDIA GPU`的嵌入式设备,可以将目标检测算法部署到该设备上。本文档是在`Jetson`硬件上部署`PaddleDetection`模型的教程。
本文档以`Jetson TX2`硬件、`JetPack 4.3`版本为例进行说明。
`Jetson`平台的开发指南请参考[NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html).
## Jetson环境搭建
`Jetson`系统软件安装,请参考[NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html).
* (1) 查看硬件系统的l4t的版本号
```
cat /etc/nv_tegra_release
```
* (2) 根据硬件,选择硬件可安装的`JetPack`版本,硬件和`JetPack`版本对应关系请参考[jetpack-archive](https://developer.nvidia.com/embedded/jetpack-archive).
* (3) 下载`JetPack`,请参考[NVIDIA Jetson Linux Developer Guide](https://docs.nvidia.com/jetson/l4t/index.html) 中的`Preparing a Jetson Developer Kit for Use`章节内容进行刷写系统镜像。
**注意**: 请在[jetpack-archive](https://developer.nvidia.com/embedded/jetpack-archive) 根据硬件选择适配的`JetPack`版本进行刷机。
## 下载或编译`Paddle`预测库
本文档使用`Paddle``JetPack4.3`上预先编译好的预测库,请根据硬件在[安装与编译 Linux 预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/build_and_install_lib_cn.html) 中选择对应版本的`Paddle`预测库。
这里选择[nv_jetson_cuda10_cudnn7.6_trt6(jetpack4.3)](https://paddle-inference-lib.bj.bcebos.com/2.0.0-nv-jetson-jetpack4.3-all/paddle_inference.tgz), `Paddle`版本`2.0.0-rc0`,`CUDA`版本`10.0`,`CUDNN`版本`7.6``TensorRT`版本`6`
若需要自己在`Jetson`平台上自定义编译`Paddle`库,请参考文档[安装与编译 Linux 预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)`NVIDIA Jetson嵌入式硬件预测库源码编译`部分内容。
### Step1: 下载代码
`git clone https://github.com/PaddlePaddle/PaddleDetection.git`
**说明**:其中`C++`预测代码在`/root/projects/PaddleDetection/deploy/cpp` 目录,该目录不依赖任何`PaddleDetection`下其他目录。
### Step2: 下载PaddlePaddle C++ 预测库 paddle_inference
解压下载的[nv_jetson_cuda10_cudnn7.6_trt6(jetpack4.3)](https://paddle-inference-lib.bj.bcebos.com/2.0.1-nv-jetson-jetpack4.3-all/paddle_inference.tgz)
下载并解压后`/root/projects/paddle_inference`目录包含内容为:
```
paddle_inference
├── paddle # paddle核心库和头文件
|
├── third_party # 第三方依赖库和头文件
|
└── version.txt # 版本和编译信息
```
**注意:** 预编译库`nv-jetson-cuda10-cudnn7.6-trt6`使用的`GCC`版本是`7.5.0`,其他都是使用`GCC 4.8.5`编译的。使用高版本的GCC可能存在`ABI`兼容性问题,建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)
### Step4: 编译
编译`cmake`的命令在`scripts/build.sh`中,请根据实际情况修改主要参数,其主要内容说明如下:
注意,`TX2`平台的`CUDA``CUDNN`需要通过`JetPack`安装。
```
# 是否使用GPU(即是否使用 CUDA)
WITH_GPU=ON
# 是否使用MKL or openblas,TX2需要设置为OFF
WITH_MKL=OFF
# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
WITH_TENSORRT=ON
# TensorRT 的include路径
TENSORRT_INC_DIR=/usr/include/aarch64-linux-gnu
# TensorRT 的lib路径
TENSORRT_LIB_DIR=/usr/lib/aarch64-linux-gnu
# Paddle 预测库路径
PADDLE_DIR=/path/to/paddle_inference/
# Paddle 预测库名称
PADDLE_LIB_NAME=paddle_inference
# Paddle 的预测库是否使用静态库来编译
# 使用TensorRT时,Paddle的预测库通常为动态库
WITH_STATIC_LIB=OFF
# CUDA 的 lib 路径
CUDA_LIB=/usr/local/cuda-10.0/lib64
# CUDNN 的 lib 路径
CUDNN_LIB=/usr/lib/aarch64-linux-gnu
# 是否开启关键点模型预测功能
WITH_KEYPOINT=ON
# OPENCV_DIR 的路径
# linux平台请下载:https://bj.bcebos.com/paddleseg/deploy/opencv3.4.6gcc4.8ffmpeg.tar.gz2,并解压到deps文件夹下
# TX2平台请下载:https://paddlemodels.bj.bcebos.com/TX2_JetPack4.3_opencv_3.4.10_gcc7.5.0.zip,并解压到deps文件夹下
OPENCV_DIR=/path/to/opencv
# 请检查以上各个路径是否正确
# 以下无需改动
cmake .. \
-DWITH_GPU=${WITH_GPU} \
-DWITH_MKL=OFF \
-DWITH_TENSORRT=${WITH_TENSORRT} \
-DTENSORRT_DIR=${TENSORRT_DIR} \
-DPADDLE_DIR=${PADDLE_DIR} \
-DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
-DCUDA_LIB=${CUDA_LIB} \
-DCUDNN_LIB=${CUDNN_LIB} \
-DOPENCV_DIR=${OPENCV_DIR} \
-DPADDLE_LIB_NAME={PADDLE_LIB_NAME} \
-DWITH_KEYPOINT=${WITH_KEYPOINT}
make
```
例如设置如下:
```
# 是否使用GPU(即是否使用 CUDA)
WITH_GPU=ON
# 是否使用MKL or openblas
WITH_MKL=OFF
# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
WITH_TENSORRT=OFF
# TensorRT 的include路径
TENSORRT_INC_DIR=/usr/include/aarch64-linux-gnu
# TensorRT 的lib路径
TENSORRT_LIB_DIR=/usr/lib/aarch64-linux-gnu
# Paddle 预测库路径
PADDLE_DIR=/home/nvidia/PaddleDetection_infer/paddle_inference/
# Paddle 预测库名称
PADDLE_LIB_NAME=paddle_inference
# Paddle 的预测库是否使用静态库来编译
# 使用TensorRT时,Paddle的预测库通常为动态库
WITH_STATIC_LIB=OFF
# CUDA 的 lib 路径
CUDA_LIB=/usr/local/cuda-10.0/lib64
# CUDNN 的 lib 路径
CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
# 是否开启关键点模型预测功能
WITH_KEYPOINT=ON
```
修改脚本设置好主要参数后,执行`build`脚本:
```shell
sh ./scripts/build.sh
```
### Step5: 预测及可视化
编译成功后,预测入口程序为`build/main`其主要命令参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
| --model_dir | 导出的检测预测模型所在路径 |
| --model_dir_keypoint | Option | 导出的关键点预测模型所在路径 |
| --image_file | 要预测的图片文件路径 |
| --image_dir | 要预测的图片文件夹路径 |
| --video_file | 要预测的视频文件路径 |
| --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
| --gpu_id | 指定进行推理的GPU device id(默认值为0)|
| --run_mode | 使用GPU时,默认为paddle, 可选(paddle/trt_fp32/trt_fp16/trt_int8)|
| --batch_size | 检测模型预测时的batch size,在指定`image_dir`时有效 |
| --batch_size_keypoint | 关键点模型预测时的batch size,默认为8 |
| --run_benchmark | 是否重复预测来进行benchmark测速 |
| --output_dir | 输出图片所在的文件夹, 默认为output |
| --use_mkldnn | CPU预测中是否开启MKLDNN加速 |
| --cpu_threads | 设置cpu线程数,默认为1 |
| --use_dark | 关键点模型输出预测是否使用DarkPose后处理,默认为true |
**注意**:
- 优先级顺序:`camera_id` > `video_file` > `image_dir` > `image_file`
- --run_benchmark如果设置为True,则需要安装依赖`pip install pynvml psutil GPUtil`
`样例一`
```shell
#不使用`GPU`测试图片 `/root/projects/images/test.jpeg`
./main --model_dir=/root/projects/models/yolov3_darknet --image_file=/root/projects/images/test.jpeg
```
图片文件`可视化预测结果`会保存在当前目录下`output.jpg`文件中。
`样例二`:
```shell
#使用 `GPU`预测视频`/root/projects/videos/test.mp4`
./main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --device=GPU
```
视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
`样例三`
```shell
#使用关键点模型与检测模型联合预测,使用 `GPU`预测
#检测模型检测到的人送入关键点模型进行关键点预测
./main --model_dir=/root/projects/models/yolov3_darknet --model_dir_keypoint=/root/projects/models/hrnet_w32_256x192 --image_file=/root/projects/images/test.jpeg --device=GPU
```
## 性能测试
benchmark请查看[BENCHMARK_INFER](../../BENCHMARK_INFER.md)
# Linux平台编译指南
## 说明
本文档在 `Linux`平台使用`GCC 8.2`测试过,如果需要使用其他G++版本编译使用,则需要重新编译Paddle预测库,请参考: [从源码编译Paddle预测库](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html)。本文档使用的预置的opencv库是在ubuntu 16.04上用gcc8.2编译的,如果需要在gcc8.2以外的环境编译,那么需自行编译opencv库。
## 前置条件
* G++ 8.2
* CUDA 9.0 / CUDA 10.1, cudnn 7+ (仅在使用GPU版本的预测库时需要)
* CMake 3.0+
请确保系统已经安装好上述基本软件,**下面所有示例以工作目录为 `/root/projects/`演示**
### Step1: 下载代码
`git clone https://github.com/PaddlePaddle/PaddleDetection.git`
**说明**:其中`C++`预测代码在`/root/projects/PaddleDetection/deploy/cpp` 目录,该目录不依赖任何`PaddleDetection`下其他目录。
### Step2: 下载PaddlePaddle C++ 预测库 paddle_inference
PaddlePaddle C++ 预测库针对不同的`CPU``CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html)
下载并解压后`/root/projects/paddle_inference`目录包含内容为:
```
paddle_inference
├── paddle # paddle核心库和头文件
|
├── third_party # 第三方依赖库和头文件
|
└── version.txt # 版本和编译信息
```
**注意:** 预编译版本除`nv-jetson-cuda10-cudnn7.5-trt5` 以外其它包都是基于`GCC 4.8.5`编译,使用高版本`GCC`可能存在 `ABI`兼容性问题,建议降级或[自行编译预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)
### Step3: 编译
编译`cmake`的命令在`scripts/build.sh`中,请根据实际情况修改主要参数,其主要内容说明如下:
```
# 是否使用GPU(即是否使用 CUDA)
WITH_GPU=OFF
# 使用MKL or openblas
WITH_MKL=ON
# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
WITH_TENSORRT=OFF
# TensorRT 的include路径
TENSORRT_LIB_DIR=/path/to/TensorRT/include
# TensorRT 的lib路径
TENSORRT_LIB_DIR=/path/to/TensorRT/lib
# Paddle 预测库路径
PADDLE_DIR=/path/to/paddle_inference
# Paddle 预测库名称
PADDLE_LIB_NAME=paddle_inference
# CUDA 的 lib 路径
CUDA_LIB=/path/to/cuda/lib
# CUDNN 的 lib 路径
CUDNN_LIB=/path/to/cudnn/lib
# 是否开启关键点模型预测功能
WITH_KEYPOINT=ON
# 请检查以上各个路径是否正确
# 以下无需改动
cmake .. \
-DWITH_GPU=${WITH_GPU} \
-DWITH_MKL=${WITH_MKL} \
-DWITH_TENSORRT=${WITH_TENSORRT} \
-DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \
-DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \
-DPADDLE_DIR=${PADDLE_DIR} \
-DCUDA_LIB=${CUDA_LIB} \
-DCUDNN_LIB=${CUDNN_LIB} \
-DOPENCV_DIR=${OPENCV_DIR} \
-DPADDLE_LIB_NAME=${PADDLE_LIB_NAME} \
-DWITH_KEYPOINT=${WITH_KEYPOINT}
make
```
修改脚本设置好主要参数后,执行`build`脚本:
```shell
sh ./scripts/build.sh
```
**注意**: OPENCV依赖OPENBLAS,Ubuntu用户需确认系统是否已存在`libopenblas.so`。如未安装,可执行apt-get install libopenblas-dev进行安装。
### Step4: 预测及可视化
编译成功后,预测入口程序为`build/main`其主要命令参数说明如下:
| 参数 | 说明 |
| ---- | ---- |
| --model_dir | 导出的检测预测模型所在路径 |
| --model_dir_keypoint | Option | 导出的关键点预测模型所在路径 |
| --image_file | 要预测的图片文件路径 |
| --image_dir | 要预测的图片文件夹路径 |
| --video_file | 要预测的视频文件路径 |
| --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
| --gpu_id | 指定进行推理的GPU device id(默认值为0)|
| --run_mode | 使用GPU时,默认为paddle, 可选(paddle/trt_fp32/trt_fp16/trt_int8)|
| --batch_size | 检测模型预测时的batch size,在指定`image_dir`时有效 |
| --batch_size_keypoint | 关键点模型预测时的batch size,默认为8 |
| --run_benchmark | 是否重复预测来进行benchmark测速 |
| --output_dir | 输出图片所在的文件夹, 默认为output |
| --use_mkldnn | CPU预测中是否开启MKLDNN加速 |
| --cpu_threads | 设置cpu线程数,默认为1 |
| --use_dark | 关键点模型输出预测是否使用DarkPose后处理,默认为true |
**注意**:
- 优先级顺序:`camera_id` > `video_file` > `image_dir` > `image_file`
- --run_benchmark如果设置为True,则需要安装依赖`pip install pynvml psutil GPUtil`
`样例一`
```shell
#不使用`GPU`测试图片 `/root/projects/images/test.jpeg`
./build/main --model_dir=/root/projects/models/yolov3_darknet --image_file=/root/projects/images/test.jpeg
```
图片文件`可视化预测结果`会保存在当前目录下`output.jpg`文件中。
`样例二`:
```shell
#使用 `GPU`预测视频`/root/projects/videos/test.mp4`
./build/main --model_dir=/root/projects/models/yolov3_darknet --video_file=/root/projects/images/test.mp4 --device=GPU
```
视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
`样例三`
```shell
#使用关键点模型与检测模型联合预测,使用 `GPU`预测
#检测模型检测到的人送入关键点模型进行关键点预测
./build/main --model_dir=/root/projects/models/yolov3_darknet --model_dir_keypoint=/root/projects/models/hrnet_w32_256x192 --image_file=/root/projects/images/test.jpeg --device=GPU
```
## 性能测试
benchmark请查看[BENCHMARK_INFER](../../BENCHMARK_INFER.md)
# Visual Studio 2019 Community CMake 编译指南
Windows 平台下,我们使用`Visual Studio 2019 Community` 进行了测试。微软从`Visual Studio 2017`开始即支持直接管理`CMake`跨平台编译项目,但是直到`2019`才提供了稳定和完全的支持,所以如果你想使用CMake管理项目编译构建,我们推荐你使用`Visual Studio 2019`环境下构建。
## 前置条件
* Visual Studio 2019 (根据Paddle预测库所使用的VS版本选择,请参考 [Visual Studio 不同版本二进制兼容性](https://docs.microsoft.com/zh-cn/cpp/porting/binary-compat-2015-2017?view=vs-2019) )
* CUDA 9.0 / CUDA 10.0,cudnn 7+ / TensorRT(仅在使用GPU版本的预测库时需要)
* CMake 3.0+ [CMake下载](https://cmake.org/download/)
**特别注意:windows下预测库需要的TensorRT版本为:**
| 预测库版本 | TensorRT版本 |
| ---- | ---- |
| cuda10.1_cudnn7.6_avx_mkl_trt6 | TensorRT-6.0.1.5 |
| cuda10.2_cudnn7.6_avx_mkl_trt7 | TensorRT-7.0.0.11 |
| cuda11.0_cudnn8.0_avx_mkl_trt7 | TensorRT-7.2.1.6 |
请确保系统已经安装好上述基本软件,我们使用的是`VS2019`的社区版。
**下面所有示例以工作目录为 `D:\projects`演示**
### Step1: 下载代码
下载源代码
```shell
git clone https://github.com/PaddlePaddle/PaddleDetection.git
```
**说明**:其中`C++`预测代码在`PaddleDetection/deploy/cpp` 目录,该目录不依赖任何`PaddleDetection`下其他目录。
### Step2: 下载PaddlePaddle C++ 预测库 paddle_inference
PaddlePaddle C++ 预测库针对不同的`CPU``CUDA`版本提供了不同的预编译版本,请根据实际情况下载: [C++预测库下载列表](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html#windows)
解压后`D:\projects\paddle_inference`目录包含内容为:
```
paddle_inference
├── paddle # paddle核心库和头文件
|
├── third_party # 第三方依赖库和头文件
|
└── version.txt # 版本和编译信息
```
### Step3: 安装配置OpenCV
1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download)
2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv`
3. 配置环境变量,如下流程所示(如果使用全局绝对路径,可以不用设置环境变量)
- 我的电脑->属性->高级系统设置->环境变量
- 在系统变量中找到Path(如没有,自行创建),并双击编辑
- 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin`
### Step4: 编译
1. 进入到`cpp`文件夹
```
cd D:\projects\PaddleDetection\deploy\cpp
```
2. 使用CMake生成项目文件
编译参数的含义说明如下(带`*`表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**):
| 参数名 | 含义 |
| ---- | ---- |
| *CUDA_LIB | CUDA的库路径 |
| *CUDNN_LIB | CUDNN的库路径 |
| OPENCV_DIR | OpenCV的安装路径, |
| PADDLE_DIR | Paddle预测库的路径 |
| PADDLE_LIB_NAME | Paddle 预测库名称 |
**注意:**
1. 如果编译环境为CPU,需要下载`CPU`版预测库,请把`WITH_GPU`的勾去掉
2. 如果使用的是`openblas`版本,请把`WITH_MKL`勾去掉
3. 如无需使用关键点模型可以把`WITH_KEYPOINT`勾去掉
4. Windows环境下,`PADDLE_LIB_NAME`需要设置为`paddle_inference`
执行如下命令项目文件:
```
cmake . -G "Visual Studio 16 2019" -A x64 -T host=x64 -DWITH_GPU=ON -DWITH_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCUDA_LIB=path_to_cuda_lib -DCUDNN_LIB=path_to_cudnn_lib -DPADDLE_DIR=path_to_paddle_lib -DPADDLE_LIB_NAME=paddle_inference -DOPENCV_DIR=path_to_opencv -DWITH_KEYPOINT=ON
```
例如:
```
cmake . -G "Visual Studio 16 2019" -A x64 -T host=x64 -DWITH_GPU=ON -DWITH_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCUDA_LIB=D:\projects\packages\cuda10_0\lib\x64 -DCUDNN_LIB=D:\projects\packages\cuda10_0\lib\x64 -DPADDLE_DIR=D:\projects\packages\paddle_inference -DPADDLE_LIB_NAME=paddle_inference -DOPENCV_DIR=D:\projects\packages\opencv3_4_6 -DWITH_KEYPOINT=ON
```
3. 编译
`Visual Studio 16 2019`打开`cpp`文件夹下的`PaddleObjectDetector.sln`,将编译模式设置为`Release`,点击`生成`->`全部生成
### Step5: 预测及可视化
上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录:
```
cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
```
可执行文件`main`即为样例的预测程序,其主要的命令行参数如下:
| 参数 | 说明 |
| ---- | ---- |
| --model_dir | 导出的检测预测模型所在路径 |
| --model_dir_keypoint | Option | 导出的关键点预测模型所在路径 |
| --image_file | 要预测的图片文件路径 |
| --image_dir | 要预测的图片文件夹路径 |
| --video_file | 要预测的视频文件路径 |
| --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`|
| --gpu_id | 指定进行推理的GPU device id(默认值为0)|
| --run_mode | 使用GPU时,默认为paddle, 可选(paddle/trt_fp32/trt_fp16/trt_int8)|
| --batch_size | 检测模型预测时的batch size,在指定`image_dir`时有效 |
| --batch_size_keypoint | 关键点模型预测时的batch size,默认为8 |
| --run_benchmark | 是否重复预测来进行benchmark测速 |
| --output_dir | 输出图片所在的文件夹, 默认为output |
| --use_mkldnn | CPU预测中是否开启MKLDNN加速 |
| --cpu_threads | 设置cpu线程数,默认为1 |
| --use_dark | 关键点模型输出预测是否使用DarkPose后处理,默认为true |
**注意**:
(1)优先级顺序:`camera_id` > `video_file` > `image_dir` > `image_file`。
(2)如果提示找不到`opencv_world346.dll`,把`D:\projects\packages\opencv3_4_6\build\x64\vc14\bin`文件夹下的`opencv_world346.dll`拷贝到`main.exe`文件夹下即可。
(3)--run_benchmark如果设置为True,则需要安装依赖`pip install pynvml psutil GPUtil`。
`样例一`:
```shell
#不使用`GPU`测试图片 `D:\\images\\test.jpeg`
.\main --model_dir=D:\\models\\yolov3_darknet --image_file=D:\\images\\test.jpeg
```
图片文件`可视化预测结果`会保存在当前目录下`output.jpg`文件中。
`样例二`:
```shell
#使用`GPU`测试视频 `D:\\videos\\test.mp4`
.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --device=GPU
```
视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
`样例三`:
```shell
#使用关键点模型与检测模型联合预测,使用 `GPU`预测
#检测模型检测到的人送入关键点模型进行关键点预测
.\main --model_dir=D:\\models\\yolov3_darknet --model_dir_keypoint=D:\\models\\hrnet_w32_256x192 --image_file=D:\\images\\test.jpeg --device=GPU
```
## 性能测试
Benchmark请查看[BENCHMARK_INFER](../../BENCHMARK_INFER.md)
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "yaml-cpp/yaml.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleDetection {
// Inference model configuration parser
class ConfigPaser {
public:
ConfigPaser() {}
~ConfigPaser() {}
bool load_config(const std::string& model_dir,
const std::string& cfg = "infer_cfg.yml") {
// Load as a YAML::Node
YAML::Node config;
config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg);
// Get runtime mode : paddle, trt_fp16, trt_fp32
if (config["mode"].IsDefined()) {
mode_ = config["mode"].as<std::string>();
} else {
std::cerr << "Please set mode, "
<< "support value : paddle/trt_fp16/trt_fp32." << std::endl;
return false;
}
// Get model arch : YOLO, SSD, RetinaNet, RCNN, Face
if (config["arch"].IsDefined()) {
arch_ = config["arch"].as<std::string>();
} else {
std::cerr << "Please set model arch,"
<< "support value : YOLO, SSD, RetinaNet, RCNN, Face."
<< std::endl;
return false;
}
// Get min_subgraph_size for tensorrt
if (config["min_subgraph_size"].IsDefined()) {
min_subgraph_size_ = config["min_subgraph_size"].as<int>();
} else {
std::cerr << "Please set min_subgraph_size." << std::endl;
return false;
}
// Get draw_threshold for visualization
if (config["draw_threshold"].IsDefined()) {
draw_threshold_ = config["draw_threshold"].as<float>();
} else {
std::cerr << "Please set draw_threshold." << std::endl;
return false;
}
// Get Preprocess for preprocessing
if (config["Preprocess"].IsDefined()) {
preprocess_info_ = config["Preprocess"];
} else {
std::cerr << "Please set Preprocess." << std::endl;
return false;
}
// Get label_list for visualization
if (config["label_list"].IsDefined()) {
label_list_ = config["label_list"].as<std::vector<std::string>>();
} else {
std::cerr << "Please set label_list." << std::endl;
return false;
}
// Get use_dynamic_shape for TensorRT
if (config["use_dynamic_shape"].IsDefined()) {
use_dynamic_shape_ = config["use_dynamic_shape"].as<bool>();
} else {
std::cerr << "Please set use_dynamic_shape." << std::endl;
return false;
}
// Get conf_thresh for tracker
if (config["tracker"].IsDefined()) {
if (config["tracker"]["conf_thres"].IsDefined()) {
conf_thresh_ = config["tracker"]["conf_thres"].as<float>();
} else {
std::cerr << "Please set conf_thres in tracker." << std::endl;
return false;
}
}
// Get NMS for postprocess
if (config["NMS"].IsDefined()) {
nms_info_ = config["NMS"];
}
// Get fpn_stride in PicoDet
if (config["fpn_stride"].IsDefined()) {
fpn_stride_.clear();
for (auto item : config["fpn_stride"]) {
fpn_stride_.emplace_back(item.as<int>());
}
}
if (config["mask"].IsDefined()) {
mask_ = config["mask"].as<bool>();
}
return true;
}
std::string mode_;
float draw_threshold_;
std::string arch_;
int min_subgraph_size_;
YAML::Node preprocess_info_;
YAML::Node nms_info_;
std::vector<std::string> label_list_;
std::vector<int> fpn_stride_;
bool use_dynamic_shape_;
float conf_thresh_;
bool mask_ = false;
};
} // namespace PaddleDetection
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment