# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import sys
import time

# Make the repo root importable before pulling in repo-local modules
# such as tools.misc.fuse_conv_bn.
sys.path.insert(0, os.getcwd())
print(sys.path)

import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model

from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_detector
from tools.misc.fuse_conv_bn import fuse_module


def parse_args():
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    # type=int so values passed on the command line compare correctly
    # against the integer loop counter below
    parser.add_argument(
        '--samples', type=int, default=500, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn; this will slightly increase '
        'the inference speed')
    parser.add_argument('--w_pano', action='store_true')
    parser.add_argument('--w_panoproc', action='store_true')
    parser.add_argument(
        '--no-acceleration',
        action='store_true',
        help='Omit the pre-computation acceleration')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # import modules from plugin/xx; the registry will be updated
    if hasattr(cfg, 'plugin') and cfg.plugin:
        import importlib
        if hasattr(cfg, 'plugin_dir'):
            _module_dir = os.path.dirname(cfg.plugin_dir)
            # convert the directory path to a dotted module path
            _module_path = _module_dir.replace('/', '.')
            print(_module_path)
        else:
            # the import dir defaults to the dirpath of the config file
            _module_dir = os.path.dirname(args.config)
            _module_path = _module_dir.replace('/', '.')
        plg_lib = importlib.import_module(_module_path)

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=0,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    if not args.no_acceleration:
        cfg.model.img_view_transformer.accelerate = True
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # the first several iterations may be very slow, so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with several samples and take the average
    # (debugging variant kept for reference: time repeated copies of a
    # single sample instead of iterating over the whole dataloader)
    # for i, data_ori in enumerate(data_loader):
    #     if i == 0:
    #         break
    # import copy
    # for i in range(500):
    #     data = copy.deepcopy(data_ori)
    for i, data in enumerate(data_loader):
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(
                return_loss=False,
                rescale=True,
                w_pano=args.w_pano,
                w_panoproc=args.w_panoproc,
                **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {args.samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == args.samples:
            # elapsed for this iteration was already accumulated above,
            # so do not add it a second time
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall \nfps: {fps:.2f} img / s '
                  f'\ninference time: {1000 / fps:.2f} ms')
            break


if __name__ == '__main__':
    main()
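# Example invocation as a quick reference. The script path, config, and
# checkpoint below are illustrative placeholders, not files guaranteed to
# exist in this repo:
#
#   python tools/benchmark.py \
#       configs/my_model/my_config.py \
#       work_dirs/my_model/latest.pth \
#       --samples 500 --log-interval 50 --fuse-conv-bn
#
# Throughput is reported as img/s, averaged over all timed iterations after
# the first num_warmup (5) samples are discarded.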