import argparse
import time

import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model

from mmdet3d.datasets import build_dataloader, build_dataset
from mmdet3d.models import build_detector
from tools.misc.fuse_conv_bn import fuse_module


def parse_args():
    """Parse command-line arguments for the benchmark script.

    Returns:
        argparse.Namespace: Parsed arguments with ``config``, ``checkpoint``,
            ``samples``, ``log_interval`` and ``fuse_conv_bn`` attributes.
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    # type=int is required: without it, values supplied on the command line
    # stay strings, which breaks `(i + 1) % args.log_interval` (TypeError)
    # and `(i + 1) == args.samples` (never True) inside main().
    parser.add_argument(
        '--samples', type=int, default=2000, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase'
        'the inference speed')
    args = parser.parse_args()
    return args


def main():
    """Benchmark single-GPU inference speed (FPS) of a detector.

    Builds the test dataset/dataloader and the detector from the config,
    loads the checkpoint, then times ``args.samples`` forward passes
    (skipping a few warm-up iterations) and prints the average FPS.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark (speeds up fixed-shape convolutions)
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # the checkpoint provides the weights; skip loading pretrained backbones
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with several samples and take the average
    for i, data in enumerate(data_loader):

        # synchronize so perf_counter brackets the full GPU computation
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {args.samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == args.samples:
            # BUGFIX: `elapsed` was accumulated a second time here even
            # though the `i >= num_warmup` branch above already added it,
            # double-counting the final sample and underreporting FPS.
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            break


# Script entry point: run the benchmark only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()