# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import time
import torch
from mmcv import Config
from mmcv.parallel import MMDataParallel
from mmengine.runner import load_checkpoint
from mmdet3d.datasets import build_dataset
from mmdet3d.models import build_detector
from tools.misc.fuse_conv_bn import fuse_module


def parse_args():
    """Parse command-line arguments for the benchmark script.

    Returns:
        argparse.Namespace: Parsed arguments with attributes ``config``,
            ``checkpoint``, ``samples`` (int), ``log_interval`` (int) and
            ``fuse_conv_bn`` (bool).
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    # `type=int` is required: without it CLI-supplied values are strings,
    # so `(i + 1) == args.samples` never matches and
    # `(i + 1) % args.log_interval` raises a TypeError in main().
    parser.add_argument(
        '--samples', type=int, default=2000, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase'
        'the inference speed')
    args = parser.parse_args()
    return args


def main():
    """Benchmark the pure inference speed (fps) of a detector.

    Loads the config and checkpoint given on the command line, runs
    single-image inference over the test dataset on GPU 0, skips the first
    few warm-up iterations, and prints a running and final fps figure.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)

    # TODO fix this: this local stub shadows the real dataloader builder.
    # Raise explicitly so the failure is self-explanatory instead of an
    # opaque "takes 0 positional arguments" TypeError at the call below.
    def build_dataloader(*args, **kwargs):
        raise NotImplementedError(
            'build_dataloader is a placeholder; restore the import of the '
            'real builder (e.g. from mmdet3d.datasets) before benchmarking')

    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with several samples and take the average
    for i, data in enumerate(data_loader):

        # synchronize so perf_counter measures actual GPU work, not just
        # asynchronous kernel launch time
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {args.samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == args.samples:
            pure_inf_time += elapsed
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            break


# Script entry point: run the benchmark only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()