# Copyright (c) OpenMMLab. All rights reserved.
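# Usage (paths are illustrative):
#   python benchmark.py CONFIG_FILE CHECKPOINT_FILE \
#       [--samples 2000] [--log-interval 50] [--fuse-conv-bn]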
import argparse
import time
from functools import partial

import torch
from torch.utils.data import DataLoader
from mmcv import Config
from mmcv.parallel import MMDataParallel, collate
from mmengine.runner import load_checkpoint

from mmdet3d.registry import DATASETS, MODELS
from tools.misc.fuse_conv_bn import fuse_module


def parse_args():
    parser = argparse.ArgumentParser(description='MMDet3D benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--samples', default=2000, type=int, help='samples to benchmark')
    parser.add_argument(
        '--log-interval', default=50, type=int, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    args = parser.parse_args()
    return args


def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
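    # (cudnn.benchmark lets cuDNN auto-tune convolution algorithms; it helps
    # when input sizes are fixed, as is typical when benchmarking)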
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = DATASETS.build(cfg.data.test)

    # Minimal single-GPU dataloader builder (a sketch: it assumes mmcv-style
    # DataContainer batches collated with `mmcv.parallel.collate`; the `dist`
    # flag is accepted for API compatibility but ignored here).
    def build_dataloader(dataset, samples_per_gpu, workers_per_gpu, dist,
                         shuffle):
        return DataLoader(
            dataset,
            batch_size=samples_per_gpu,
            num_workers=workers_per_gpu,
            collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
            shuffle=shuffle)

    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    cfg.model.train_cfg = None
    # merge the outer test_cfg (if any) into the model config: the registry
    # `build()` call does not accept a separate `test_cfg` keyword
    if cfg.get('test_cfg') is not None:
        cfg.model.test_cfg = cfg.get('test_cfg')
    model = MODELS.build(cfg.model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

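    # wrap the model for single-GPU inference on the first visible device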
    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with several samples and take the average
    for i, data in enumerate(data_loader):

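        # CUDA kernels are launched asynchronously, so synchronize before and
        # after the forward pass to time the actual GPU execution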
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {args.samples}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == args.samples:
            # `elapsed` was already added to `pure_inf_time` above, so do not
            # accumulate it a second time here
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            break


if __name__ == '__main__':
    main()