# test_benchmark.py: tests for profiler.benchmark and _PerfCaseResult output.
import unittest
from unittest import mock

import numpy

import cupy
from cupy import testing
from cupyx import profiler
from cupyx.profiler import _time


class TestBenchmark(unittest.TestCase):
    """Runs ``profiler.benchmark`` against mocked CPU and GPU timers.

    The timers are replaced so that the recorded timings are fully
    deterministic and can be compared with exact values.
    """

    @staticmethod
    def _timer_patches(cpu_ticks):
        """Create (but do not start) the two timer patchers.

        Args:
            cpu_ticks (list of float): Values handed out, in order, by the
                replacement for ``time.perf_counter``.

        Returns:
            tuple: ``(cpu_patcher, gpu_patcher)``. The second one makes
            ``cupy.cuda.get_elapsed_time`` always return ``2500``.
        """
        cpu_patcher = mock.patch(
            'time.perf_counter', mock.Mock(side_effect=cpu_ticks))
        gpu_patcher = mock.patch(
            'cupy.cuda.get_elapsed_time', mock.Mock(return_value=2500))
        return cpu_patcher, gpu_patcher

    @staticmethod
    def _named_mock():
        """Return a fresh mock callable whose ``__name__`` is set."""
        target = mock.Mock()
        # The tests below assert that this name is reported as ``perf.name``.
        target.__name__ = 'test_name_xxx'
        return target

    @staticmethod
    def _int32_operands():
        """Return a pair of random ``(2, 3)`` int32 CuPy arrays."""
        return tuple(
            cupy.testing.shaped_random((2, 3), cupy, 'int32')
            for _ in range(2))

    def test_cpu_routine(self):
        """Default device: 3 warm-up calls plus 10 measured calls."""
        cpu_patcher, gpu_patcher = self._timer_patches([2.4, 3.8, 3.8] * 10)
        with cpu_patcher, gpu_patcher:
            target = self._named_mock()
            operands = self._int32_operands()
            assert target.call_count == 0

            result = profiler.benchmark(
                target, operands, n_repeat=10, n_warmup=3)

            assert result.name == 'test_name_xxx'
            # Warm-up and measured invocations together.
            assert target.call_count == 13
            assert result.cpu_times.shape == (10,)
            assert result.gpu_times.shape == (1, 10)
            # 3.8 - 2.4 from the mocked CPU clock.
            assert (result.cpu_times == 1.4).all()
            # The mocked elapsed time of 2500 is reported as 2.5.
            assert (result.gpu_times == 2.5).all()

    @testing.multi_gpu(2)
    def test_multigpu_routine(self):
        """Two devices: one row of GPU timings is recorded per device."""
        cpu_patcher, gpu_patcher = self._timer_patches([2.4, 3.8, 3.8] * 10)
        with cpu_patcher, gpu_patcher:
            target = self._named_mock()
            operands = self._int32_operands()
            assert target.call_count == 0

            result = profiler.benchmark(
                target, operands, n_repeat=10, n_warmup=3, devices=(0, 1))

            assert result.name == 'test_name_xxx'
            assert target.call_count == 13
            assert result.cpu_times.shape == (10,)
            assert result.gpu_times.shape == (2, 10)
            assert (result.cpu_times == 1.4).all()
            assert (result.gpu_times == 2.5).all()

    def test_benchmark_max_duration(self):
        """``max_duration`` stops the measurement after three repeats."""
        cpu_patcher, gpu_patcher = self._timer_patches([1., 2., 2.] * 6)
        with cpu_patcher, gpu_patcher:
            target = self._named_mock()
            operands = self._int32_operands()
            assert target.call_count == 0

            result = profiler.benchmark(
                target, operands, n_warmup=3, max_duration=2.5)

            assert result.name == 'test_name_xxx'
            # 3 warm-up calls followed by 3 measured calls.
            assert target.call_count == 6
            assert result.cpu_times.shape == (3,)
            assert result.gpu_times.shape == (1, 3)
            assert (result.cpu_times == 1.).all()
            assert (result.gpu_times == 2.5).all()

    def test_benchmark_kwargs(self):
        """Smoke test: the benchmarked function accepts keyword arguments."""
        data = cupy.random.rand(5)
        profiler.benchmark(
            cupy.nonzero, kwargs={'a': data}, n_repeat=1, n_warmup=1)


class TestPerfCaseResult(unittest.TestCase):
    """Checks the text rendering of ``_time._PerfCaseResult``."""

    # Five samples per row. In the expected strings below the first row is
    # reported as "CPU" and the second one as "GPU-0".
    _FIVE_RUNS = [
        [5.4, 7.1, 6.0, 5.4, 4.2],
        [6.4, 4.3, 8.9, 9.6, 3.8],
    ]

    @staticmethod
    def _make_result(rows, devices=(0,)):
        """Build a result named ``test_name_xxx`` from ``rows``.

        Args:
            rows (list of list of float): Timings that are scaled by
                ``1e-6`` before being handed to ``_PerfCaseResult``.
            devices (tuple of int): Device ids passed through unchanged.
        """
        scaled = numpy.array(rows) * 1e-6
        return _time._PerfCaseResult('test_name_xxx', scaled, devices)

    def test_show_gpu(self):
        """``str()`` includes both the CPU and the GPU statistics."""
        result = self._make_result(self._FIVE_RUNS)
        expected = (
            'test_name_xxx       :'
            '    CPU:     5.620 us   +/-  0.943 '
            '(min:     4.200 / max:     7.100) us '
            '    GPU-0:     6.600 us   +/-  2.344 '
            '(min:     3.800 / max:     9.600) us'
        )
        assert str(result) == expected

    def test_no_show_gpu(self):
        """``to_str()`` without arguments reports the CPU part only."""
        result = self._make_result(self._FIVE_RUNS)
        expected = (
            'test_name_xxx       :'
            '    CPU:     5.620 us   +/-  0.943 '
            '(min:     4.200 / max:     7.100) us'
        )
        # The second pass checks that repeated calls give the same text.
        for _ in range(2):
            assert result.to_str() == expected

    def test_single_show_gpu(self):
        """With a single sample only the value itself is printed."""
        result = self._make_result([[5.4], [6.4]])
        expected = ('test_name_xxx       :    CPU:     5.400 us '
                    '    GPU-0:     6.400 us')
        assert str(result) == expected

    def test_single_no_show_gpu(self):
        """Single sample, CPU-only rendering through ``to_str()``."""
        result = self._make_result([[5.4], [6.4]])
        expected = 'test_name_xxx       :    CPU:     5.400 us'
        assert result.to_str() == expected

    def test_show_multigpu(self):
        """Each device gets its own ``GPU-<id>`` entry in ``str()``."""
        result = self._make_result(
            [[5.4], [6.4], [7.0], [8.1]], devices=(0, 1, 2))
        expected = ('test_name_xxx       :    CPU:     5.400 us '
                    '    GPU-0:     6.400 us '
                    '    GPU-1:     7.000 us '
                    '    GPU-2:     8.100 us')
        assert str(result) == expected