# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests for GPCNet benchmark."""

import os
import numbers
import unittest
from pathlib import Path

from superbench.benchmarks import BenchmarkRegistry, Platform, BenchmarkType


class GPCNetBenchmarkTest(unittest.TestCase):    # noqa: E501
    """Tests for the 'gpcnet-network-test' and 'gpcnet-network-load-test' benchmarks.

    Each test feeds canned benchmark output to the benchmark object and checks
    the metrics it records. No real binary is executed; setUp only creates
    empty placeholder files with the expected binary names.
    """
    def setUp(self):
        """Create the fake benchmark binaries and point SB_MICRO_PATH at them."""
        # SB_MICRO_PATH is process-wide state: remember the previous value so
        # that tearDown can put it back instead of leaking it to other tests.
        self.__saved_micro_path = os.environ.get('SB_MICRO_PATH')
        os.environ['SB_MICRO_PATH'] = '/tmp/superbench'
        binary_path = os.path.join(os.getenv('SB_MICRO_PATH'), 'bin')
        Path(binary_path).mkdir(parents=True, exist_ok=True)
        # Create fake binary files just for testing.
        self.__binary_files = []
        for bin_name in ['network_test', 'network_load_test']:
            bin_file = Path(binary_path, bin_name)
            self.__binary_files.append(bin_file)
            bin_file.touch(mode=0o755, exist_ok=True)

    def tearDown(self):
        """Restore SB_MICRO_PATH and remove the fake binaries created by setUp."""
        # Restore the environment first so it happens even if a file removal fails.
        if self.__saved_micro_path is None:
            os.environ.pop('SB_MICRO_PATH', None)
        else:
            os.environ['SB_MICRO_PATH'] = self.__saved_micro_path

        for bin_file in self.__binary_files:
            try:
                bin_file.unlink()
            except FileNotFoundError:
                # The path is a fixed shared location; the file being gone
                # already is the state this cleanup wants, so it is not an error.
                pass

    def test_gpcnet_network_test(self):
        """Test gpcnet-network-test benchmark."""
        raw_output = """# noqa: E501
Network Tests v1.3
  Test with 2 MPI ranks (2 nodes)

  Legend
   RR = random ring communication pattern
   Nat = natural ring communication pattern
   Lat = latency
   BW = bandwidth
   BW+Sync = bandwidth with barrier
+------------------------------------------------------------------------------+
|                            Isolated Network Tests                            |
+---------------------------------+--------------+--------------+--------------+
|                            Name |          Avg |          99% |        Units |
+---------------------------------+--------------+--------------+--------------+
|          RR Two-sided Lat (8 B) |      10000.0 |      10000.0 |         usec |
+---------------------------------+--------------+--------------+--------------+
|                RR Get Lat (8 B) |      10000.0 |      10000.0 |         usec |
+---------------------------------+--------------+--------------+--------------+
|      RR Two-sided BW (131072 B) |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
|            RR Put BW (131072 B) |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
|     Nat Two-sided BW (131072 B) |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
|        Multiple Allreduce (8 B) |      10000.0 |      10000.0 |         usec |
+---------------------------------+--------------+--------------+--------------+
|      Multiple Alltoall (4096 B) |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+
"""
        # Check registry.
        benchmark_name = 'gpcnet-network-test'
        benchmark_class, _ = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CPU)
        assert (benchmark_class)

        # Check preprocess
        benchmark = benchmark_class(benchmark_name)
        ret = benchmark._preprocess()
        assert (ret)

        # Compare only the part of the command from the binary name onwards,
        # so the directory prefix in front of it does not matter.
        expect_command = 'network_test'
        command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
        assert (command == expect_command)

        # Output printed when the binary refuses to run on too few nodes: it is
        # expected to be accepted and to leave only the default metrics in the result.
        raw_output_no_execution = """
ERROR: this application must be run on at least 2 nodes
--------------------------------------------------------------------------
Primary job  terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:

  Process name: [[63697,1],0]
  Exit code:    1
--------------------------------------------------------------------------
"""
        assert (benchmark._process_raw_result(0, raw_output_no_execution))
        assert (len(benchmark.result) == benchmark.default_metric_count)

        # Check function _process_raw_result.
        # Positive case - valid raw output.
        assert (benchmark._process_raw_result(0, raw_output))
        metric_list = [
            'rr_two-sided_lat',
            'rr_get_lat',
            'rr_two-sided_bw',
            'rr_put_bw',
            'rr_two-sided+sync_bw',
            'nat_two-sided_bw',
            'multiple_allreduce_time',
            'multiple_alltoall_bw',
        ]
        # Every table row must yield one numeric value per reported column.
        for metric_medium in metric_list:
            for suffix in ['avg', '99%']:
                metric = metric_medium + '_' + suffix
                assert (metric in benchmark.result)
                assert (len(benchmark.result[metric]) == 1)
                assert (isinstance(benchmark.result[metric][0], numbers.Number))

        # Negative case - Add invalid raw output.
        assert (benchmark._process_raw_result(0, 'ERROR') is False)

        # Check basic information.
        assert (benchmark.name == 'gpcnet-network-test')
        assert (benchmark.type == BenchmarkType.MICRO)
        assert (benchmark._bin_name == 'network_test')

    def test_gpcnet_network_load(self):    # noqa: C901
        """Test gpcnet-network-load-test benchmark."""
        raw_output = """# noqa: E501
NetworkLoad Tests v1.3
  Test with 10 MPI ranks (10 nodes)
  2 nodes running Network Tests
  8 nodes running Congestion Tests (min 100 nodes per congestor)

  Legend
   RR = random ring communication pattern
   Lat = latency
   BW = bandwidth
   BW+Sync = bandwidth with barrier
+------------------------------------------------------------------------------------------------------------------------------------------+
|                                                          Isolated Network Tests                                                          |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|                            Name |          Min |          Max |          Avg |   Avg(Worst) |          99% |        99.9% |        Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|          RR Two-sided Lat (8 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |         usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|        Multiple Allreduce (8 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |         usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+

+------------------------------------------------------------------------------------------------------------------------------------------+
|                                                        Isolated Congestion Tests                                                         |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|                            Name |          Min |          Max |          Avg |   Avg(Worst) |          99% |        99.9% |        Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|               Alltoall (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|       Two-sided Incast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|             Put Incast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|              Get Bcast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+

+------------------------------------------------------------------------------------------------------------------------------------------+
|                             Network Tests running with Congestion Tests (    RR Two-sided Lat Network Test)                              |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|                            Name |          Min |          Max |          Avg |   Avg(Worst) |          99% |        99.9% |        Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|          RR Two-sided Lat (8 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |         usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|               Alltoall (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|       Two-sided Incast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|             Put Incast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|              Get Bcast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+

+------------------------------------------------------------------------------------------------------------------------------------------+
|                             Network Tests running with Congestion Tests (RR Two-sided BW+Sync Network Test)                              |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|                            Name |          Min |          Max |          Avg |   Avg(Worst) |          99% |        99.9% |        Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
| RR Two-sided BW+Sync (131072 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|               Alltoall (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|       Two-sided Incast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|             Put Incast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|              Get Bcast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+

+------------------------------------------------------------------------------------------------------------------------------------------+
|                             Network Tests running with Congestion Tests (  Multiple Allreduce Network Test)                              |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|                            Name |          Min |          Max |          Avg |   Avg(Worst) |          99% |        99.9% |        Units |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|        Multiple Allreduce (8 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |         usec |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|               Alltoall (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|       Two-sided Incast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|             Put Incast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+
|              Get Bcast (4096 B) |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |      10000.0 |   MiB/s/rank |
+---------------------------------+--------------+--------------+--------------+--------------+--------------+--------------+--------------+

+------------------------------------------------------------------------------+
|          Network Tests running with Congestion Tests - Key Results           |
+---------------------------------+--------------------------------------------+
|                            Name |                   Congestion Impact Factor |
+---------------------------------+----------------------+---------------------+
|                                 |                  Avg |                 99% |
+---------------------------------+----------------------+---------------------+
|          RR Two-sided Lat (8 B) |                 0.0X |                0.0X |
+---------------------------------+----------------------+---------------------+
| RR Two-sided BW+Sync (131072 B) |                 0.0X |                0.0X |
+---------------------------------+----------------------+---------------------+
|        Multiple Allreduce (8 B) |                 0.0X |                0.0X |
+---------------------------------+----------------------+---------------------+
"""
        # Check registry.
        benchmark_name = 'gpcnet-network-load-test'
        benchmark_class, _ = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CPU)
        assert (benchmark_class)

        # Check preprocess
        benchmark = benchmark_class(benchmark_name)
        ret = benchmark._preprocess()
        assert (ret)

        # Compare only the part of the command from the binary name onwards,
        # so the directory prefix in front of it does not matter.
        expect_command = 'network_load_test'
        command = benchmark._bin_name + benchmark._commands[0].split(benchmark._bin_name)[1]
        assert (command == expect_command)

        # Check function _process_raw_result.
        # Output printed when the binary refuses to run on too few nodes: it is
        # expected to be accepted and to leave only the default metrics in the result.
        raw_output_no_execution = """
ERROR: this application must be run on at least 10 nodes
--------------------------------------------------------------------------
Primary job  terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun detected that one or more processes exited with non-zero status, thus causing
the job to be terminated. The first process to do so was:

  Process name: [[63697,1],0]
  Exit code:    1
--------------------------------------------------------------------------
"""
        assert (benchmark._process_raw_result(0, raw_output_no_execution))
        assert (len(benchmark.result) == benchmark.default_metric_count)

        # Positive case - valid raw output.
        assert (benchmark._process_raw_result(0, raw_output))
        metric_list = ['rr_two-sided_lat_x', 'rr_two-sided+sync_bw_x', 'multiple_allreduce_x']
        # Only the rows of the final "Key Results" table are checked here.
        for metric_medium in metric_list:
            for suffix in ['avg', '99%']:
                metric = metric_medium + '_' + suffix
                assert (metric in benchmark.result)
                assert (len(benchmark.result[metric]) == 1)
                assert (isinstance(benchmark.result[metric][0], numbers.Number))

        # Negative case - Add invalid raw output.
        assert (benchmark._process_raw_result(0, 'ERROR') is False)

        # Check basic information.
        assert (benchmark.name == 'gpcnet-network-load-test')
        assert (benchmark.type == BenchmarkType.MICRO)
        assert (benchmark._bin_name == 'network_load_test')