# cpu_stream_performance.py
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Module for running the University of Virginia STREAM tool.

It measures sustainable main memory bandwidth in MB/s and the corresponding computation rate for
simple vector kernels.
"""

import os

from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkRegistry
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke


class CpuStreamBenchmark(MicroBenchmarkWithInvoke):
    """The Stream benchmark class.

    Builds one command line that runs a STREAM binary (optionally an architecture-specific build)
    with OpenMP settings derived from the ``--cores`` argument, and parses the Copy/Scale/Add/Triad
    rows of its output into results.
    """
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        # Generic binary name; _preprocess() replaces it when --cpu_arch selects a specific build.
        self._bin_name = 'stream'
        self.__cpu_arch = ['other', 'zen3', 'zen4', 'neo2']

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()

        self._parser.add_argument(
            '--cpu_arch',
            type=str,
            default='other',
            required=False,
            help='The targeted cpu architectures to run STREAM. Default is other. '
            'Possible values are {}.'.format(' '.join(self.__cpu_arch))
        )
        # Adjacent literals keep the URL free of embedded whitespace.
        core_link = (
            'https://techcommunity.microsoft.com/t5/azure-compute-blog/'
            'performance-amp-scalability-of-hbv3-vms-with-milan-x-cpus/ba-p/2939814'
        )

        self._parser.add_argument(
            '--cores',
            nargs='+',
            type=int,
            default=[
                0, 8, 16, 24, 32, 38, 44, 52, 60, 68, 76, 82, 88, 96, 104, 112, 120, 126, 132, 140, 148, 156, 164, 170
            ],
            required=False,
            help='List of cores to perform test. Default core configuration is for HBv4/Zen4 SKU offering. '
            'For HBv3/Zen3 please see: ' + core_link
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Builds the OpenMP environment from ``--cores``, picks the binary name from ``--cpu_arch``
        and appends the resulting command to ``self._commands``.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        # Reference core lists:
        # zen3
        # cores=[0, 4, 8, 12, 16, 20, 24, 28, 30, 34, 38, 42, 46, 50,
        # 54, 58, 60, 64, 68, 72, 76, 80, 84, 88, 90, 94, 98, 102, 106, 110, 114, 118]
        # zen4
        # cores=[0, 8, 16, 24, 32, 38, 44, 52, 60, 68, 76, 82, 88, 96, 104, 112, 120,
        # 126, 132, 140, 148, 156, 164, 170]

        # One single-core place per requested core; OMP_PLACES lists are comma separated.
        cores = self._args.cores
        omp_places = ','.join('{{{}:1}}'.format(core) for core in cores)

        # Space-separated assignments in front of the command are all exported to it. Chaining them
        # with '&&' would only set shell variables, and only the last one would reach the binary.
        envar = ' '.join(
            [
                'OMP_SCHEDULE=static',
                'OMP_DYNAMIC=false',
                'OMP_MAX_ACTIVE_LEVELS=1',
                'OMP_STACKSIZE=256M',
                'OMP_PROC_BIND=true',
                'OMP_NUM_THREADS={}'.format(len(cores)),
                'OMP_PLACES={}'.format(omp_places),
            ]
        )

        # set the binary name based on cpu architecture
        arch_binaries = {'zen3': 'streamZen3', 'zen4': 'streamZen4', 'neo2': 'streamNeo2'}
        cpu_arch = self._args.cpu_arch
        if cpu_arch in arch_binaries:
            self._bin_name = arch_binaries[cpu_arch]
        elif cpu_arch not in self.__cpu_arch:
            # Unknown values keep running the generic binary, as before, but no longer silently.
            logger.warning('Unknown cpu_arch {}, using binary {}'.format(cpu_arch, self._bin_name))

        # Re-check the binary under its final name before building the command, so the command uses
        # the bin_dir in effect after the check (assumes _set_binary_path may update bin_dir).
        if not self._set_binary_path():
            logger.error(
                'Executable {} not found in {} or it is not executable'.format(self._bin_name, self._args.bin_dir)
            )
            return False

        command = envar + ' ' + os.path.join(self._args.bin_dir, self._bin_name)
        self._commands.append(command)
        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to parse raw results and save the summarized results.

          self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        functions = ('Copy', 'Scale', 'Add', 'Triad')
        suffixes = ('_throughput', '_time_avg', '_time_min', '_time_max')

        # Save the raw output first so it is kept even when parsing fails below.
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)

        parsed_rows = 0
        try:
            for line in raw_output.splitlines():
                if 'Number of Threads counted' in line:
                    self._result.add_result('threads', int(line.split('= ')[1]))
                    continue

                entries = line.split()
                if not entries:
                    continue
                # Result rows start with the kernel name, e.g. "Copy:  <rate> <avg> <min> <max>".
                metric = entries[0].replace(':', '')
                if metric not in functions:
                    continue

                values = [float(entry) for entry in entries[1:5]]
                if len(values) != len(suffixes):
                    raise ValueError('expected 4 numeric columns in line: {}'.format(line))
                for suffix, value in zip(suffixes, values):
                    self._result.add_result(metric.lower() + suffix, value)
                parsed_rows += 1
        except (IndexError, ValueError) as e:
            logger.error('Failed to parse STREAM output of command {}: {}'.format(cmd_idx, str(e)))
            return False

        if parsed_rows == 0:
            logger.error('No STREAM results found in the output of command {}'.format(cmd_idx))
            return False

        return True


# Register CpuStreamBenchmark with the benchmark registry under the name 'cpu-stream'.
BenchmarkRegistry.register_benchmark('cpu-stream', CpuStreamBenchmark)