dtk_hpl_performance.py 6.28 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Module of the DTK HPL benchmark."""

from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.benchmarks.micro_benchmarks import GpuHplBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_hpl_performance_base import format_hpl_extended_tv, format_hpl_tv


class DtkHplBenchmark(GpuHplBenchmark):
    """The DTK HPL benchmark class.

    Builds on GpuHplBenchmark and supplies the rocHPL-specific pieces defined
    here: extra command line options, the two T/V strings, and the text of
    the generated input file.
    """

    # Names used for the rocHPL variant; presumably consumed by the
    # GpuHplBenchmark base class, which is not visible from this module.
    _default_bin_name = 'run_rochpl'
    _default_dat_name = 'HPL.dat'
    _default_out_name = 'HPL.out'
    _file_label = 'HPL'

    def _add_variant_parser_arguments(self):
        """Add rocHPL-specific arguments.

        Every option registered here is an optional integer. Options with a
        closed set of legal values additionally restrict input via
        ``choices``.
        """
        # One row per option, in registration order:
        # (flag, default, choices or None, help text).
        option_table = (
            ('--pfact', 2, [0, 1, 2], 'Panel factorization: 0 for left, 1 for Crout, 2 for right.'),
            ('--nbmin', 32, None, 'Recursive stopping criterion.'),
            ('--ndiv', 2, None, 'Number of panels in recursion.'),
            ('--rfact', 2, [0, 1, 2], 'Recursive panel factorization: 0 for left, 1 for Crout, 2 for right.'),
            ('--depth', 1, None, 'Lookahead depth.'),
            ('--swap', 1, [0, 1, 2], 'Swapping algorithm: 0 for binary exchange, 1 for long, 2 for mix.'),
            ('--swapping-threshold', 64, None, 'Swapping threshold.'),
            ('--l1', 0, [0, 1], 'L1 storage form: 0 for transposed, 1 for non-transposed.'),
            ('--u', 0, [0, 1], 'U storage form: 0 for transposed, 1 for non-transposed.'),
            ('--equilibration', 0, [0, 1], 'Equilibration: 0 for no, 1 for yes.'),
            ('--memory-alignment', 8, None, 'Memory alignment in double.'),
        )

        for flag, default, choices, help_text in option_table:
            # Forward ``choices`` only for options that restrict their values.
            restriction = {} if choices is None else {'choices': choices}
            self._parser.add_argument(
                flag,
                # '--swapping-threshold' -> 'swapping_threshold', '--l1' -> 'l1', ...
                dest=flag.lstrip('-').replace('-', '_'),
                type=int,
                default=default,
                required=False,
                help=help_text,
                **restriction
            )

    def _format_tv(self):
        """Format the expected rocHPL T/V field from input arguments.

        Return:
            The value produced by format_hpl_extended_tv for the parsed
            pmap, depth, bcast, rfact, ndiv, pfact, nbmin, l1, u,
            equilibration and memory_alignment arguments, in that order.
        """
        opts = self._args
        return format_hpl_extended_tv(
            opts.pmap, opts.depth, opts.bcast, opts.rfact, opts.ndiv, opts.pfact, opts.nbmin, opts.l1, opts.u,
            opts.equilibration, opts.memory_alignment
        )

    def _format_output_tv(self):
        """Format the expected rocHPL T/V field in generated output.

        Return:
            The value produced by format_hpl_tv for the parsed pmap, depth,
            bcast, rfact, ndiv, pfact and nbmin arguments, in that order.
        """
        opts = self._args
        return format_hpl_tv(opts.pmap, opts.depth, opts.bcast, opts.rfact, opts.ndiv, opts.pfact, opts.nbmin)

    def _format_dat_content(self):
        """Format generated rocHPL input file content.

        Return:
            str: the input file text, one entry per line, every line
            (including the last) terminated by a newline.
        """
        opts = self._args
        # Each entry is a value followed by its description; the spacing
        # between the two is fixed text, not padding computed from the value.
        entries = [
            'HPLinpack benchmark input file',
            'Innovative Computing Laboratory, University of Tennessee',
            f'{self._out_file_name} output file name (if any)',
            '0            device out (6=stdout,7=stderr,file)',
            '1            # of problems sizes (N)',
            f'{opts.n}         Ns',
            '1            # of NBs',
            f'{opts.nb}         NBs',
            f'{opts.pmap}            PMAP process mapping (0=Row-,1=Column-major)',
            '1            # of process grids (P x Q)',
            f'{opts.p}            Ps',
            f'{opts.q}            Qs',
            f'{opts.threshold}         threshold',
            '1            # of panel fact',
            f'{opts.pfact}            PFACTs (0=left, 1=Crout, 2=Right)',
            '1            # of recursive stopping criterium',
            f'{opts.nbmin}           NBMINs (>= 1)',
            '1            # of panels in recursion',
            f'{opts.ndiv}            NDIVs',
            '1            # of recursive panel fact.',
            f'{opts.rfact}            RFACTs (0=left, 1=Crout, 2=Right)',
            '1            # of broadcast',
            f'{opts.bcast}            BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)',
            '1            # of lookahead depth',
            f'{opts.depth}            DEPTHs (>=0)',
            f'{opts.swap}            SWAP (0=bin-exch,1=long,2=mix)',
            f'{opts.swapping_threshold}           swapping threshold',
            f'{opts.l1}            L1 in (0=transposed,1=no-transposed) form',
            f'{opts.u}            U  in (0=transposed,1=no-transposed) form',
            f'{opts.equilibration}            Equilibration (0=no,1=yes)',
            f'{opts.memory_alignment}            memory alignment in double (> 0)',
        ]
        return ''.join(f'{entry}\n' for entry in entries)


# Register DtkHplBenchmark under the 'gpu-hpl' benchmark name for the DTK platform.
BenchmarkRegistry.register_benchmark(
    'gpu-hpl',
    DtkHplBenchmark,
    platform=Platform.DTK,
)