# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Module of the DTK HPL benchmark."""

from superbench.benchmarks import BenchmarkRegistry, Platform
from superbench.benchmarks.micro_benchmarks import GpuHplBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_hpl_performance_base import format_hpl_extended_tv, format_hpl_tv


class DtkHplBenchmark(GpuHplBenchmark):
    """HPL benchmark variant registered for the DTK platform.

    The class supplies the rocHPL-specific command line options, the T/V
    strings built from those options, and the text of the generated HPL
    input file. Everything else comes from ``GpuHplBenchmark``.
    """

    # File-name defaults and label; they are only declared here, not used
    # directly by any method in this class.
    _default_bin_name = 'run_rochpl'
    _default_dat_name = 'HPL.dat'
    _default_out_name = 'HPL.out'
    _file_label = 'HPL'

    def _add_variant_parser_arguments(self):
        """Register the rocHPL-specific options on ``self._parser``.

        Every option is an optional integer. Options are registered in the
        order listed below; an entry whose choices are ``None`` accepts any
        integer.
        """
        # (flag, dest, default, allowed values or None, help text)
        option_table = (
            ('--pfact', 'pfact', 2, [0, 1, 2], 'Panel factorization: 0 for left, 1 for Crout, 2 for right.'),
            ('--nbmin', 'nbmin', 32, None, 'Recursive stopping criterion.'),
            ('--ndiv', 'ndiv', 2, None, 'Number of panels in recursion.'),
            (
                '--rfact', 'rfact', 2, [0, 1, 2],
                'Recursive panel factorization: 0 for left, 1 for Crout, 2 for right.'
            ),
            ('--depth', 'depth', 1, None, 'Lookahead depth.'),
            ('--swap', 'swap', 1, [0, 1, 2], 'Swapping algorithm: 0 for binary exchange, 1 for long, 2 for mix.'),
            ('--swapping-threshold', 'swapping_threshold', 64, None, 'Swapping threshold.'),
            ('--l1', 'l1', 0, [0, 1], 'L1 storage form: 0 for transposed, 1 for non-transposed.'),
            ('--u', 'u', 0, [0, 1], 'U storage form: 0 for transposed, 1 for non-transposed.'),
            ('--equilibration', 'equilibration', 0, [0, 1], 'Equilibration: 0 for no, 1 for yes.'),
            ('--memory-alignment', 'memory_alignment', 8, None, 'Memory alignment in double.'),
        )

        for flag, dest, default, allowed, description in option_table:
            # Only pass ``choices`` for options that restrict their values.
            restriction = {} if allowed is None else {'choices': allowed}
            self._parser.add_argument(
                flag,
                dest=dest,
                type=int,
                default=default,
                required=False,
                help=description,
                **restriction,
            )

    def _format_tv(self):
        """Build the expected rocHPL T/V field from the input arguments.

        Returns:
            The value produced by ``format_hpl_extended_tv`` for the current
            arguments.
        """
        args = self._args
        return format_hpl_extended_tv(
            args.pmap,
            args.depth,
            args.bcast,
            args.rfact,
            args.ndiv,
            args.pfact,
            args.nbmin,
            args.l1,
            args.u,
            args.equilibration,
            args.memory_alignment,
        )

    def _format_output_tv(self):
        """Build the rocHPL T/V field expected in the generated output.

        Returns:
            The value produced by ``format_hpl_tv``; unlike ``_format_tv`` it
            is not given the L1, U, equilibration or alignment settings.
        """
        args = self._args
        return format_hpl_tv(
            args.pmap,
            args.depth,
            args.bcast,
            args.rfact,
            args.ndiv,
            args.pfact,
            args.nbmin,
        )

    def _format_dat_content(self):
        """Build the text of the generated rocHPL input file.

        Returns:
            str: The file content, one setting per line, each line
                terminated by a newline.
        """
        args = self._args
        # One entry per line of the input file, in file order.
        entries = (
            'HPLinpack benchmark input file',
            'Innovative Computing Laboratory, University of Tennessee',
            f'{self._out_file_name} output file name (if any)',
            '0 device out (6=stdout,7=stderr,file)',
            '1 # of problems sizes (N)',
            f'{args.n} Ns',
            '1 # of NBs',
            f'{args.nb} NBs',
            f'{args.pmap} PMAP process mapping (0=Row-,1=Column-major)',
            '1 # of process grids (P x Q)',
            f'{args.p} Ps',
            f'{args.q} Qs',
            f'{args.threshold} threshold',
            '1 # of panel fact',
            f'{args.pfact} PFACTs (0=left, 1=Crout, 2=Right)',
            '1 # of recursive stopping criterium',
            f'{args.nbmin} NBMINs (>= 1)',
            '1 # of panels in recursion',
            f'{args.ndiv} NDIVs',
            '1 # of recursive panel fact.',
            f'{args.rfact} RFACTs (0=left, 1=Crout, 2=Right)',
            '1 # of broadcast',
            f'{args.bcast} BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)',
            '1 # of lookahead depth',
            f'{args.depth} DEPTHs (>=0)',
            f'{args.swap} SWAP (0=bin-exch,1=long,2=mix)',
            f'{args.swapping_threshold} swapping threshold',
            f'{args.l1} L1 in (0=transposed,1=no-transposed) form',
            f'{args.u} U in (0=transposed,1=no-transposed) form',
            f'{args.equilibration} Equilibration (0=no,1=yes)',
            f'{args.memory_alignment} memory alignment in double (> 0)',
        )
        return ''.join(f'{entry}\n' for entry in entries)


BenchmarkRegistry.register_benchmark('gpu-hpl', DtkHplBenchmark, platform=Platform.DTK)