# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Module of the cublas functions benchmarks."""

import os
import json

import yaml

from superbench.common.utils import logger
from superbench.benchmarks import Platform, BenchmarkRegistry, ReturnCode
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke


class CublasBenchmark(MicroBenchmarkWithInvoke):
    """The Cublas performance benchmark class."""
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        # Default cublas function configs benchmarked when the user does not
        # supply a custom one via --config_json_str. Each dict is serialized
        # to json and handed to the CublasBenchmark binary as --config_json.
        self.__default_params_dict_list = [
            {'name': 'cublasCgemm', 'm': 512, 'n': 512, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 2048, 'n': 512, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 512, 'n': 2048, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 640, 'n': 1280, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm', 'm': 896, 'n': 1792, 'k': 32, 'transa': 1, 'transb': 0},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 64, 'n': 32, 'k': 3, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 64, 'n': 32, 'k': 64, 'transa': 1, 'transb': 0, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 128, 'n': 32, 'k': 128, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 128, 'n': 32, 'k': 64, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasCgemm3mStridedBatched', 'm': 64, 'n': 32, 'k': 128, 'transa': 0, 'transb': 1, 'batchCount': 544},
            {'name': 'cublasGemmStridedBatchedEx', 'm': 224, 'n': 224, 'k': 64, 'transa': 0, 'transb': 0, 'datatype': 'half', 'use_tensor_core': True, 'batchCount': 160},
            {'name': 'cublasGemmStridedBatchedEx', 'm': 64, 'n': 224, 'k': 224, 'transa': 0, 'transb': 0, 'datatype': 'half', 'use_tensor_core': True, 'batchCount': 160},
            {'name': 'cublasGemmEx', 'm': 4000, 'n': 224, 'k': 1000, 'transa': 0, 'transb': 0, 'datatype': 'float', 'use_tensor_core': False},
            {'name': 'cublasGemmEx', 'm': 4000, 'n': 224, 'k': 1000, 'transa': 1, 'transb': 0, 'datatype': 'half', 'use_tensor_core': True},
            {'name': 'cublasGemmEx', 'm': 1000, 'n': 224, 'k': 4000, 'transa': 0, 'transb': 0, 'datatype': 'half', 'use_tensor_core': False},
            {'name': 'cublasGemmEx', 'm': 1000, 'n': 224, 'k': 4000, 'transa': 0, 'transb': 0, 'datatype': 'float', 'use_tensor_core': False},
            {'name': 'cublasSgemm', 'm': 1024, 'n': 7168, 'k': 1024, 'transa': 1, 'transb': 0},
            {'name': 'cublasSgemmStridedBatched', 'm': 64, 'n': 224, 'k': 224, 'transa': 0, 'transb': 0, 'batchCount': 512},
            {'name': 'cublasSgemmStridedBatched', 'm': 64, 'n': 224, 'k': 224, 'transa': 0, 'transb': 0, 'batchCount': 160},
        ]

        self._bin_name = 'CublasBenchmark'

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()

        self._parser.add_argument(
            '--num_warmup',
            type=int,
            default=8,
            required=False,
            help='The number of warmup step.',
        )
        self._parser.add_argument(
            '--num_steps',
            type=int,
            default=100,
            required=False,
            help='The number of test step.',
        )
        self._parser.add_argument(
            '--num_in_step',
            type=int,
            default=1000,
            required=False,
            help='The number of functions in one step.',
        )
        self._parser.add_argument(
            '--random_seed',
            type=int,
            default=33931,
            required=False,
            help='The random seed to fill in the data of the function.',
        )
        self._parser.add_argument(
            '--config_json_str',
            type=str,
            default=None,
            required=False,
            help='The custom json string defining the params in a cublas function.',
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Builds one invocation command per function config (either the default
        list or the single user-supplied --config_json_str config) and appends
        each to self._commands.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        command = os.path.join(self._args.bin_dir, self._bin_name)
        command += (' --num_test ' + str(self._args.num_steps))
        command += (' --warm_up ' + str(self._args.num_warmup))
        command += (' --num_in_step ' + str(self._args.num_in_step))
        command += (' --random_seed ' + str(self._args.random_seed))

        try:
            if self._args.config_json_str:
                # yaml.safe_load is used instead of json.loads because it is
                # tolerant of the quoting the string picks up on the command
                # line (json is a subset of yaml).
                config_dict_list = [yaml.safe_load(self._args.config_json_str)]
            else:
                config_dict_list = self.__default_params_dict_list
            for config_dict in config_dict_list:
                # Strip spaces and wrap in single quotes so the json survives
                # shell word-splitting when the command is executed.
                config_json_str = "\'" + json.dumps(config_dict).replace(' ', '') + "\'"
                self._commands.append(command + ' --config_json ' + config_json_str)
        except Exception as e:
            # NOTE(review): narrowed from BaseException so KeyboardInterrupt/
            # SystemExit are not swallowed and misreported as invalid arguments.
            logger.error('Invalid input params - benchmark: {}, message: {}'.format(self._name, str(e)))
            self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
            return False

        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

          self._result.add_raw_data() and self._result.add_result() need to be called to save the results.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)

        try:
            lines = raw_output.splitlines()
            metric = ''
            error = False
            raw_data = []
            for line in lines:
                if '[function config]' in line:
                    # The config echo names the metric for the data that follows.
                    metric = line[line.index('[function config]: ') + len('[function config]: '):]
                if '[raw_data]' in line:
                    raw_data = line[line.index('[raw_data]: ') + len('[raw_data]: '):]
                    raw_data = raw_data.split(',')
                    # The data line ends with a trailing comma, so drop the
                    # empty element produced by split().
                    raw_data.pop()
                    raw_data = [float(item) for item in raw_data]
                    # Report the mean latency over all measured steps.
                    self._result.add_result(metric, sum(raw_data) / len(raw_data))
                    self._result.add_raw_data(metric, raw_data)
                if 'Error' in line:
                    error = True
        except Exception as e:
            # NOTE(review): narrowed from BaseException; parsing failures are
            # ordinary exceptions and interrupts should propagate.
            logger.error(
                'Cannot extract results from cublas functions - round: {}, index of cmd: {}, \
                benchmark: {}, raw data: {}, message: {}'.format(
                    self._curr_run_index, cmd_idx, self._name, raw_output, str(e)
                )
            )
            return False
        if error:
            logger.error(
                'Error in running cublas test - round: {}, index of cmd: {}, benchmark: {}, raw data: {}'.format(
                    self._curr_run_index, cmd_idx, self._name, raw_output
                )
            )
            return False
        return True


BenchmarkRegistry.register_benchmark('cublas-function', CublasBenchmark, platform=Platform.CUDA)