Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
c8c079c2
Unverified
Commit
c8c079c2
authored
Jul 06, 2023
by
Yuting Jiang
Committed by
GitHub
Jul 06, 2023
Browse files
Benchmarks: micro benchmarks - add python code for DirectXGPUCopy (#546)
**Description**: Add Python code for the DirectXGPUCopy benchmark.
parent
af4cfd5b
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
186 additions
and
0 deletions
+186
-0
superbench/benchmarks/micro_benchmarks/__init__.py
superbench/benchmarks/micro_benchmarks/__init__.py
+2
-0
superbench/benchmarks/micro_benchmarks/directx_gpu_copy_performance.py
...nchmarks/micro_benchmarks/directx_gpu_copy_performance.py
+132
-0
superbench/benchmarks/micro_benchmarks/directx_gpu_copy_performance/GPUCopyBw.vcxproj
...benchmarks/directx_gpu_copy_performance/GPUCopyBw.vcxproj
+2
-0
superbench/benchmarks/micro_benchmarks/directx_gpu_copy_performance/Main.cpp
...ks/micro_benchmarks/directx_gpu_copy_performance/Main.cpp
+1
-0
tests/benchmarks/micro_benchmarks/test_directx_gpu_copy_performance.py
...rks/micro_benchmarks/test_directx_gpu_copy_performance.py
+49
-0
No files found.
superbench/benchmarks/micro_benchmarks/__init__.py
View file @
c8c079c2
...
...
@@ -31,6 +31,7 @@
from
superbench.benchmarks.micro_benchmarks.sharding_matmul
import
ShardingMatmul
from
superbench.benchmarks.micro_benchmarks.tcp_connectivity
import
TCPConnectivityBenchmark
from
superbench.benchmarks.micro_benchmarks.tensorrt_inference_performance
import
TensorRTInferenceBenchmark
from
superbench.benchmarks.micro_benchmarks.directx_gpu_copy_performance
import
DirectXGPUCopyBw
from
superbench.benchmarks.micro_benchmarks.directx_mem_bw_performance
import
DirectXGPUMemBw
from
superbench.benchmarks.micro_benchmarks.directx_gemm_flops_performance
import
DirectXGPUCoreFlops
...
...
@@ -63,6 +64,7 @@
'ShardingMatmul'
,
'TCPConnectivityBenchmark'
,
'TensorRTInferenceBenchmark'
,
'DirectXGPUCopyBw'
,
'DirectXGPUMemBw'
,
'DirectXGPUCoreFlops'
,
]
superbench/benchmarks/micro_benchmarks/directx_gpu_copy_performance.py
0 → 100644
View file @
c8c079c2
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""Module of the DirectXGPUCopyBw performance benchmarks."""
import
os
from
superbench.common.utils
import
logger
from
superbench.benchmarks
import
BenchmarkRegistry
,
Platform
from
superbench.benchmarks.micro_benchmarks
import
MemBwBenchmark
class DirectXGPUCopyBw(MemBwBenchmark):
    """The GPUCopyBw benchmark class.

    Measures GPU copy bandwidth by launching the `DirectXGPUCopyBw.exe`
    micro-benchmark binary once per requested copy direction
    ('htod' = host-to-device, 'dtoh' = device-to-host).
    """
    def __init__(self, name, parameters=''):
        """Constructor.

        Args:
            name (str): benchmark name.
            parameters (str): benchmark parameters.
        """
        super().__init__(name, parameters)

        # Copy directions this benchmark supports.
        self._mem_types = ['htod', 'dtoh']
        self._bin_name = 'DirectXGPUCopyBw.exe'

    def add_parser_arguments(self):
        """Add the specified arguments."""
        super().add_parser_arguments()

        self._parser.add_argument(
            '--size',
            type=int,
            required=False,
            default=None,
            help='Size of data for GPU copy.',
        )
        self._parser.add_argument(
            '--warm_up',
            type=int,
            required=False,
            default=20,
            help='Number of warm up copy times to run.',
        )
        self._parser.add_argument(
            '--num_loops',
            type=int,
            required=False,
            default=1000,
            help='Number of copy times to run.',
        )
        self._parser.add_argument(
            '--minbytes',
            type=int,
            required=False,
            default=64,
            help='Run size from min_size to max_size for GPU copy.',
        )
        self._parser.add_argument(
            '--maxbytes',
            type=int,
            required=False,
            default=8 * 1024 * 1024,
            help='Run size from min_size to max_size for GPU copy.',
        )
        self._parser.add_argument(
            '--check',
            action='store_true',
            help='Whether check data after copy.',
        )

    def _preprocess(self):
        """Preprocess/preparation operations before the benchmarking.

        Builds one command line per requested memory-copy direction and
        appends it to self._commands.

        Return:
            True if _preprocess() succeed.
        """
        if not super()._preprocess():
            return False

        for mem_type in self._args.mem_type:
            # Prepare the command line.
            command = os.path.join(self._args.bin_dir, self._bin_name)
            command += f' --{mem_type}'
            command += ' --warm_up ' + str(self._args.warm_up)
            command += ' --num_loops ' + str(self._args.num_loops)
            if self._args.size is not None:
                # A fixed --size overrides the [minbytes, maxbytes] sweep.
                command += ' --size ' + str(self._args.size)
            else:
                command += ' --minbytes ' + str(self._args.minbytes)
                command += ' --maxbytes ' + str(self._args.maxbytes)
            if self._args.check:
                command += ' --check'
            self._commands.append(command)

        return True

    def _process_raw_result(self, cmd_idx, raw_output):
        """Function to process raw results and save the summarized results.

        Result lines presumably look like '<mem_type>: <size>B <bandwidth> ... GB ...'
        (token 0 = direction, token 1 = byte size, token 2 = bandwidth) —
        TODO confirm against the binary's output format.

        Args:
            cmd_idx (int): the index of command corresponding with the raw_output.
            raw_output (str): raw output string of the micro-benchmark.

        Return:
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data('raw_output', raw_output, self._args.log_raw_data)

        try:
            for line in raw_output.splitlines():
                if 'GB' in line:
                    # Split once and reuse; 'copy_type' avoids shadowing the builtin 'type'.
                    tokens = line.strip().split()
                    copy_type = tokens[0].strip(':')
                    size = int(tokens[1].strip('B'))
                    bw = float(tokens[2])
                    self._result.add_result(f'{copy_type}_{size}_bw', bw)
                if 'error' in line.lower():
                    logger.error(
                        'The result format is invalid - round: {}, benchmark: {}, raw output: {}.'.format(
                            self._curr_run_index, self._name, raw_output
                        )
                    )
                    return False
            return True
        except Exception as e:
            logger.error(
                'The result format is invalid - round: {}, benchmark: {}, raw output: {}, exception: {}.'.format(
                    self._curr_run_index, self._name, raw_output, str(e)
                )
            )
            return False
# Register the benchmark under the name used by configs/tests,
# restricted to the DirectX platform.
BenchmarkRegistry.register_benchmark('directx-gpu-copy-bw', DirectXGPUCopyBw, platform=Platform.DIRECTX)
superbench/benchmarks/micro_benchmarks/directx_gpu_copy_performance/GPUCopyBw.vcxproj
View file @
c8c079c2
...
...
@@ -19,12 +19,14 @@
</PropertyGroup>
<Import
Project=
"$(VCTargetsPath)\Microsoft.Cpp.Default.props"
/>
<PropertyGroup
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
Label=
"Configuration"
>
<TargetName>
DirectXGPUCopyBw
</TargetName>
<ConfigurationType>
Application
</ConfigurationType>
<UseDebugLibraries>
true
</UseDebugLibraries>
<PlatformToolset>
v143
</PlatformToolset>
<CharacterSet>
Unicode
</CharacterSet>
</PropertyGroup>
<PropertyGroup
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
Label=
"Configuration"
>
<TargetName>
DirectXGPUCopyBw
</TargetName>
<ConfigurationType>
Application
</ConfigurationType>
<UseDebugLibraries>
false
</UseDebugLibraries>
<PlatformToolset>
v143
</PlatformToolset>
...
...
superbench/benchmarks/micro_benchmarks/directx_gpu_copy_performance/Main.cpp
View file @
c8c079c2
...
...
@@ -16,6 +16,7 @@ int main(int argc, char *argv[]) {
}
else
{
// Run all sizes
for
(
SIZE_T
usize
=
option
.
min_size
;
usize
<=
option
.
max_size
;
usize
+=
usize
)
{
option
.
size
=
usize
;
GPUCopyBw
benchmark
(
&
option
);
benchmark
.
Run
();
}
...
...
tests/benchmarks/micro_benchmarks/test_directx_gpu_copy_performance.py
0 → 100644
View file @
c8c079c2
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""Tests for DirectXGPUCopyBw benchmark."""
import
numbers
from
tests.helper
import
decorator
from
superbench.benchmarks
import
BenchmarkRegistry
,
BenchmarkType
,
ReturnCode
,
Platform
@decorator.directx_test
def test_directx_gpu_copy_bw():
    """Test DirectXGPUCopyBw benchmark."""
    # Build and validate a context with explicit parameters.
    parameters = r'--warm_up 20 --num_loops 1000 --minbytes 64 --maxbytes 8388608 --mem_type htod dtoh'
    context = BenchmarkRegistry.create_benchmark_context(
        'directx-gpu-copy-bw', platform=Platform.DIRECTX, parameters=parameters
    )
    assert BenchmarkRegistry.is_benchmark_context_valid(context)

    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Basic information.
    assert benchmark
    assert benchmark.name == 'directx-gpu-copy-bw'
    assert benchmark.type == BenchmarkType.MICRO

    # Parameters specified in BenchmarkContext are parsed into _args.
    assert benchmark._args.warm_up == 20
    assert benchmark._args.num_loops == 1000
    assert benchmark._args.minbytes == 64
    assert benchmark._args.maxbytes == 8388608
    assert sorted(benchmark._args.mem_type) == ['dtoh', 'htod']

    # Run status and raw output.
    assert benchmark.run_count == 1
    assert benchmark.return_code == ReturnCode.SUCCESS
    assert 'raw_output' in benchmark.raw_data
    assert isinstance(benchmark.raw_data['raw_output'][0], str)

    # Exactly one numeric bandwidth metric per (mem_type, size), with sizes
    # doubling from minbytes (64) up to maxbytes (8388608).
    size = 64
    while size <= 8388608:
        for mem_type in ('htod', 'dtoh'):
            metric = f'{mem_type}_{size}_bw'
            assert metric in benchmark.result
            assert len(benchmark.result[metric]) == 1
            assert isinstance(benchmark.result[metric][0], numbers.Number)
        size *= 2
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment