Benchmarks: Add Benchmark - Add gpcnet microbenchmark (#229)

**Description** Add GPCNet microbenchmarks. **Major Revision** - Add 2 microbenchmarks for GPCNet: gpcnet-network-test and gpcnet-network-load-test. - Add the related test and example files.

Benchmarks: Add Benchmark - Add gpcnet microbenchmark (#229)
**Description** Add GPCNet microbenchmarks. **Major Revision** - Add 2 microbenchmarks for GPCNet: gpcnet-network-test and gpcnet-network-load-test. - Add the related test and example files.
6003f2c2 · Yuting Jiang · GitHub · f841c8f4 · 6003f2c2 · 6003f2c2
Unverified Commit 6003f2c2 authored Oct 22, 2021 by Yuting Jiang Committed by GitHub Oct 22, 2021
4 changed files
--- a/examples/benchmarks/gpcnet_performance.py
+++ b/examples/benchmarks/gpcnet_performance.py
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+"""Micro benchmark example for gpcnet performance.
+Commands to run:
+  mpirun --allow-run-as-root -np 2 -H node0:1,node1:1 examples/benchmarks/gpcnet_performance.py
+"""
+from superbench.benchmarks import BenchmarkRegistry
+from superbench.common.utils import logger
+if __name__ == '__main__':
+    # Pass 'gpcnet-network-load-test' instead to run the other registered GPCNet benchmark.
+    benchmark = BenchmarkRegistry.launch_benchmark(
+        BenchmarkRegistry.create_benchmark_context('gpcnet-network-test')
+    )
+    # Only report when launch_benchmark() returned a benchmark object.
+    if benchmark:
+        summary = 'benchmark: {}, return code: {}, result: {}'.format(
+            benchmark.name, benchmark.return_code, benchmark.result
+        )
+        logger.info(summary)
--- a/superbench/benchmarks/micro_benchmarks/__init__.py
+++ b/superbench/benchmarks/micro_benchmarks/__init__.py
@@ -20,10 +20,11 @@ from superbench.benchmarks.micro_benchmarks.rocm_memory_bw_performance import Ro
 from superbench.benchmarks.micro_benchmarks.rocm_gemm_flops_performance import RocmGemmFlopsBenchmark
 from superbench.benchmarks.micro_benchmarks.gpu_sm_copy_bw_performance import GpuSmCopyBwBenchmark
 from superbench.benchmarks.micro_benchmarks.tcp_connectivity import TCPConnectivityBenchmark
+from superbench.benchmarks.micro_benchmarks.gpcnet_performance import GPCNetBenchmark
 __all__ = [
    'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ShardingMatmul', 'ComputationCommunicationOverlap', 'KernelLaunch',
    'CublasBenchmark', 'CudnnBenchmark', 'GemmFlopsBenchmark', 'CudaGemmFlopsBenchmark', 'MemBwBenchmark',
    'CudaMemBwBenchmark', 'DiskBenchmark', 'IBLoopbackBenchmark', 'CudaNcclBwBenchmark', 'RocmMemBwBenchmark',
-    'RocmGemmFlopsBenchmark', 'GpuSmCopyBwBenchmark', 'TCPConnectivityBenchmark'
+    'RocmGemmFlopsBenchmark', 'GpuSmCopyBwBenchmark', 'TCPConnectivityBenchmark', 'GPCNetBenchmark'
 ]
--- a/superbench/benchmarks/micro_benchmarks/gpcnet_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/gpcnet_performance.py
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+"""Module of the GPCNet benchmarks."""
+import os
+from superbench.common.utils import logger
+from superbench.benchmarks import BenchmarkRegistry
+from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
+class GPCNetBenchmark(MicroBenchmarkWithInvoke):
+    """The GPCNet performance benchmark class.
+    The same class serves both 'gpcnet-network-test' and 'gpcnet-network-load-test';
+    the benchmark name decides which GPCNet binary is invoked.
+    """
+    def __init__(self, name, parameters=''):
+        """Constructor.
+        Args:
+            name (str): benchmark name.
+            parameters (str): benchmark parameters.
+        """
+        super().__init__(name, parameters)
+        # The two names are mutually exclusive; any other name leaves the binary name as the base class set it.
+        if self._name == 'gpcnet-network-test':
+            self._bin_name = 'network_test'
+        elif self._name == 'gpcnet-network-load-test':
+            self._bin_name = 'network_load_test'
+    def add_parser_arguments(self):
+        """Add the specified arguments."""
+        super().add_parser_arguments()
+    def _preprocess(self):
+        """Preprocess/preparation operations before the benchmarking.
+        Appends the path of the selected binary (bin_dir joined with the binary name) to the commands to run.
+        Return:
+            True if _preprocess() succeeds.
+        """
+        if not super()._preprocess():
+            return False
+        command = os.path.join(self._args.bin_dir, self._bin_name)
+        self._commands.append(command)
+        return True
+    def _process_raw_result(self, idx, raw_output):    # noqa: C901
+        """Function to process raw results and save the summarized results.
+          self._result.add_raw_data() and self._result.add_result() need to be called to save the results.
+        Only lines starting with '|' are treated as table rows:
+          - a row with a single cell containing 'Tests' is the table title,
+          - a row containing 'Avg' or 'Name' is the label row,
+          - any other row holds values; each value is stored as '<title>_<row name>_<label>',
+            with spaces removed from the title and row name, and the 'Units' column skipped.
+        Args:
+            idx (int): the index corresponding with the raw_output.
+            raw_output (str): raw output string of the micro-benchmark.
+        Return:
+            True if the raw output string is valid and result can be extracted, or if the output only
+            reports that the application must be run on more ranks. False for any other output containing
+            'ERROR' or when a table row cannot be parsed.
+        """
+        self._result.add_raw_data('raw_output_' + str(idx), raw_output)
+        try:
+            # Parse and add result
+            if 'ERROR' not in raw_output:
+                labels = None
+                test_name = ''
+                # Iterate over a separate list of lines: raw_output must stay the original string so that
+                # the error log below reports the real output rather than the repr of a list.
+                for line in raw_output.splitlines():
+                    if not line.startswith('|'):
+                        continue
+                    items = [item.strip() for item in line.split('|')]
+                    # Get table name
+                    if len(items) == 3 and 'Tests' in items[1]:
+                        test_name = items[1].replace(' ', '')
+                    # Get the line of the table labels
+                    elif 'Avg' in line or 'Name' in line:
+                        labels = items
+                    # Get values related to the labels
+                    else:
+                        name_prefix = items[1].replace(' ', '')
+                        # First and last items are the empty strings outside the outer '|' and items[1] is
+                        # the row name, so values live in items[2:-1]. A value row seen before any label row
+                        # raises on labels[i] and is reported as an invalid format by the handler below.
+                        for i in range(2, len(items) - 1):
+                            if labels[i] != 'Units':
+                                # Strip surrounding 'X' characters (e.g. '1.2X') before converting.
+                                self._result.add_result(
+                                    test_name + '_' + name_prefix + '_' + labels[i], float(items[i].strip('X'))
+                                )
+            elif 'ERROR: this application must be run on at least' in raw_output:
+                # Too few ranks for the binary: accepted as valid, with no metrics recorded.
+                return True
+            else:
+                logger.error(
+                    'The result format is invalid - round: {}, benchmark: {}, raw output: {}.'.format(
+                        self._curr_run_index, self._name, raw_output
+                    )
+                )
+                return False
+        except Exception as e:
+            logger.error(
+                'The result format is invalid - round: {}, benchmark: {}, raw output: {}, message: {}.'.format(
+                    self._curr_run_index, self._name, raw_output, str(e)
+                )
+            )
+            return False
+        return True
+# Both benchmark names map to GPCNetBenchmark; its constructor picks the binary from the name.
+BenchmarkRegistry.register_benchmark('gpcnet-network-test', GPCNetBenchmark)
+BenchmarkRegistry.register_benchmark('gpcnet-network-load-test', GPCNetBenchmark)
--- a/tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py
+++ b/tests/benchmarks/micro_benchmarks/test_gpcnet_performance.py