Benchmarks: Add Feature - Provide option to save raw data into file. (#333)

**Description** Use the `log_raw_data` config to control whether the raw data is logged into a file. The default value is `no`. It can be set to `yes` for particular benchmarks, such as the NCCL/RCCL tests, to save their raw data into a file.

Benchmarks: Add Feature - Provide option to save raw data into file. (#333)
**Description** Use the `log_raw_data` config to control whether the raw data is logged into a file. The default value is `no`. It can be set to `yes` for particular benchmarks, such as the NCCL/RCCL tests, to save their raw data into a file.
6d895da8 · guoshzhao · GitHub · d368d90e · 6d895da8 · 6d895da8
Unverified Commit 6d895da8 authored Apr 01, 2022 by guoshzhao Committed by GitHub Apr 01, 2022
20 changed files
--- a/docs/superbench-config.mdx
+++ b/docs/superbench-config.mdx
@@ -209,6 +209,7 @@ ${benchmark_name}:
  parameters:
    run_count: int
    duration: int
+    log_raw_data: bool
    ${argument}: bool | str | int | float | list
 ```
@@ -224,6 +225,7 @@ model-benchmarks:${annotation}:
  parameters:
    run_count: int
    duration: int
+    log_raw_data: bool
    num_warmup: int
    num_steps: int
    sample_count: int
@@ -334,6 +336,18 @@ A list of models to run, only supported in model-benchmark.
 Parameters for benchmark to use, varying for different benchmarks.
+There are three common parameters for all benchmarks:
+* run_count: how many times the user wants to run this benchmark; the default value is 1.
+* duration: the elapsed time of the benchmark in seconds. It works for all model-benchmarks, but for micro-benchmarks, benchmark authors must consume it themselves.
+* log_raw_data: log raw data into a file instead of saving it into the result object; the default value is `False`. Benchmarks that produce large raw output, such as `nccl-bw`/`rccl-bw`, may want to set it to `True`.
+For model-benchmarks, there are some parameters that control the elapsed time:
+* duration: the elapsed time of the benchmark in seconds.
+* num_warmup: the number of warmup steps.
+* num_steps: the number of test steps.
+If `duration > 0` and `num_warmup + num_steps > 0`, then the benchmark will take the lesser of the two as the elapsed time. Otherwise, only one of them will take effect.
 ## `Mode` Schema
 Definition for each benchmark mode, here is an overview of `Mode` configuration structure:

--- a/superbench/benchmarks/base.py
+++ b/superbench/benchmarks/base.py
@@ -65,6 +65,12 @@ def add_parser_arguments(self):
            required=False,
            help='The elapsed time of benchmark in seconds.',
        )
+        self._parser.add_argument(
+            '--log_raw_data',
+            action='store_true',
+            default=False,
+            help='Log raw data into file instead of saving it into result object.',
+        )
    def get_configurable_settings(self):
        """Get all the configurable settings.

--- a/superbench/benchmarks/docker_benchmarks/rocm_onnxruntime_performance.py
+++ b/superbench/benchmarks/docker_benchmarks/rocm_onnxruntime_performance.py
@@ -59,7 +59,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output', raw_output)
+        self._result.add_raw_data('raw_output', raw_output, self._args.log_raw_data)
        content = raw_output.splitlines(False)
        try:

--- a/superbench/benchmarks/micro_benchmarks/cpu_memory_bw_latency_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/cpu_memory_bw_latency_performance.py
@@ -78,7 +78,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
        # parse the command to see which command this output belongs to
        # the command is formed as ...; mlc --option; ...

--- a/superbench/benchmarks/micro_benchmarks/cublas_function.py
+++ b/superbench/benchmarks/micro_benchmarks/cublas_function.py
@@ -268,7 +268,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
        try:
            lines = raw_output.splitlines()
@@ -292,7 +292,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
                    raw_data.pop()
                    raw_data = [float(item) for item in raw_data]
                    self._result.add_result(metric.lower() + '_time', statistics.mean(raw_data))
-                    self._result.add_raw_data(metric.lower() + '_time', raw_data)
+                    self._result.add_raw_data(metric.lower() + '_time', raw_data, self._args.log_raw_data)
                if 'Error' in line:
                    error = True
        except BaseException as e:

--- a/superbench/benchmarks/micro_benchmarks/cuda_gemm_flops_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/cuda_gemm_flops_performance.py
@@ -110,7 +110,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
            True if the raw output string is valid and result can be extracted.
        """
        precision = self._precision_need_to_run[cmd_idx]
-        self._result.add_raw_data('raw_output_' + precision, raw_output)
+        self._result.add_raw_data('raw_output_' + precision, raw_output, self._args.log_raw_data)
        valid = True
        flops = list()

--- a/superbench/benchmarks/micro_benchmarks/cuda_memory_bw_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/cuda_memory_bw_performance.py
@@ -68,7 +68,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output)
+        self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output, self._args.log_raw_data)
        mem_bw = -1
        valid = True

--- a/superbench/benchmarks/micro_benchmarks/cuda_nccl_bw_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/cuda_nccl_bw_performance.py
@@ -143,7 +143,7 @@ def _process_raw_result(self, cmd_idx, raw_output):    # noqa: C901
            if rank > 0:
                return True
-        self._result.add_raw_data('raw_output_' + self._args.operation, raw_output)
+        self._result.add_raw_data('raw_output_' + self._args.operation, raw_output, self._args.log_raw_data)
        content = raw_output.splitlines()
        size = -1

--- a/superbench/benchmarks/micro_benchmarks/cudnn_function.py
+++ b/superbench/benchmarks/micro_benchmarks/cudnn_function.py
@@ -402,7 +402,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
        try:
            lines = raw_output.splitlines()
@@ -426,7 +426,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
                    raw_data.pop()
                    raw_data = [float(item) for item in raw_data]
                    self._result.add_result(metric.lower() + '_time', statistics.mean(raw_data) * 1000)
-                    self._result.add_raw_data(metric.lower() + '_time', raw_data)
+                    self._result.add_raw_data(metric.lower() + '_time', raw_data, self._args.log_raw_data)
                if 'Error' in line:
                    error = True
        except BaseException as e:

--- a/superbench/benchmarks/micro_benchmarks/disk_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/disk_performance.py
@@ -184,7 +184,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
        try:
            fio_output = json.loads(raw_output)

--- a/superbench/benchmarks/micro_benchmarks/gpcnet_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/gpcnet_performance.py
@@ -74,7 +74,7 @@ def _process_raw_result(self, idx, raw_output):    # noqa: C901
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output_' + str(idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(idx), raw_output, self._args.log_raw_data)
        try:
            # Parse and add result

--- a/superbench/benchmarks/micro_benchmarks/gpu_burn_test.py
+++ b/superbench/benchmarks/micro_benchmarks/gpu_burn_test.py
@@ -123,9 +123,9 @@ def _process_raw_result(self, cmd_idx, raw_output):    # noqa: C901
                        self._result.add_result(res.split(':')[0].replace(' ', '_').lower() + '_pass', 1)
                    else:
                        self._result.add_result(res.split(':')[0].replace(' ', '_').lower() + '_pass', 0)
-                    self._result.add_raw_data('GPU-Burn_result', res)
+                    self._result.add_raw_data('GPU-Burn_result', res, self._args.log_raw_data)
            else:
-                self._result.add_raw_data('GPU Burn Failure: ', failure_msg)
+                self._result.add_raw_data('GPU Burn Failure: ', failure_msg, self._args.log_raw_data)
                self._result.add_result('abort', 1)
                return False
        except BaseException as e:

--- a/superbench/benchmarks/micro_benchmarks/gpu_copy_bw_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/gpu_copy_bw_performance.py
@@ -122,7 +122,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
        try:
            output_lines = [x.strip() for x in raw_output.strip().splitlines()]

--- a/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/ib_loopback_performance.py
@@ -187,7 +187,8 @@ def _process_raw_result(self, cmd_idx, raw_output):
            True if the raw output string is valid and result can be extracted.
        """
        self._result.add_raw_data(
-            'raw_output_' + self._args.commands[cmd_idx] + '_IB' + str(self._args.ib_index), raw_output
+            'raw_output_' + self._args.commands[cmd_idx] + '_IB' + str(self._args.ib_index), raw_output,
+            self._args.log_raw_data
        )
        valid = False

--- a/superbench/benchmarks/micro_benchmarks/ib_validation_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/ib_validation_performance.py
@@ -336,7 +336,7 @@ def _process_raw_result(self, cmd_idx, raw_output):    # noqa: C901
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output_' + self._args.commands[cmd_idx], raw_output)
+        self._result.add_raw_data('raw_output_' + self._args.commands[cmd_idx], raw_output, self._args.log_raw_data)
        # If it's invoked by MPI and rank is not 0, no result is expected
        if os.getenv('OMPI_COMM_WORLD_RANK'):

--- a/superbench/benchmarks/micro_benchmarks/kernel_launch_overhead.py
+++ b/superbench/benchmarks/micro_benchmarks/kernel_launch_overhead.py
@@ -79,7 +79,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output)
+        self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
        pattern = r'\d+\.\d+'
        result = re.findall(pattern, raw_output)

--- a/superbench/benchmarks/micro_benchmarks/micro_base.py
+++ b/superbench/benchmarks/micro_benchmarks/micro_base.py
@@ -69,7 +69,7 @@ def _process_numeric_result(self, metric, result, reduce_type=None, cal_percenti
            )
            return False
-        self._result.add_raw_data(metric, result)
+        self._result.add_raw_data(metric, result, self._args.log_raw_data)
        self._result.add_result(metric, statistics.mean(result), reduce_type)
        if cal_percentile:
            self._process_percentile_result(metric, result, reduce_type)

--- a/superbench/benchmarks/micro_benchmarks/rocm_gemm_flops_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/rocm_gemm_flops_performance.py
@@ -127,7 +127,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
            True if the raw output string is valid and result can be extracted.
        """
        precision = self._precision_need_to_run[cmd_idx]
-        self._result.add_raw_data('raw_output_' + precision, raw_output)
+        self._result.add_raw_data('raw_output_' + precision, raw_output, self._args.log_raw_data)
        content = raw_output.splitlines()
        gflops_index = None

--- a/superbench/benchmarks/micro_benchmarks/rocm_memory_bw_performance.py
+++ b/superbench/benchmarks/micro_benchmarks/rocm_memory_bw_performance.py
@@ -60,7 +60,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
        Return:
            True if the raw output string is valid and result can be extracted.
        """
-        self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output)
+        self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output, self._args.log_raw_data)
        mem_bw = -1
        value_index = -1

--- a/superbench/benchmarks/micro_benchmarks/tcp_connectivity.py
+++ b/superbench/benchmarks/micro_benchmarks/tcp_connectivity.py
@@ -154,7 +154,7 @@ def _process_raw_result(self, idx, raw_output):
            True if the raw output string is valid and result can be extracted.
        """
        host = self.__hosts[idx]
-        self._result.add_raw_data('raw_output_' + host, raw_output)
+        self._result.add_raw_data('raw_output_' + host, raw_output, self._args.log_raw_data)
        try:
            # If socket error or exception happens on TCPing, add result values as failed