Unverified Commit 6d895da8 authored by guoshzhao's avatar guoshzhao Committed by GitHub
Browse files

Benchmarks: Add Feature - Provide option to save raw data into file. (#333)

**Description**
Use the config option `log_raw_data` to control whether the raw data is logged into a file. The default value is `no`. It can be set to `yes` for particular benchmarks, such as the NCCL/RCCL tests, to save their raw data into a file.
parent d368d90e
...@@ -209,6 +209,7 @@ ${benchmark_name}: ...@@ -209,6 +209,7 @@ ${benchmark_name}:
parameters: parameters:
run_count: int run_count: int
duration: int duration: int
log_raw_data: bool
${argument}: bool | str | int | float | list ${argument}: bool | str | int | float | list
``` ```
...@@ -224,6 +225,7 @@ model-benchmarks:${annotation}: ...@@ -224,6 +225,7 @@ model-benchmarks:${annotation}:
parameters: parameters:
run_count: int run_count: int
duration: int duration: int
log_raw_data: bool
num_warmup: int num_warmup: int
num_steps: int num_steps: int
sample_count: int sample_count: int
...@@ -334,6 +336,18 @@ A list of models to run, only supported in model-benchmark. ...@@ -334,6 +336,18 @@ A list of models to run, only supported in model-benchmark.
Parameters for benchmark to use, varying for different benchmarks. Parameters for benchmark to use, varying for different benchmarks.
There are three common parameters for all benchmarks:
* run_count: how many times the user wants to run this benchmark; the default value is 1.
* duration: the elapsed time of the benchmark in seconds. It works for all model-benchmarks, but for micro-benchmarks, benchmark authors need to consume it themselves.
* log_raw_data: log raw data into a file instead of saving it into the result object; the default value is `False`. Benchmarks that have large raw output, such as `nccl-bw`/`rccl-bw`, may want to set it to `True`.
For Model-Benchmark, there are some parameters that can control the elapsed time.
* duration: the elapsed time of benchmark in seconds.
* num_warmup: the number of warmup steps.
* num_steps: the number of test steps.
If `duration > 0` and `num_warmup + num_steps > 0`, then the benchmark will use whichever of the two gives the shorter elapsed time. Otherwise, only one of them will take effect.
## `Mode` Schema ## `Mode` Schema
Definition for each benchmark mode, here is an overview of `Mode` configuration structure: Definition for each benchmark mode, here is an overview of `Mode` configuration structure:
......
...@@ -65,6 +65,12 @@ def add_parser_arguments(self): ...@@ -65,6 +65,12 @@ def add_parser_arguments(self):
required=False, required=False,
help='The elapsed time of benchmark in seconds.', help='The elapsed time of benchmark in seconds.',
) )
self._parser.add_argument(
'--log_raw_data',
action='store_true',
default=False,
help='Log raw data into file instead of saving it into result object.',
)
def get_configurable_settings(self): def get_configurable_settings(self):
"""Get all the configurable settings. """Get all the configurable settings.
......
...@@ -59,7 +59,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -59,7 +59,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output', raw_output) self._result.add_raw_data('raw_output', raw_output, self._args.log_raw_data)
content = raw_output.splitlines(False) content = raw_output.splitlines(False)
try: try:
......
...@@ -78,7 +78,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -78,7 +78,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output) self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
# parse the command to see which command this output belongs to # parse the command to see which command this output belongs to
# the command is formed as ...; mlc --option; ... # the command is formed as ...; mlc --option; ...
......
...@@ -268,7 +268,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -268,7 +268,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output) self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
try: try:
lines = raw_output.splitlines() lines = raw_output.splitlines()
...@@ -292,7 +292,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -292,7 +292,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
raw_data.pop() raw_data.pop()
raw_data = [float(item) for item in raw_data] raw_data = [float(item) for item in raw_data]
self._result.add_result(metric.lower() + '_time', statistics.mean(raw_data)) self._result.add_result(metric.lower() + '_time', statistics.mean(raw_data))
self._result.add_raw_data(metric.lower() + '_time', raw_data) self._result.add_raw_data(metric.lower() + '_time', raw_data, self._args.log_raw_data)
if 'Error' in line: if 'Error' in line:
error = True error = True
except BaseException as e: except BaseException as e:
......
...@@ -110,7 +110,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -110,7 +110,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
precision = self._precision_need_to_run[cmd_idx] precision = self._precision_need_to_run[cmd_idx]
self._result.add_raw_data('raw_output_' + precision, raw_output) self._result.add_raw_data('raw_output_' + precision, raw_output, self._args.log_raw_data)
valid = True valid = True
flops = list() flops = list()
......
...@@ -68,7 +68,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -68,7 +68,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output) self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output, self._args.log_raw_data)
mem_bw = -1 mem_bw = -1
valid = True valid = True
......
...@@ -143,7 +143,7 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901 ...@@ -143,7 +143,7 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901
if rank > 0: if rank > 0:
return True return True
self._result.add_raw_data('raw_output_' + self._args.operation, raw_output) self._result.add_raw_data('raw_output_' + self._args.operation, raw_output, self._args.log_raw_data)
content = raw_output.splitlines() content = raw_output.splitlines()
size = -1 size = -1
......
...@@ -402,7 +402,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -402,7 +402,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output) self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
try: try:
lines = raw_output.splitlines() lines = raw_output.splitlines()
...@@ -426,7 +426,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -426,7 +426,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
raw_data.pop() raw_data.pop()
raw_data = [float(item) for item in raw_data] raw_data = [float(item) for item in raw_data]
self._result.add_result(metric.lower() + '_time', statistics.mean(raw_data) * 1000) self._result.add_result(metric.lower() + '_time', statistics.mean(raw_data) * 1000)
self._result.add_raw_data(metric.lower() + '_time', raw_data) self._result.add_raw_data(metric.lower() + '_time', raw_data, self._args.log_raw_data)
if 'Error' in line: if 'Error' in line:
error = True error = True
except BaseException as e: except BaseException as e:
......
...@@ -184,7 +184,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -184,7 +184,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output) self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
try: try:
fio_output = json.loads(raw_output) fio_output = json.loads(raw_output)
......
...@@ -74,7 +74,7 @@ def _process_raw_result(self, idx, raw_output): # noqa: C901 ...@@ -74,7 +74,7 @@ def _process_raw_result(self, idx, raw_output): # noqa: C901
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output_' + str(idx), raw_output) self._result.add_raw_data('raw_output_' + str(idx), raw_output, self._args.log_raw_data)
try: try:
# Parse and add result # Parse and add result
......
...@@ -123,9 +123,9 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901 ...@@ -123,9 +123,9 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901
self._result.add_result(res.split(':')[0].replace(' ', '_').lower() + '_pass', 1) self._result.add_result(res.split(':')[0].replace(' ', '_').lower() + '_pass', 1)
else: else:
self._result.add_result(res.split(':')[0].replace(' ', '_').lower() + '_pass', 0) self._result.add_result(res.split(':')[0].replace(' ', '_').lower() + '_pass', 0)
self._result.add_raw_data('GPU-Burn_result', res) self._result.add_raw_data('GPU-Burn_result', res, self._args.log_raw_data)
else: else:
self._result.add_raw_data('GPU Burn Failure: ', failure_msg) self._result.add_raw_data('GPU Burn Failure: ', failure_msg, self._args.log_raw_data)
self._result.add_result('abort', 1) self._result.add_result('abort', 1)
return False return False
except BaseException as e: except BaseException as e:
......
...@@ -122,7 +122,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -122,7 +122,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output) self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
try: try:
output_lines = [x.strip() for x in raw_output.strip().splitlines()] output_lines = [x.strip() for x in raw_output.strip().splitlines()]
......
...@@ -187,7 +187,8 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -187,7 +187,8 @@ def _process_raw_result(self, cmd_idx, raw_output):
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data( self._result.add_raw_data(
'raw_output_' + self._args.commands[cmd_idx] + '_IB' + str(self._args.ib_index), raw_output 'raw_output_' + self._args.commands[cmd_idx] + '_IB' + str(self._args.ib_index), raw_output,
self._args.log_raw_data
) )
valid = False valid = False
......
...@@ -336,7 +336,7 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901 ...@@ -336,7 +336,7 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output_' + self._args.commands[cmd_idx], raw_output) self._result.add_raw_data('raw_output_' + self._args.commands[cmd_idx], raw_output, self._args.log_raw_data)
# If it's invoked by MPI and rank is not 0, no result is expected # If it's invoked by MPI and rank is not 0, no result is expected
if os.getenv('OMPI_COMM_WORLD_RANK'): if os.getenv('OMPI_COMM_WORLD_RANK'):
......
...@@ -79,7 +79,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -79,7 +79,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output) self._result.add_raw_data('raw_output_' + str(cmd_idx), raw_output, self._args.log_raw_data)
pattern = r'\d+\.\d+' pattern = r'\d+\.\d+'
result = re.findall(pattern, raw_output) result = re.findall(pattern, raw_output)
......
...@@ -69,7 +69,7 @@ def _process_numeric_result(self, metric, result, reduce_type=None, cal_percenti ...@@ -69,7 +69,7 @@ def _process_numeric_result(self, metric, result, reduce_type=None, cal_percenti
) )
return False return False
self._result.add_raw_data(metric, result) self._result.add_raw_data(metric, result, self._args.log_raw_data)
self._result.add_result(metric, statistics.mean(result), reduce_type) self._result.add_result(metric, statistics.mean(result), reduce_type)
if cal_percentile: if cal_percentile:
self._process_percentile_result(metric, result, reduce_type) self._process_percentile_result(metric, result, reduce_type)
......
...@@ -127,7 +127,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -127,7 +127,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
precision = self._precision_need_to_run[cmd_idx] precision = self._precision_need_to_run[cmd_idx]
self._result.add_raw_data('raw_output_' + precision, raw_output) self._result.add_raw_data('raw_output_' + precision, raw_output, self._args.log_raw_data)
content = raw_output.splitlines() content = raw_output.splitlines()
gflops_index = None gflops_index = None
......
...@@ -60,7 +60,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -60,7 +60,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
Return: Return:
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output) self._result.add_raw_data('raw_output_' + self._args.mem_type[cmd_idx], raw_output, self._args.log_raw_data)
mem_bw = -1 mem_bw = -1
value_index = -1 value_index = -1
......
...@@ -154,7 +154,7 @@ def _process_raw_result(self, idx, raw_output): ...@@ -154,7 +154,7 @@ def _process_raw_result(self, idx, raw_output):
True if the raw output string is valid and result can be extracted. True if the raw output string is valid and result can be extracted.
""" """
host = self.__hosts[idx] host = self.__hosts[idx]
self._result.add_raw_data('raw_output_' + host, raw_output) self._result.add_raw_data('raw_output_' + host, raw_output, self._args.log_raw_data)
try: try:
# If socket error or exception happens on TCPing, add result values as failed # If socket error or exception happens on TCPing, add result values as failed
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment