Unverified Commit 9f56b219 authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Benchmarks: Unify metric names of benchmarks (#252)

**Description**
Unify metric names of benchmarks.
parent c13ed2a2
...@@ -238,22 +238,22 @@ result = { ...@@ -238,22 +238,22 @@ result = {
'run_count': N, 'run_count': N,
'return_code': 0, 'return_code': 0,
'raw_data': { 'raw_data': {
'throughput-train-float32': [[step1_time, ..., stepK_time], ..., []], 'fp32_train_throughput': [[step1_throughput, ..., stepK_throughput], ..., []],
'throughput-train-float16': [[step1_time, ..., stepK_time], ..., []], 'fp16_train_throughput': [[step1_throughput, ..., stepK_throughput], ..., []],
'throughput-inference-float32': [[step1_time, ..., stepK_time], ..., []], 'fp32_inference_throughput': [[step1_throughput, ..., stepK_throughput], ..., []],
'throughput-inference-float16': [[step1_time, ..., stepK_time], ..., []], 'fp16_inference_throughput': [[step1_throughput, ..., stepK_throughput], ..., []],
}, },
'result': { 'result': {
'throughput-train-float32': [avg_throughput1, ..., avg_throughputN], 'fp32_train_throughput': [avg_throughput1, ..., avg_throughputN],
'throughput-train-float16': [avg_throughput1, ..., avg_throughputN], 'fp16_train_throughput': [avg_throughput1, ..., avg_throughputN],
'throughput-inference-float32': [avg_throughput1, ..., avg_throughputN], 'fp32_inference_throughput': [avg_throughput1, ..., avg_throughputN],
'throughput-inference-float16': [avg_throughput1, ..., avg_throughputN], 'fp16_inference_throughput': [avg_throughput1, ..., avg_throughputN],
}, },
'reduce_op': { 'reduce_op': {
'throughput-train-float32': 'min', 'fp32_train_throughput': 'min',
'throughput-train-float16': 'min', 'fp16_train_throughput': 'min',
'throughput-inference-float32': None, 'fp32_inference_throughput': None,
'throughput-inference-float16': None, 'fp16_inference_throughput': None,
}, },
} }
``` ```
......
...@@ -14,15 +14,15 @@ Run the rocm onnxruntime model training benchmarks packaged in docker `superbenc ...@@ -14,15 +14,15 @@ Run the rocm onnxruntime model training benchmarks packaged in docker `superbenc
#### Metrics #### Metrics
| Name | Unit | Description | | Name | Unit | Description |
|-------------------------------------------------------|------------------------|-----------------------------------------------------------| |------------------------------------------------------------------------|------------------------|-----------------------------------------------------------|
| onnxruntime-ort-models/bert_large_uncased_ngpu_1 | throughput (samples/s) | The throughput of bert large uncased model on 1 GPU. | | onnxruntime-ort-models/bert_large_uncased_ngpu_1_train_throughput | throughput (samples/s) | The throughput of bert large uncased model on 1 GPU. |
| onnxruntime-ort-models/bert_large_uncased_ngpu_8 | throughput (samples/s) | The throughput of bert large uncased model on 8 GPUs. | | onnxruntime-ort-models/bert_large_uncased_ngpu_8_train_throughput | throughput (samples/s) | The throughput of bert large uncased model on 8 GPUs. |
| onnxruntime-ort-models/distilbert_base_uncased_ngpu_1 | throughput (samples/s) | The throughput of distilbert base uncased model on 1 GPU. | | onnxruntime-ort-models/distilbert_base_uncased_ngpu_1_train_throughput | throughput (samples/s) | The throughput of distilbert base uncased model on 1 GPU. |
| onnxruntime-ort-models/distilbert_base_uncased_ngpu_8 | throughput (samples/s) | The throughput of distilbert base uncased model on 8 GPUs. | | onnxruntime-ort-models/distilbert_base_uncased_ngpu_8_train_throughput | throughput (samples/s) | The throughput of distilbert base uncased model on 8 GPUs. |
| onnxruntime-ort-models/gpt2_ngpu_1 | throughput (samples/s) | The throughput of gpt2 model on 1 GPU. | | onnxruntime-ort-models/gpt2_ngpu_1_train_throughput | throughput (samples/s) | The throughput of gpt2 model on 1 GPU. |
| onnxruntime-ort-models/gpt2_ngpu_8 | throughput (samples/s) | The throughput of gpt2 model on 8 GPUs. | | onnxruntime-ort-models/gpt2_ngpu_8_train_throughput | throughput (samples/s) | The throughput of gpt2 model on 8 GPUs. |
| onnxruntime-ort-models/facebook_bart_large_ngpu_1 | throughput (samples/s) | The throughput of facebook bart large model on 1 GPU. | | onnxruntime-ort-models/facebook_bart_large_ngpu_1_train_throughput | throughput (samples/s) | The throughput of facebook bart large model on 1 GPU. |
| onnxruntime-ort-models/facebook_bart_large_ngpu_8 | throughput (samples/s) | The throughput of facebook bart large model on 8 GPUs. | | onnxruntime-ort-models/facebook_bart_large_ngpu_8_train_throughput | throughput (samples/s) | The throughput of facebook bart large model on 8 GPUs. |
| onnxruntime-ort-models/roberta_large_ngpu_1 | throughput (samples/s) | The throughput of roberta large model on 1 GPU. | | onnxruntime-ort-models/roberta_large_ngpu_1_train_throughput | throughput (samples/s) | The throughput of roberta large model on 1 GPU. |
| onnxruntime-ort-models/roberta_large_ngpu_8 | throughput (samples/s) | The throughput of roberta large model on 8 GPUs. | | onnxruntime-ort-models/roberta_large_ngpu_8_train_throughput | throughput (samples/s) | The throughput of roberta large model on 8 GPUs. |
...@@ -15,16 +15,16 @@ including gpt2-small, gpt2-medium, gpt2-large and gpt2-xl. ...@@ -15,16 +15,16 @@ including gpt2-small, gpt2-medium, gpt2-large and gpt2-xl.
#### Metrics #### Metrics
| Name | Unit | Description | | Name | Unit | Description |
|---------------------------------------------------------------|------------------------|---------------------------------------------| |------------------------------------------------------------|------------------------|---------------------------------------------|
| gpt_models/pytorch-${model_name}/steptime_train_float32 | time (ms) | Train step time with single precision. | | gpt_models/pytorch-${model_name}/fp32_train_step_time | time (ms) | Train step time with single precision. |
| gpt_models/pytorch-${model_name}/throughput_train_float32 | throughput (samples/s) | Train throughput with single precision. | | gpt_models/pytorch-${model_name}/fp32_train_throughput | throughput (samples/s) | Train throughput with single precision. |
| gpt_models/pytorch-${model_name}/steptime_inference_float32 | time (ms) | Inference step time with single precision. | | gpt_models/pytorch-${model_name}/fp32_inference_step_time | time (ms) | Inference step time with single precision. |
| gpt_models/pytorch-${model_name}/throughput_inference_float32 | throughput (samples/s) | Inference throughput with single precision. | | gpt_models/pytorch-${model_name}/fp32_inference_throughput | throughput (samples/s) | Inference throughput with single precision. |
| gpt_models/pytorch-${model_name}/steptime_train_float16 | time (ms) | Train step time with half precision. | | gpt_models/pytorch-${model_name}/fp16_train_step_time | time (ms) | Train step time with half precision. |
| gpt_models/pytorch-${model_name}/throughput_train_float16 | throughput (samples/s) | Train throughput with half precision. | | gpt_models/pytorch-${model_name}/fp16_train_throughput | throughput (samples/s) | Train throughput with half precision. |
| gpt_models/pytorch-${model_name}/steptime_inference_float16 | time (ms) | Inference step time with half precision. | | gpt_models/pytorch-${model_name}/fp16_inference_step_time | time (ms) | Inference step time with half precision. |
| gpt_models/pytorch-${model_name}/throughput_inference_float16 | throughput (samples/s) | Inference throughput with half precision. | | gpt_models/pytorch-${model_name}/fp16_inference_throughput | throughput (samples/s) | Inference throughput with half precision. |
### `bert_models` ### `bert_models`
...@@ -34,16 +34,16 @@ Run training or inference tasks with single or half precision for BERT models, i ...@@ -34,16 +34,16 @@ Run training or inference tasks with single or half precision for BERT models, i
#### Metrics #### Metrics
| Name | Unit | Description | | Name | Unit | Description |
|----------------------------------------------------------------|------------------------|---------------------------------------------| |-------------------------------------------------------------|------------------------|---------------------------------------------|
| bert_models/pytorch-${model_name}/steptime_train_float32 | time (ms) | Train step time with single precision. | | bert_models/pytorch-${model_name}/fp32_train_step_time | time (ms) | Train step time with single precision. |
| bert_models/pytorch-${model_name}/throughput_train_float32 | throughput (samples/s) | Train throughput with single precision. | | bert_models/pytorch-${model_name}/fp32_train_throughput | throughput (samples/s) | Train throughput with single precision. |
| bert_models/pytorch-${model_name}/steptime_inference_float32 | time (ms) | Inference step time with single precision. | | bert_models/pytorch-${model_name}/fp32_inference_step_time | time (ms) | Inference step time with single precision. |
| bert_models/pytorch-${model_name}/throughput_inference_float32 | throughput (samples/s) | Inference throughput with single precision. | | bert_models/pytorch-${model_name}/fp32_inference_throughput | throughput (samples/s) | Inference throughput with single precision. |
| bert_models/pytorch-${model_name}/steptime_train_float16 | time (ms) | Train step time with half precision. | | bert_models/pytorch-${model_name}/fp16_train_step_time | time (ms) | Train step time with half precision. |
| bert_models/pytorch-${model_name}/throughput_train_float16 | throughput (samples/s) | Train throughput with half precision. | | bert_models/pytorch-${model_name}/fp16_train_throughput | throughput (samples/s) | Train throughput with half precision. |
| bert_models/pytorch-${model_name}/steptime_inference_float16 | time (ms) | Inference step time with half precision. | | bert_models/pytorch-${model_name}/fp16_inference_step_time | time (ms) | Inference step time with half precision. |
| bert_models/pytorch-${model_name}/throughput_inference_float16 | throughput (samples/s) | Inference throughput with half precision. | | bert_models/pytorch-${model_name}/fp16_inference_throughput | throughput (samples/s) | Inference throughput with half precision. |
### `lstm_models` ### `lstm_models`
...@@ -53,16 +53,16 @@ Run training or inference tasks with single or half precision for one bidirectio ...@@ -53,16 +53,16 @@ Run training or inference tasks with single or half precision for one bidirectio
#### Metrics #### Metrics
| Name | Unit | Description | | Name | Unit | Description |
|-------------------------------------------------------|------------------------|---------------------------------------------| |----------------------------------------------------|------------------------|---------------------------------------------|
| lstm_models/pytorch-lstm/steptime_train_float32 | time (ms) | Train step time with single precision. | | lstm_models/pytorch-lstm/fp32_train_step_time | time (ms) | Train step time with single precision. |
| lstm_models/pytorch-lstm/throughput_train_float32 | throughput (samples/s) | Train throughput with single precision. | | lstm_models/pytorch-lstm/fp32_train_throughput | throughput (samples/s) | Train throughput with single precision. |
| lstm_models/pytorch-lstm/steptime_inference_float32 | time (ms) | Inference step time with single precision. | | lstm_models/pytorch-lstm/fp32_inference_step_time | time (ms) | Inference step time with single precision. |
| lstm_models/pytorch-lstm/throughput_inference_float32 | throughput (samples/s) | Inference throughput with single precision. | | lstm_models/pytorch-lstm/fp32_inference_throughput | throughput (samples/s) | Inference throughput with single precision. |
| lstm_models/pytorch-lstm/steptime_train_float16 | time (ms) | Train step time with half precision. | | lstm_models/pytorch-lstm/fp16_train_step_time | time (ms) | Train step time with half precision. |
| lstm_models/pytorch-lstm/throughput_train_float16 | throughput (samples/s) | Train throughput with half precision. | | lstm_models/pytorch-lstm/fp16_train_throughput | throughput (samples/s) | Train throughput with half precision. |
| lstm_models/pytorch-lstm/steptime_inference_float16 | time (ms) | Inference step time with half precision. | | lstm_models/pytorch-lstm/fp16_inference_step_time | time (ms) | Inference step time with half precision. |
| lstm_models/pytorch-lstm/throughput_inference_float16 | throughput (samples/s) | Inference throughput with half precision. | | lstm_models/pytorch-lstm/fp16_inference_throughput | throughput (samples/s) | Inference throughput with half precision. |
### `cnn_models` ### `cnn_models`
...@@ -83,13 +83,13 @@ Run training or inference tasks with single or half precision for CNN models lis ...@@ -83,13 +83,13 @@ Run training or inference tasks with single or half precision for CNN models lis
#### Metrics #### Metrics
| Name | Unit | Description | | Name | Unit | Description |
|---------------------------------------------------------------|------------------------|---------------------------------------------| |------------------------------------------------------------|------------------------|---------------------------------------------|
| cnn_models/pytorch-${model_name}/steptime_train_float32 | time (ms) | Train step time with single precision. | | cnn_models/pytorch-${model_name}/fp32_train_step_time | time (ms) | Train step time with single precision. |
| cnn_models/pytorch-${model_name}/throughput_train_float32 | throughput (samples/s) | Train throughput with single precision. | | cnn_models/pytorch-${model_name}/fp32_train_throughput | throughput (samples/s) | Train throughput with single precision. |
| cnn_models/pytorch-${model_name}/steptime_inference_float32 | time (ms) | Inference step time with single precision. | | cnn_models/pytorch-${model_name}/fp32_inference_step_time | time (ms) | Inference step time with single precision. |
| cnn_models/pytorch-${model_name}/throughput_inference_float32 | throughput (samples/s) | Inference throughput with single precision. | | cnn_models/pytorch-${model_name}/fp32_inference_throughput | throughput (samples/s) | Inference throughput with single precision. |
| cnn_models/pytorch-${model_name}/steptime_train_float16 | time (ms) | Train step time with half precision. | | cnn_models/pytorch-${model_name}/fp16_train_step_time | time (ms) | Train step time with half precision. |
| cnn_models/pytorch-${model_name}/throughput_train_float16 | throughput (samples/s) | Train throughput with half precision. | | cnn_models/pytorch-${model_name}/fp16_train_throughput | throughput (samples/s) | Train throughput with half precision. |
| cnn_models/pytorch-${model_name}/steptime_inference_float16 | time (ms) | Inference step time with half precision. | | cnn_models/pytorch-${model_name}/fp16_inference_step_time | time (ms) | Inference step time with half precision. |
| cnn_models/pytorch-${model_name}/throughput_inference_float16 | throughput (samples/s) | Inference throughput with half precision. | | cnn_models/pytorch-${model_name}/fp16_inference_throughput | throughput (samples/s) | Inference throughput with half precision. |
...@@ -73,7 +73,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -73,7 +73,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
model_name = model_name.replace(char, '_') model_name = model_name.replace(char, '_')
elif value_prefix in line and model_name is not None: elif value_prefix in line and model_name is not None:
throughput = float(line[len(value_prefix):]) throughput = float(line[len(value_prefix):])
self._result.add_result(model_name, throughput) self._result.add_result(model_name + '_throughput', throughput)
model_name = None model_name = None
except BaseException as e: except BaseException as e:
logger.error( logger.error(
......
...@@ -237,7 +237,7 @@ def _benchmark(self): ...@@ -237,7 +237,7 @@ def _benchmark(self):
compute_end = time.perf_counter() compute_end = time.perf_counter()
torch.cuda.synchronize() torch.cuda.synchronize()
compute_metric = '{}_cost'.format(kernel) compute_metric = '{}_time'.format(kernel)
compute_elapse_times = [(compute_end - start) * 1000 / self._args.num_steps] compute_elapse_times = [(compute_end - start) * 1000 / self._args.num_steps]
if not self._process_numeric_result(compute_metric, compute_elapse_times): if not self._process_numeric_result(compute_metric, compute_elapse_times):
......
...@@ -27,21 +27,21 @@ def __init__(self, name, parameters=''): ...@@ -27,21 +27,21 @@ def __init__(self, name, parameters=''):
# TODO - To support more architectures, currently only support compute capability = 7.0 and 8.0 # TODO - To support more architectures, currently only support compute capability = 7.0 and 8.0
self.__kernel_map = { self.__kernel_map = {
7.0: { 7.0: {
'FP64': 'cutlass_simt_dgemm_128x128_8x2_*', 'fp64': 'cutlass_simt_dgemm_128x128_8x2_*',
'FP32': 'cutlass_simt_sgemm_128x128_8x2_*', 'fp32': 'cutlass_simt_sgemm_128x128_8x2_*',
'FP16': 'cutlass_simt_hgemm_256x128_8x2_*', 'fp16': 'cutlass_simt_hgemm_256x128_8x2_*',
'FP16_TC': 'cutlass_tensorop_h884gemm_256x128_32x2_*', 'fp16_tc': 'cutlass_tensorop_h884gemm_256x128_32x2_*',
}, },
8.0: { 8.0: {
'FP64': 'cutlass_simt_dgemm_128x128_8x2_*', 'fp64': 'cutlass_simt_dgemm_128x128_8x2_*',
'FP32': 'cutlass_simt_sgemm_128x128_8x2_*', 'fp32': 'cutlass_simt_sgemm_128x128_8x2_*',
'FP16': 'cutlass_simt_hgemm_256x128_8x2_*', 'fp16': 'cutlass_simt_hgemm_256x128_8x2_*',
'FP64_TC': 'cutlass_tensorop_d884gemm_128x128_16x3_*', 'fp64_tc': 'cutlass_tensorop_d884gemm_128x128_16x3_*',
'TF32_TC': 'cutlass_tensorop_tf32_s1688gemm_tf32_256x128_16x3_*', 'tf32_tc': 'cutlass_tensorop_tf32_s1688gemm_tf32_256x128_16x3_*',
'BF16_TC': 'cutlass_tensorop_bf16_s16816gemm_bf16_256x128_32x3_*', 'bf16_tc': 'cutlass_tensorop_bf16_s16816gemm_bf16_256x128_32x3_*',
'FP16_TC': 'cutlass_tensorop_h16816gemm_256x128_32x3_*', 'fp16_tc': 'cutlass_tensorop_h16816gemm_256x128_32x3_*',
'INT8_TC': 'cutlass_tensorop_s8_i16832gemm_s8_256x128_64x3_*', 'int8_tc': 'cutlass_tensorop_s8_i16832gemm_s8_256x128_64x3_*',
'INT4_TC': 'cutlass_tensorop_s4_i16864gemm_s4_256x128_128x3_*', 'int4_tc': 'cutlass_tensorop_s4_i16864gemm_s4_256x128_128x3_*',
} }
} }
self.__parse_logline = [ self.__parse_logline = [
...@@ -128,7 +128,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -128,7 +128,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
) )
return False return False
self._result.add_result(precision, max(flops)) self._result.add_result(self._metric_map[precision], max(flops))
return True return True
......
...@@ -209,7 +209,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -209,7 +209,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
lat_unit_prefix = '%s_%s' % (io_type_prefix, lat_unit) lat_unit_prefix = '%s_%s' % (io_type_prefix, lat_unit)
for lat_percentile in ['95.000000', '99.000000', '99.900000']: for lat_percentile in ['95.000000', '99.000000', '99.900000']:
lat = fio_output['jobs'][0][io_type][lat_unit]['percentile'][lat_percentile] lat = fio_output['jobs'][0][io_type][lat_unit]['percentile'][lat_percentile]
self._result.add_result('%s_%s' % (lat_unit_prefix, lat_percentile), float(lat)) self._result.add_result('%s_%s' % (lat_unit_prefix, lat_percentile[:-5]), float(lat))
break break
except BaseException as e: except BaseException as e:
self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE) self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
......
...@@ -20,9 +20,24 @@ def __init__(self, name, parameters=''): ...@@ -20,9 +20,24 @@ def __init__(self, name, parameters=''):
super().__init__(name, parameters) super().__init__(name, parameters)
self._support_precisions = [ self._support_precisions = [
'FP64', 'FP32', 'FP16', 'FP64_TC', 'TF32_TC', 'BF16_TC', 'FP16_TC', 'INT8_TC', 'INT4_TC' 'fp64', 'fp32', 'fp16', 'fp64_tc', 'tf32_tc', 'bf16_tc', 'fp16_tc', 'int8_tc', 'int4_tc'
] ]
self._precision_need_to_run = list() self._precision_need_to_run = list()
self._metric_map = {
'fp64': 'fp64_flops',
'fp32': 'fp32_flops',
'fp16': 'fp16_flops',
'fp64_tc': 'fp64_tc_flops',
'tf32_tc': 'tf32_tc_flops',
'bf16_tc': 'bf16_tc_flops',
'fp16_tc': 'fp16_tc_flops',
'int8_tc': 'int8_tc_iops',
'int4_tc': 'int4_tc_iops',
'fp32_xdlops': 'fp32_xdlops_flops',
'fp16_xdlops': 'fp16_xdlops_flops',
'bf16_xdlops': 'bf16_xdlops_flops',
'int8_xdlops': 'int8_xdlops_iops'
}
def add_parser_arguments(self): def add_parser_arguments(self):
"""Add the specified arguments.""" """Add the specified arguments."""
...@@ -76,7 +91,7 @@ def _preprocess(self): ...@@ -76,7 +91,7 @@ def _preprocess(self):
if len(self._args.precision) == 0: if len(self._args.precision) == 0:
self._precision_need_to_run = self._support_precisions self._precision_need_to_run = self._support_precisions
else: else:
self._args.precision = [p.upper() for p in self._args.precision] self._args.precision = [p.lower() for p in self._args.precision]
for p in self._args.precision: for p in self._args.precision:
if p not in self._support_precisions: if p not in self._support_precisions:
logger.warning( logger.warning(
......
...@@ -24,6 +24,25 @@ def __init__(self, name, parameters=''): ...@@ -24,6 +24,25 @@ def __init__(self, name, parameters=''):
self._bin_name = 'network_test' self._bin_name = 'network_test'
if self._name == 'gpcnet-network-load-test': if self._name == 'gpcnet-network-load-test':
self._bin_name = 'network_load_test' self._bin_name = 'network_load_test'
self.__metrics = {
'RRTwo-sidedLat(8B)': 'rr_two-sided_lat',
'RRGetLat(8B)': 'rr_get_lat',
'RRTwo-sidedBW(131072B)': 'rr_two-sided_bw',
'RRPutBW(131072B)': 'rr_put_bw',
'RRTwo-sidedBW+Sync(131072B)': 'rr_two-sided+sync_bw',
'NatTwo-sidedBW(131072B)': 'nat_two-sided_bw',
'MultipleAllreduce(8B)': 'multiple_allreduce_time',
'MultipleAlltoall(4096B)': 'multiple_alltoall_bw',
'GetBcast(4096B)': 'get_bcast_bw',
'PutIncast(4096B)': 'put_incast_bw',
'Two-sidedIncast(4096B)': 'two-sided_incast_bw',
'Alltoall(4096B)': 'alltoall_bw'
}
self.__metrics_x = {
'RRTwo-sidedLat(8B)': 'rr_two-sided_lat_x',
'RRTwo-sidedBW+Sync(131072B)': 'rr_two-sided+sync_bw_x',
'MultipleAllreduce(8B)': 'multiple_allreduce_x',
}
def add_parser_arguments(self): def add_parser_arguments(self):
"""Add the specified arguments.""" """Add the specified arguments."""
...@@ -70,18 +89,29 @@ def _process_raw_result(self, idx, raw_output): # noqa: C901 ...@@ -70,18 +89,29 @@ def _process_raw_result(self, idx, raw_output): # noqa: C901
items = [item.strip() for item in items] items = [item.strip() for item in items]
# Get table name # Get table name
if len(items) == 3 and 'Tests' in items[1]: if len(items) == 3 and 'Tests' in items[1]:
test_name = items[1].replace(' ', '') test_name = items[1].replace(' ', '_').lower()
# Get the line of the table labels # Get the line of the table labels
elif 'Avg' in line or 'Name' in line: elif 'Avg' in line or 'Name' in line:
labels = items labels = items
# Get values related to the labels # Get values related to the labels
else: else:
name_prefix = items[1].replace(' ', '') if self._name == 'gpcnet-network-test':
for i in range(2, len(items) - 1): name_prefix = items[1].replace(' ', '')
if labels[i] != 'Units': for i in range(2, len(items) - 1):
self._result.add_result( if labels[i] != 'Units':
test_name + '_' + name_prefix + '_' + labels[i], float(items[i].strip('X')) self._result.add_result(
) self.__metrics[name_prefix] + '_' + labels[i].lower(),
float(items[i].strip('X'))
)
elif test_name == 'network_tests_running_with_congestion_tests_-_key_results' \
and self._name == 'gpcnet-network-load-test':
name_prefix = items[1].replace(' ', '')
for i in range(2, len(items) - 1):
if labels[i] != 'Units':
self._result.add_result(
self.__metrics_x[name_prefix] + '_' + labels[i].lower(),
float(items[i].strip('X'))
)
elif 'ERROR: this application must be run on at least' in raw_output: elif 'ERROR: this application must be run on at least' in raw_output:
return True return True
else: else:
......
...@@ -100,7 +100,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -100,7 +100,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
output_lines = [x.strip() for x in raw_output.strip().splitlines()] output_lines = [x.strip() for x in raw_output.strip().splitlines()]
for output_line in output_lines: for output_line in output_lines:
tag, bw_str = output_line.split() tag, bw_str = output_line.split()
self._result.add_result(tag, float(bw_str)) self._result.add_result(tag + '_bw', float(bw_str))
except BaseException as e: except BaseException as e:
self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE) self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
logger.error( logger.error(
......
...@@ -201,8 +201,8 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -201,8 +201,8 @@ def _process_raw_result(self, cmd_idx, raw_output):
continue continue
# Extract value from the line # Extract value from the line
size = int(values[0]) size = int(values[0])
avg_bw = float(values[-2]) avg_bw = float(values[-2]) / 1000
metric = 'IB_{}_{}_Avg_{}'.format(self._args.commands[cmd_idx], size, str(self._args.ib_index)) metric = 'ib_{}_{}_ib{}_bw'.format(self._args.commands[cmd_idx], size, str(self._args.ib_index))
# Filter useless value in client output # Filter useless value in client output
if metric not in metric_set: if metric not in metric_set:
metric_set.add(metric) metric_set.add(metric)
......
...@@ -207,19 +207,22 @@ def __prepare_config(self, node_num): ...@@ -207,19 +207,22 @@ def __prepare_config(self, node_num):
# Use the config file defined in args # Use the config file defined in args
else: else:
self.__config_path = self._args.config self.__config_path = self._args.config
# Read the hostfile
with open(self._args.hostfile, 'r') as f:
hosts = f.readlines()
# Read the config file and check if it's empty and valid # Read the config file and check if it's empty and valid
with open(self.__config_path, 'r') as f: with open(self.__config_path, 'r') as f:
lines = f.readlines() lines = f.readlines()
for line in lines: for line in lines:
pairs = line.strip().strip(';').split(';') pairs = line.strip().strip(';').split(';')
# Check format of config # Check format of config
for pair in pairs: for pair in pairs:
pair = pair.split(',') pair = pair.split(',')
if len(pair) != 2: if len(pair) != 2:
return False return False
pair[0] = int(pair[0]) pair[0] = int(pair[0])
pair[1] = int(pair[1]) pair[1] = int(pair[1])
self.__config.extend(pairs) self.__config.append('{}_{}'.format(hosts[pair[0]].strip(), hosts[pair[1]].strip()))
except BaseException as e: except BaseException as e:
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT) self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
logger.error('Failed to generate and check config - benchmark: {}, message: {}.'.format(self._name, str(e))) logger.error('Failed to generate and check config - benchmark: {}, message: {}.'.format(self._name, str(e)))
...@@ -345,6 +348,8 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901 ...@@ -345,6 +348,8 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901
content = raw_output.splitlines() content = raw_output.splitlines()
line_index = 0 line_index = 0
config_index = 0 config_index = 0
command = self._args.commands[cmd_idx]
suffix = command.split('_')[-1]
try: try:
result_index = -1 result_index = -1
for index, line in enumerate(content): for index, line in enumerate(content):
...@@ -357,11 +362,22 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901 ...@@ -357,11 +362,22 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901
content = content[result_index:] content = content[result_index:]
for line in content: for line in content:
line = list(filter(None, line.strip().split(','))) line = list(filter(None, line.strip().split(',')))
pair_index = 0
for item in line: for item in line:
metric = '{line}-{pair}'.format(line=str(line_index), pair=self.__config[config_index]) metric = '{command}_{line}_{pair}_{host}_{suffix}'.format(
self._result.add_result(metric, float(item)) command=command,
line=str(line_index),
pair=pair_index,
host=self.__config[config_index],
suffix=suffix
)
value = float(item)
if 'bw' in command:
value = value / 1000
self._result.add_result(metric, value)
valid = True valid = True
config_index += 1 config_index += 1
pair_index += 1
line_index += 1 line_index += 1
except Exception: except Exception:
valid = False valid = False
......
...@@ -100,8 +100,8 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -100,8 +100,8 @@ def _process_raw_result(self, cmd_idx, raw_output):
) )
return False return False
self._result.add_result('event_overhead', result[0]) self._result.add_result('event_time', result[0])
self._result.add_result('wall_overhead', result[1]) self._result.add_result('wall_time', result[1])
return True return True
......
...@@ -20,7 +20,7 @@ def __init__(self, name, parameters=''): ...@@ -20,7 +20,7 @@ def __init__(self, name, parameters=''):
super().__init__(name, parameters) super().__init__(name, parameters)
self._mem_types = ['htod', 'dtoh', 'dtod'] self._mem_types = ['htod', 'dtoh', 'dtod']
self._metrics = ['H2D_Mem_BW', 'D2H_Mem_BW', 'D2D_Mem_BW'] self._metrics = ['h2d_bw', 'd2h_bw', 'd2d_bw']
self._memory = ['pinned', 'unpinned'] self._memory = ['pinned', 'unpinned']
self._parse_logline_map = {'htod': 'H2D', 'dtoh': 'D2H', 'dtod': 'D2D'} self._parse_logline_map = {'htod': 'H2D', 'dtoh': 'D2H', 'dtod': 'D2D'}
......
...@@ -22,13 +22,13 @@ def __init__(self, name, parameters=''): ...@@ -22,13 +22,13 @@ def __init__(self, name, parameters=''):
super().__init__(name, parameters) super().__init__(name, parameters)
self._bin_name = 'rocblas-bench' self._bin_name = 'rocblas-bench'
self._support_precisions = ['FP64', 'FP32_xDLOPS', 'FP16_xDLOPS', 'BF16_xDLOPS', 'INT8_xDLOPS'] self._support_precisions = ['fp64', 'fp32_xdlops', 'fp16_xdlops', 'bf16_xdlops', 'int8_xdlops']
self.__precision_and_kernel_map = { self.__precision_and_kernel_map = {
'FP64': '-r f64_r -f gemm', 'fp64': '-r f64_r -f gemm',
'FP32_xDLOPS': '-r f32_r -f gemm_ex --compute_type f32_r', 'fp32_xdlops': '-r f32_r -f gemm_ex --compute_type f32_r',
'FP16_xDLOPS': '-r f16_r -f gemm_ex --compute_type f32_r', 'fp16_xdlops': '-r f16_r -f gemm_ex --compute_type f32_r',
'BF16_xDLOPS': '-r bf16_r -f gemm_ex --compute_type f32_r', 'bf16_xdlops': '-r bf16_r -f gemm_ex --compute_type f32_r',
'INT8_xDLOPS': '--a_type i8_r --b_type i8_r --c_type i32_r --d_type i32_r -f gemm_ex --compute_type i32_r' 'int8_xdlops': '--a_type i8_r --b_type i8_r --c_type i32_r --d_type i32_r -f gemm_ex --compute_type i32_r'
} }
def add_parser_arguments(self): def add_parser_arguments(self):
...@@ -154,7 +154,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -154,7 +154,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
) )
return False return False
self._result.add_result(precision, gflops) self._result.add_result(self._metric_map[precision], gflops)
return True return True
......
...@@ -64,21 +64,18 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -64,21 +64,18 @@ def _process_raw_result(self, cmd_idx, raw_output):
mem_bw = -1 mem_bw = -1
value_index = -1 value_index = -1
size_index = -1
valid = True valid = True
content = raw_output.splitlines() content = raw_output.splitlines()
try: try:
metric = self._metrics[self._mem_types.index(self._args.mem_type[cmd_idx])]
parse_logline = self._parse_logline_map[self._args.mem_type[cmd_idx]] parse_logline = self._parse_logline_map[self._args.mem_type[cmd_idx]]
for line in content: for line in content:
if parse_logline in line and value_index != -1: if parse_logline in line and value_index != -1:
line = line.split() line = line.split()
mem_bw = float(line[value_index]) mem_bw = max(mem_bw, float(line[value_index]))
metric = self._args.mem_type[cmd_idx] + '_' + line[size_index]
self._result.add_result(metric, mem_bw)
elif 'mean' in line: elif 'mean' in line:
line = line.split() line = line.split()
value_index = line.index('mean') value_index = line.index('mean')
size_index = line.index('atts')
except BaseException: except BaseException:
valid = False valid = False
finally: finally:
...@@ -89,7 +86,7 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -89,7 +86,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
) )
) )
return False return False
self._result.add_result(metric, mem_bw)
return True return True
......
...@@ -256,7 +256,7 @@ def _benchmark(self): ...@@ -256,7 +256,7 @@ def _benchmark(self):
logger.error('Unknown sharding mode - benchmark: {}, mode: {}.'.format(self._name, mode)) logger.error('Unknown sharding mode - benchmark: {}, mode: {}.'.format(self._name, mode))
return False return False
metric = '{}'.format(mode) metric = '{}_time'.format(mode)
if not self._process_numeric_result(metric, elapse_times, reduce_type=ReduceType.MAX): if not self._process_numeric_result(metric, elapse_times, reduce_type=ReduceType.MAX):
return False return False
......
...@@ -182,12 +182,12 @@ def _process_raw_result(self, idx, raw_output): ...@@ -182,12 +182,12 @@ def _process_raw_result(self, idx, raw_output):
mininum = float(res[labels.index('Minimum')].strip('ms')) mininum = float(res[labels.index('Minimum')].strip('ms'))
maximum = float(res[labels.index('Maximum')].strip('ms')) maximum = float(res[labels.index('Maximum')].strip('ms'))
average = float(res[labels.index('Average')].strip('ms')) average = float(res[labels.index('Average')].strip('ms'))
self._result.add_result('Successed_' + host, suc) self._result.add_result(host + '_successed_count', suc)
self._result.add_result('Failed_' + host, fail) self._result.add_result(host + '_failed_count', fail)
self._result.add_result('Success_Rate_' + host, rate) self._result.add_result(host + '_success_rate', rate)
self._result.add_result('Minimum_' + host, mininum) self._result.add_result(host + '_time_min', mininum)
self._result.add_result('Maximum_' + host, maximum) self._result.add_result(host + '_time_max', maximum)
self._result.add_result('Average_' + host, average) self._result.add_result(host + '_time_avg', average)
except Exception as e: except Exception as e:
logger.error( logger.error(
'The result format is invalid - round: {}, benchmark: {}, address: {}, raw output: {}, message: {}.'. 'The result format is invalid - round: {}, benchmark: {}, address: {}, raw output: {}, message: {}.'.
......
...@@ -131,16 +131,17 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -131,16 +131,17 @@ def _process_raw_result(self, cmd_idx, raw_output):
success = False success = False
try: try:
model = self._args.pytorch_models[cmd_idx]
for line in raw_output.strip().splitlines(): for line in raw_output.strip().splitlines():
line = line.strip() line = line.strip()
if '[I] mean:' in line or '[I] percentile:' in line: if '[I] mean:' in line or '[I] percentile:' in line:
tag = 'mean' if '[I] mean:' in line else '99' tag = 'mean' if '[I] mean:' in line else '99'
lats = re.findall(r'(\d+\.\d+) ms', line) lats = re.findall(r'(\d+\.\d+) ms', line)
if len(lats) == 1: if len(lats) == 1:
self._result.add_result(f'gpu_lat_ms_{tag}', float(lats[0])) self._result.add_result(f'{model}_gpu_time_{tag}', float(lats[0]))
elif len(lats) == 2: elif len(lats) == 2:
self._result.add_result(f'host_lat_ms_{tag}', float(lats[0])) self._result.add_result(f'{model}_host_time_{tag}', float(lats[0]))
self._result.add_result(f'end_to_end_lat_ms_{tag}', float(lats[1])) self._result.add_result(f'{model}_end_to_end_time_{tag}', float(lats[1]))
success = True success = True
except BaseException as e: except BaseException as e:
self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE) self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment