Unverified Commit 0a1a15ea authored by one's avatar one Committed by GitHub
Browse files

Benchmarks: Update gpu-hpcg metrics to encode process and problem shape (#8)

* Update gpu-hpcg metrics to encode process and problem shape

* Fix tests
parent d7a56e0b
......@@ -198,21 +198,20 @@ Performed by [rocHPCG](https://github.com/ROCm/rocHPCG).
#### Metrics
rocHPCG reports operation-level metrics for `final`, `ddot`, `waxpby`, `spmv`, `mg`, and `total`.
| Name | Unit | Description |
|-----------------------------------------------------|------------------|---------------------------------------------------------|
| gpu-hpcg/${operation}\_gflops | FLOPS (GFLOPS) | Throughput for the specified rocHPCG operation. |
| gpu-hpcg/${operation}\_bandwidth | bandwidth (GB/s) | Bandwidth for the specified rocHPCG operation. |
| gpu-hpcg/${operation}\_gflops_per_process | FLOPS (GFLOPS) | Per-process throughput for the specified operation. |
| gpu-hpcg/${operation}\_bandwidth_per_process | bandwidth (GB/s) | Per-process bandwidth for the specified operation. |
| gpu-hpcg/setup_time | time (s) | Setup phase duration. |
| gpu-hpcg/optimization_time | time (s) | Optimization phase duration. |
| gpu-hpcg/total_time | time (s) | Total runtime. |
| gpu-hpcg/is_valid | | Run validity inferred from rocHPCG invalid markers. |
| gpu-hpcg/local_domain_[x\|y\|z] | | Local domain size for each dimension. |
| gpu-hpcg/global_domain_[x\|y\|z] | | Global domain size for each dimension. |
| gpu-hpcg/process_domain_[x\|y\|z] | | Process topology for each dimension. |
rocHPCG reports performance and time metrics.
Performance metrics are reported for `final`, `ddot`, `waxpby`, `spmv`, `mg`, and `total`.
The metric key includes the configured process domain and local problem size:
`p${npx}x${npy}x${npz}_n${nx}x${ny}x${nz}`.
| Name | Unit | Description |
|--------------------------------------------------------------------------------------------------|------------------|---------------------------------------------------------|
| `gpu-hpcg/${operation}_p${npx}x${npy}x${npz}_n${nx}x${ny}x${nz}_gflops` | FLOPS (GFLOPS) | Throughput for the specified rocHPCG operation. |
| `gpu-hpcg/${operation}_p${npx}x${npy}x${npz}_n${nx}x${ny}x${nz}_bandwidth` | bandwidth (GB/s) | Bandwidth for the specified rocHPCG operation. |
| `gpu-hpcg/${operation}_p${npx}x${npy}x${npz}_n${nx}x${ny}x${nz}_gflops_per_process` | FLOPS (GFLOPS) | Per-process throughput for the specified operation. |
| `gpu-hpcg/${operation}_p${npx}x${npy}x${npz}_n${nx}x${ny}x${nz}_bandwidth_per_process` | bandwidth (GB/s) | Per-process bandwidth for the specified operation. |
| `gpu-hpcg/setup_time_p${npx}x${npy}x${npz}_n${nx}x${ny}x${nz}` | time (s) | Setup phase duration. |
| `gpu-hpcg/optimization_time_p${npx}x${npy}x${npz}_n${nx}x${ny}x${nz}` | time (s) | Optimization phase duration. |
| `gpu-hpcg/total_time_p${npx}x${npy}x${npz}_n${nx}x${ny}x${nz}` | time (s) | Total runtime. |
### `cpu-stream`
......
......@@ -12,45 +12,35 @@
# Matches 'rccl-bw[:variant]/<op>_<size>_<suffix>[:rank]' metric names.
_RCCL_PATTERN = re.compile(r'^(?P<bench>rccl-bw(?::[^/]+)?)/(?P<op>[^_]+)_(?P<size>\d+)_(?P<suffix>.+?)(?::\d+)?$')
# Matches 'gpu-hpcg[:variant]/<metric>[:rank]'; the <metric> part is parsed further by the patterns below.
_HPCG_PATTERN = re.compile(r'^(?P<bench>gpu-hpcg(?::[^/]+)?)/(?P<metric>.+?)(?::\d+)?$')
# Performance metrics: '<subject>_p<npx>x<npy>x<npz>_n<nx>x<ny>x<nz>_<type>',
# where the p-triple is the process domain and the n-triple the local problem size.
_HPCG_WORKLOAD_PATTERN = re.compile(
    r'^(?P<subject>final|ddot|waxpby|spmv|mg|total)_'
    r'p(?P<npx>\d+)x(?P<npy>\d+)x(?P<npz>\d+)_'
    r'n(?P<nx>\d+)x(?P<ny>\d+)x(?P<nz>\d+)_'
    r'(?P<type>gflops|bandwidth|gflops_per_process|bandwidth_per_process)$'
)
# Time metrics: '<subject>_p<npx>x<npy>x<npz>_n<nx>x<ny>x<nz>' (no trailing type token).
_HPCG_TIME_PATTERN = re.compile(
    r'^(?P<subject>setup_time|optimization_time|total_time)_'
    r'p(?P<npx>\d+)x(?P<npy>\d+)x(?P<npz>\d+)_'
    r'n(?P<nx>\d+)x(?P<ny>\d+)x(?P<nz>\d+)$'
)
_HPCG_METRIC_ORDER = {
'local_domain_x': 0,
'local_domain_y': 1,
'local_domain_z': 2,
'global_domain_x': 3,
'global_domain_y': 4,
'global_domain_z': 5,
'process_domain_x': 6,
'process_domain_y': 7,
'process_domain_z': 8,
'total_time': 9,
'setup_time': 10,
'optimization_time': 11,
'ddot_gflops': 12,
'ddot_bandwidth': 13,
'ddot_gflops_per_process': 14,
'ddot_bandwidth_per_process': 15,
'waxpby_gflops': 16,
'waxpby_bandwidth': 17,
'waxpby_gflops_per_process': 18,
'waxpby_bandwidth_per_process': 19,
'spmv_gflops': 20,
'spmv_bandwidth': 21,
'spmv_gflops_per_process': 22,
'spmv_bandwidth_per_process': 23,
'mg_gflops': 24,
'mg_bandwidth': 25,
'mg_gflops_per_process': 26,
'mg_bandwidth_per_process': 27,
'total_gflops': 28,
'total_bandwidth': 29,
'total_gflops_per_process': 30,
'total_bandwidth_per_process': 31,
'final_gflops': 32,
'final_bandwidth': 33,
'final_gflops_per_process': 34,
'final_bandwidth_per_process': 35,
'is_valid': 36,
# Relative ordering of HPCG metric subjects used when sorting metric names;
# unknown subjects fall back to 999 via .get() at the call sites.
_HPCG_SUBJECT_ORDER = {
    'setup_time': 0,
    'optimization_time': 1,
    'total_time': 2,
    'ddot': 3,
    'waxpby': 4,
    'spmv': 5,
    'mg': 6,
    'total': 7,
    'final': 8,
}
# Relative ordering of performance metric types within a single subject.
_HPCG_PERF_TYPE_ORDER = {
    'gflops': 0,
    'bandwidth': 1,
    'gflops_per_process': 2,
    'bandwidth_per_process': 3,
}
......@@ -70,6 +60,18 @@ def _rccl_sort_key(metric_name):
)
def _hpcg_workload_key(match):
"""Return a numeric sort key for the HPCG process domain and local problem size."""
return (
int(match.group('npx')),
int(match.group('npy')),
int(match.group('npz')),
int(match.group('nx')),
int(match.group('ny')),
int(match.group('nz')),
)
def _hpcg_sort_key(metric_name):
"""Sort HPCG metrics roughly in the order they appear in rocHPCG logs."""
match = _HPCG_PATTERN.match(metric_name)
......@@ -77,10 +79,34 @@ def _hpcg_sort_key(metric_name):
return None
metric = match.group('metric')
time_match = _HPCG_TIME_PATTERN.match(metric)
if time_match:
return (
1,
match.group('bench'),
_HPCG_SUBJECT_ORDER.get(time_match.group('subject'), 999),
0,
*_hpcg_workload_key(time_match),
metric_name,
)
workload_match = _HPCG_WORKLOAD_PATTERN.match(metric)
if workload_match:
subject = workload_match.group('subject')
metric_type = workload_match.group('type')
return (
1,
match.group('bench'),
_HPCG_SUBJECT_ORDER.get(subject, 999),
_HPCG_PERF_TYPE_ORDER.get(metric_type, 999),
*_hpcg_workload_key(workload_match),
metric_name,
)
return (
1,
match.group('bench'),
_HPCG_METRIC_ORDER.get(metric, 999),
_HPCG_SUBJECT_ORDER.get(metric, 999),
metric,
metric_name,
)
......
......@@ -27,15 +27,8 @@ class GpuHpcgBenchmark(MicroBenchmarkWithInvoke):
'Setup Time': 'setup_time',
'Optimization Time': 'optimization_time'
}
# Maps rocHPCG domain summary labels to metric-name prefixes.
_domain_metric_map = {
    'Local domain': 'local_domain',
    'Global domain': 'global_domain',
    'Process domain': 'process_domain'
}
# Captures a numeric value followed by its throughput/bandwidth unit.
_float_pattern = re.compile(r'([0-9]+(?:\.[0-9]+)?)\s+(GFlop/s|GB/s)')
# Captures 'X x Y x Z' dimension triples from domain summary lines.
_dimension_pattern = re.compile(r'([0-9]+)\s*x\s*([0-9]+)\s*x\s*([0-9]+)')
# Captures a duration in seconds, e.g. '7.55 sec'.
_time_value_pattern = re.compile(r'([0-9]+(?:\.[0-9]+)?)\s+sec')
# Substrings rocHPCG prints when a run is not valid.
_invalid_markers = ['*** WARNING *** INVALID RUN', '*** WARNING *** THIS IS NOT A VALID RUN ***']
def __init__(self, name, parameters=''):
"""Constructor.
......@@ -203,15 +196,6 @@ def _process_raw_result(self, cmd_idx, raw_output):
'setup_time',
'optimization_time',
'total_time',
'local_domain_x',
'local_domain_y',
'local_domain_z',
'global_domain_x',
'global_domain_y',
'global_domain_z',
'process_domain_x',
'process_domain_y',
'process_domain_z',
}
for raw_line in raw_output.splitlines():
......@@ -226,10 +210,6 @@ def _process_raw_result(self, cmd_idx, raw_output):
if self._parse_time_line(line, parsed_results):
continue
self._parse_domain_line(line, parsed_results)
parsed_results['is_valid'] = 0 if any(marker in raw_output for marker in self._invalid_markers) else 1
missing_metrics = sorted(metric for metric in required_metrics if metric not in parsed_results)
if missing_metrics:
logger.error(
......@@ -241,10 +221,32 @@ def _process_raw_result(self, cmd_idx, raw_output):
return False
for metric, value in parsed_results.items():
self._result.add_result(metric, value)
self._result.add_result(self._format_metric_name(metric), value)
return True
def _format_metric_name(self, metric):
"""Format a rocHPCG metric with the configured process domain and local problem size."""
metric_suffixes = (
'gflops_per_process',
'bandwidth_per_process',
'gflops',
'bandwidth',
)
workload = (
f'p{self._args.npx}x{self._args.npy}x{self._args.npz}_'
f'n{self._args.nx}x{self._args.ny}x{self._args.nz}'
)
if metric in self._time_metric_map.values():
return f'{metric}_{workload}'
for suffix in metric_suffixes:
suffix_token = f'_{suffix}'
if metric.endswith(suffix_token):
return f'{metric[:-len(suffix_token)]}_{workload}_{suffix}'
return metric
def _parse_operation_line(self, line, parsed_results):
"""Parse one rocHPCG operation summary line."""
operation_key = None
......@@ -284,20 +286,3 @@ def _parse_time_line(self, line, parsed_results):
return True
return False
def _parse_domain_line(self, line, parsed_results):
"""Parse one rocHPCG domain summary line."""
for label, metric_prefix in self._domain_metric_map.items():
if not line.startswith(label + ':'):
continue
match = self._dimension_pattern.search(line)
if not match:
return False
parsed_results[f'{metric_prefix}_x'] = int(match.group(1))
parsed_results[f'{metric_prefix}_y'] = int(match.group(2))
parsed_results[f'{metric_prefix}_z'] = int(match.group(3))
return True
return False
......@@ -90,214 +90,40 @@ superbench:
statistics: mean
categories: HPCG gpu-hpcg:r1
metrics:
- gpu-hpcg:r1/is_valid
- gpu-hpcg:r1/final_gflops
- gpu-hpcg:r1/final_bandwidth
- gpu-hpcg:r1/final_gflops_per_process
- gpu-hpcg:r1/final_bandwidth_per_process
- gpu-hpcg:r1/ddot_gflops
- gpu-hpcg:r1/ddot_bandwidth
- gpu-hpcg:r1/ddot_gflops_per_process
- gpu-hpcg:r1/ddot_bandwidth_per_process
- gpu-hpcg:r1/waxpby_gflops
- gpu-hpcg:r1/waxpby_bandwidth
- gpu-hpcg:r1/waxpby_gflops_per_process
- gpu-hpcg:r1/waxpby_bandwidth_per_process
- gpu-hpcg:r1/spmv_gflops
- gpu-hpcg:r1/spmv_bandwidth
- gpu-hpcg:r1/spmv_gflops_per_process
- gpu-hpcg:r1/spmv_bandwidth_per_process
- gpu-hpcg:r1/mg_gflops
- gpu-hpcg:r1/mg_bandwidth
- gpu-hpcg:r1/mg_gflops_per_process
- gpu-hpcg:r1/mg_bandwidth_per_process
- gpu-hpcg:r1/total_gflops
- gpu-hpcg:r1/total_bandwidth
- gpu-hpcg:r1/total_gflops_per_process
- gpu-hpcg:r1/total_bandwidth_per_process
- gpu-hpcg:r1/local_domain_x
- gpu-hpcg:r1/local_domain_y
- gpu-hpcg:r1/local_domain_z
- gpu-hpcg:r1/process_domain_x
- gpu-hpcg:r1/process_domain_y
- gpu-hpcg:r1/process_domain_z
- gpu-hpcg:r1/(setup_time|optimization_time|total_time)_p1x1x1_n560x280x280
- gpu-hpcg:r1/(ddot|waxpby|spmv|mg|total|final)_p1x1x1_n560x280x280_(gflops_per_process|bandwidth_per_process|gflops|bandwidth)
gpu_hpcg_r2:
statistics: mean
categories: HPCG gpu-hpcg:r2
metrics:
- gpu-hpcg:r2/is_valid
- gpu-hpcg:r2/final_gflops
- gpu-hpcg:r2/final_bandwidth
- gpu-hpcg:r2/final_gflops_per_process
- gpu-hpcg:r2/final_bandwidth_per_process
- gpu-hpcg:r2/ddot_gflops
- gpu-hpcg:r2/ddot_bandwidth
- gpu-hpcg:r2/ddot_gflops_per_process
- gpu-hpcg:r2/ddot_bandwidth_per_process
- gpu-hpcg:r2/waxpby_gflops
- gpu-hpcg:r2/waxpby_bandwidth
- gpu-hpcg:r2/waxpby_gflops_per_process
- gpu-hpcg:r2/waxpby_bandwidth_per_process
- gpu-hpcg:r2/spmv_gflops
- gpu-hpcg:r2/spmv_bandwidth
- gpu-hpcg:r2/spmv_gflops_per_process
- gpu-hpcg:r2/spmv_bandwidth_per_process
- gpu-hpcg:r2/mg_gflops
- gpu-hpcg:r2/mg_bandwidth
- gpu-hpcg:r2/mg_gflops_per_process
- gpu-hpcg:r2/mg_bandwidth_per_process
- gpu-hpcg:r2/total_gflops
- gpu-hpcg:r2/total_bandwidth
- gpu-hpcg:r2/total_gflops_per_process
- gpu-hpcg:r2/total_bandwidth_per_process
- gpu-hpcg:r2/local_domain_x
- gpu-hpcg:r2/local_domain_y
- gpu-hpcg:r2/local_domain_z
- gpu-hpcg:r2/process_domain_x
- gpu-hpcg:r2/process_domain_y
- gpu-hpcg:r2/process_domain_z
- gpu-hpcg:r2/(setup_time|optimization_time|total_time)_p2x1x1_n560x280x280
- gpu-hpcg:r2/(ddot|waxpby|spmv|mg|total|final)_p2x1x1_n560x280x280_(gflops_per_process|bandwidth_per_process|gflops|bandwidth)
gpu_hpcg_r4:
statistics: mean
categories: HPCG gpu-hpcg:r4
metrics:
- gpu-hpcg:r4/is_valid
- gpu-hpcg:r4/final_gflops
- gpu-hpcg:r4/final_bandwidth
- gpu-hpcg:r4/final_gflops_per_process
- gpu-hpcg:r4/final_bandwidth_per_process
- gpu-hpcg:r4/ddot_gflops
- gpu-hpcg:r4/ddot_bandwidth
- gpu-hpcg:r4/ddot_gflops_per_process
- gpu-hpcg:r4/ddot_bandwidth_per_process
- gpu-hpcg:r4/waxpby_gflops
- gpu-hpcg:r4/waxpby_bandwidth
- gpu-hpcg:r4/waxpby_gflops_per_process
- gpu-hpcg:r4/waxpby_bandwidth_per_process
- gpu-hpcg:r4/spmv_gflops
- gpu-hpcg:r4/spmv_bandwidth
- gpu-hpcg:r4/spmv_gflops_per_process
- gpu-hpcg:r4/spmv_bandwidth_per_process
- gpu-hpcg:r4/mg_gflops
- gpu-hpcg:r4/mg_bandwidth
- gpu-hpcg:r4/mg_gflops_per_process
- gpu-hpcg:r4/mg_bandwidth_per_process
- gpu-hpcg:r4/total_gflops
- gpu-hpcg:r4/total_bandwidth
- gpu-hpcg:r4/total_gflops_per_process
- gpu-hpcg:r4/total_bandwidth_per_process
- gpu-hpcg:r4/local_domain_x
- gpu-hpcg:r4/local_domain_y
- gpu-hpcg:r4/local_domain_z
- gpu-hpcg:r4/process_domain_x
- gpu-hpcg:r4/process_domain_y
- gpu-hpcg:r4/process_domain_z
- gpu-hpcg:r4/(setup_time|optimization_time|total_time)_p2x2x1_n560x280x280
- gpu-hpcg:r4/(ddot|waxpby|spmv|mg|total|final)_p2x2x1_n560x280x280_(gflops_per_process|bandwidth_per_process|gflops|bandwidth)
gpu_hpcg_r8:
statistics: mean
categories: HPCG gpu-hpcg:r8
metrics:
- gpu-hpcg:r8/is_valid
- gpu-hpcg:r8/final_gflops
- gpu-hpcg:r8/final_bandwidth
- gpu-hpcg:r8/final_gflops_per_process
- gpu-hpcg:r8/final_bandwidth_per_process
- gpu-hpcg:r8/ddot_gflops
- gpu-hpcg:r8/ddot_bandwidth
- gpu-hpcg:r8/ddot_gflops_per_process
- gpu-hpcg:r8/ddot_bandwidth_per_process
- gpu-hpcg:r8/waxpby_gflops
- gpu-hpcg:r8/waxpby_bandwidth
- gpu-hpcg:r8/waxpby_gflops_per_process
- gpu-hpcg:r8/waxpby_bandwidth_per_process
- gpu-hpcg:r8/spmv_gflops
- gpu-hpcg:r8/spmv_bandwidth
- gpu-hpcg:r8/spmv_gflops_per_process
- gpu-hpcg:r8/spmv_bandwidth_per_process
- gpu-hpcg:r8/mg_gflops
- gpu-hpcg:r8/mg_bandwidth
- gpu-hpcg:r8/mg_gflops_per_process
- gpu-hpcg:r8/mg_bandwidth_per_process
- gpu-hpcg:r8/total_gflops
- gpu-hpcg:r8/total_bandwidth
- gpu-hpcg:r8/total_gflops_per_process
- gpu-hpcg:r8/total_bandwidth_per_process
- gpu-hpcg:r8/local_domain_x
- gpu-hpcg:r8/local_domain_y
- gpu-hpcg:r8/local_domain_z
- gpu-hpcg:r8/process_domain_x
- gpu-hpcg:r8/process_domain_y
- gpu-hpcg:r8/process_domain_z
- gpu-hpcg:r8/(setup_time|optimization_time|total_time)_p2x2x2_n560x280x280
- gpu-hpcg:r8/(ddot|waxpby|spmv|mg|total|final)_p2x2x2_n560x280x280_(gflops_per_process|bandwidth_per_process|gflops|bandwidth)
gpu_hpcg_r16:
statistics: mean
categories: HPCG gpu-hpcg:r16
metrics:
- gpu-hpcg:r16/is_valid
- gpu-hpcg:r16/final_gflops
- gpu-hpcg:r16/final_bandwidth
- gpu-hpcg:r16/final_gflops_per_process
- gpu-hpcg:r16/final_bandwidth_per_process
- gpu-hpcg:r16/ddot_gflops
- gpu-hpcg:r16/ddot_bandwidth
- gpu-hpcg:r16/ddot_gflops_per_process
- gpu-hpcg:r16/ddot_bandwidth_per_process
- gpu-hpcg:r16/waxpby_gflops
- gpu-hpcg:r16/waxpby_bandwidth
- gpu-hpcg:r16/waxpby_gflops_per_process
- gpu-hpcg:r16/waxpby_bandwidth_per_process
- gpu-hpcg:r16/spmv_gflops
- gpu-hpcg:r16/spmv_bandwidth
- gpu-hpcg:r16/spmv_gflops_per_process
- gpu-hpcg:r16/spmv_bandwidth_per_process
- gpu-hpcg:r16/mg_gflops
- gpu-hpcg:r16/mg_bandwidth
- gpu-hpcg:r16/mg_gflops_per_process
- gpu-hpcg:r16/mg_bandwidth_per_process
- gpu-hpcg:r16/total_gflops
- gpu-hpcg:r16/total_bandwidth
- gpu-hpcg:r16/total_gflops_per_process
- gpu-hpcg:r16/total_bandwidth_per_process
- gpu-hpcg:r16/local_domain_x
- gpu-hpcg:r16/local_domain_y
- gpu-hpcg:r16/local_domain_z
- gpu-hpcg:r16/process_domain_x
- gpu-hpcg:r16/process_domain_y
- gpu-hpcg:r16/process_domain_z
- gpu-hpcg:r16/(setup_time|optimization_time|total_time)_p4x2x2_n560x280x280
- gpu-hpcg:r16/(ddot|waxpby|spmv|mg|total|final)_p4x2x2_n560x280x280_(gflops_per_process|bandwidth_per_process|gflops|bandwidth)
gpu_hpcg_r32:
statistics: mean
categories: HPCG gpu-hpcg:r32
metrics:
- gpu-hpcg:r32/is_valid
- gpu-hpcg:r32/final_gflops
- gpu-hpcg:r32/final_bandwidth
- gpu-hpcg:r32/final_gflops_per_process
- gpu-hpcg:r32/final_bandwidth_per_process
- gpu-hpcg:r32/ddot_gflops
- gpu-hpcg:r32/ddot_bandwidth
- gpu-hpcg:r32/ddot_gflops_per_process
- gpu-hpcg:r32/ddot_bandwidth_per_process
- gpu-hpcg:r32/waxpby_gflops
- gpu-hpcg:r32/waxpby_bandwidth
- gpu-hpcg:r32/waxpby_gflops_per_process
- gpu-hpcg:r32/waxpby_bandwidth_per_process
- gpu-hpcg:r32/spmv_gflops
- gpu-hpcg:r32/spmv_bandwidth
- gpu-hpcg:r32/spmv_gflops_per_process
- gpu-hpcg:r32/spmv_bandwidth_per_process
- gpu-hpcg:r32/mg_gflops
- gpu-hpcg:r32/mg_bandwidth
- gpu-hpcg:r32/mg_gflops_per_process
- gpu-hpcg:r32/mg_bandwidth_per_process
- gpu-hpcg:r32/total_gflops
- gpu-hpcg:r32/total_bandwidth
- gpu-hpcg:r32/total_gflops_per_process
- gpu-hpcg:r32/total_bandwidth_per_process
- gpu-hpcg:r32/local_domain_x
- gpu-hpcg:r32/local_domain_y
- gpu-hpcg:r32/local_domain_z
- gpu-hpcg:r32/process_domain_x
- gpu-hpcg:r32/process_domain_y
- gpu-hpcg:r32/process_domain_z
- gpu-hpcg:r32/(setup_time|optimization_time|total_time)_p4x4x2_n560x280x280
- gpu-hpcg:r32/(ddot|waxpby|spmv|mg|total|final)_p4x4x2_n560x280x280_(gflops_per_process|bandwidth_per_process|gflops|bandwidth)
......@@ -72,7 +72,15 @@ def get_benchmark(self):
"""Get benchmark."""
(benchmark_cls, _) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(self.benchmark_name, Platform.DTK)
benchmark = benchmark_cls(self.benchmark_name, parameters='')
benchmark._args = SimpleNamespace(log_raw_data=False)
benchmark._args = SimpleNamespace(
log_raw_data=False,
npx=4,
npy=4,
npz=2,
nx=560,
ny=280,
nz=280,
)
benchmark._curr_run_index = 0
benchmark._result = BenchmarkResult(self.benchmark_name, BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1)
return benchmark
......@@ -93,51 +101,59 @@ def test_dtk_hpcg_result_parsing_with_wrapper_noise(self):
self.assertTrue(benchmark._process_raw_result(0, self.example_raw_output))
self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code)
self.assertEqual(6904.9, benchmark.result['final_gflops'][0])
self.assertEqual(215.8, benchmark.result['final_gflops_per_process'][0])
self.assertEqual(5849.4, benchmark.result['ddot_gflops'][0])
self.assertEqual(46794.9, benchmark.result['ddot_bandwidth'][0])
self.assertEqual(182.8, benchmark.result['ddot_gflops_per_process'][0])
self.assertEqual(1462.3, benchmark.result['ddot_bandwidth_per_process'][0])
self.assertEqual(3052.0, benchmark.result['waxpby_gflops'][0])
self.assertEqual(36623.8, benchmark.result['waxpby_bandwidth'][0])
self.assertEqual(5473.9, benchmark.result['spmv_gflops'][0])
self.assertEqual(34468.8, benchmark.result['spmv_bandwidth'][0])
self.assertEqual(7716.9, benchmark.result['mg_gflops'][0])
self.assertEqual(59557.1, benchmark.result['mg_bandwidth'][0])
self.assertEqual(6971.0, benchmark.result['total_gflops'][0])
self.assertEqual(52859.9, benchmark.result['total_bandwidth'][0])
self.assertEqual(217.8, benchmark.result['total_gflops_per_process'][0])
self.assertEqual(1651.9, benchmark.result['total_bandwidth_per_process'][0])
self.assertEqual(0.12, benchmark.result['setup_time'][0])
self.assertEqual(0.25, benchmark.result['optimization_time'][0])
self.assertEqual(7.55, benchmark.result['total_time'][0])
self.assertEqual(0, benchmark.result['is_valid'][0])
self.assertEqual(560, benchmark.result['local_domain_x'][0])
self.assertEqual(280, benchmark.result['local_domain_y'][0])
self.assertEqual(280, benchmark.result['local_domain_z'][0])
self.assertEqual(2240, benchmark.result['global_domain_x'][0])
self.assertEqual(1120, benchmark.result['global_domain_y'][0])
self.assertEqual(560, benchmark.result['global_domain_z'][0])
self.assertEqual(4, benchmark.result['process_domain_x'][0])
self.assertEqual(4, benchmark.result['process_domain_y'][0])
self.assertEqual(2, benchmark.result['process_domain_z'][0])
workload = 'p4x4x2_n560x280x280'
expected_results = {
f'final_{workload}_gflops': 6904.9,
f'final_{workload}_gflops_per_process': 215.8,
f'final_{workload}_bandwidth': 52359.0,
f'final_{workload}_bandwidth_per_process': 1636.2,
f'ddot_{workload}_gflops': 5849.4,
f'ddot_{workload}_bandwidth': 46794.9,
f'ddot_{workload}_gflops_per_process': 182.8,
f'ddot_{workload}_bandwidth_per_process': 1462.3,
f'waxpby_{workload}_gflops': 3052.0,
f'waxpby_{workload}_bandwidth': 36623.8,
f'waxpby_{workload}_gflops_per_process': 95.4,
f'waxpby_{workload}_bandwidth_per_process': 1144.5,
f'spmv_{workload}_gflops': 5473.9,
f'spmv_{workload}_bandwidth': 34468.8,
f'spmv_{workload}_gflops_per_process': 171.1,
f'spmv_{workload}_bandwidth_per_process': 1077.1,
f'mg_{workload}_gflops': 7716.9,
f'mg_{workload}_bandwidth': 59557.1,
f'mg_{workload}_gflops_per_process': 241.2,
f'mg_{workload}_bandwidth_per_process': 1861.2,
f'total_{workload}_gflops': 6971.0,
f'total_{workload}_bandwidth': 52859.9,
f'total_{workload}_gflops_per_process': 217.8,
f'total_{workload}_bandwidth_per_process': 1651.9,
f'setup_time_{workload}': 0.12,
f'optimization_time_{workload}': 0.25,
f'total_time_{workload}': 7.55,
}
self.assertEqual(len(expected_results), len(benchmark.result) - benchmark.default_metric_count)
for metric, value in expected_results.items():
self.assertIn(metric, benchmark.result)
self.assertEqual(value, benchmark.result[metric][0])
for metric in benchmark.result:
self.assertNotIn('valid', metric)
self.assertNotIn('domain', metric)
self.assertIn('raw_output_0', benchmark.raw_data)
def test_dtk_hpcg_result_parsing_valid_by_absence_of_invalid_markers(self):
"""Test DTK gpu-hpcg valid detection by absence of invalid markers."""
def test_dtk_hpcg_result_parsing_ignores_invalid_markers(self):
"""Test DTK gpu-hpcg does not emit validity metrics."""
benchmark = self.get_benchmark()
valid_output = self.example_raw_output.replace('*** WARNING *** INVALID RUN', '')
valid_output = valid_output.replace('*** WARNING *** THIS IS NOT A VALID RUN ***', '')
self.assertTrue(benchmark._process_raw_result(0, valid_output))
self.assertEqual(1, benchmark.result['is_valid'][0])
self.assertTrue(benchmark._process_raw_result(0, self.example_raw_output))
self.assertFalse(any('valid' in metric for metric in benchmark.result))
def test_dtk_hpcg_result_parsing_failure_when_required_summary_is_missing(self):
"""Test DTK gpu-hpcg parsing failure when required summary is missing."""
benchmark = self.get_benchmark()
invalid_output = self.example_raw_output.replace(
'[1,0]<stdout>: Process domain: 4 x 4 x 2\n\n',
'[1,0]<stdout>: Final = 6904.9 GFlop/s ( 52359.0 GB/s) '
'215.8 GFlop/s per process ( 1636.2 GB/s per process)\n',
'',
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment