Unverified Commit 60762518 authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Benchmarks: Code Revision - Revise arguments of nccl/rccl to support mpi mode...

Benchmarks: Code Revision - Revise arguments of nccl/rccl to support mpi mode and rename metric (#189)

**Description**
Revise the arguments of the nccl/rccl benchmarks to support MPI mode (MPI cannot run the nccl/rccl benchmarks correctly because multiple operators run in sequence without a barrier) and rename the metrics.

**Major Revision**
- Revise the `--operations` argument (a list) into a single `--operation` argument, so each benchmark run executes exactly one operator.

**Minor Revision**
- Rename metrics to remove the benchmark-name prefix (e.g., `NCCL_allreduce_8_busbw` becomes `allreduce_8_busbw`).
- Change the default value of the `--ngpus` argument from 8 to 1.
parent 4e431f11
...@@ -41,16 +41,15 @@ def add_parser_arguments(self): ...@@ -41,16 +41,15 @@ def add_parser_arguments(self):
super().add_parser_arguments() super().add_parser_arguments()
self._parser.add_argument( self._parser.add_argument(
'--operations', '--operation',
type=str, type=str,
nargs='+', default='allreduce',
default=list(self.__operations.keys()), help='NCCL operation to benchmark, e.g., {}.'.format(' '.join(list(self.__operations.keys()))),
help='Nccl operations to benchmark, e.g., {}.'.format(' '.join(list(self.__operations.keys()))),
) )
self._parser.add_argument( self._parser.add_argument(
'--ngpus', '--ngpus',
type=int, type=int,
default=8, default=1,
help='Number of gpus per thread to run the nccl test.', help='Number of gpus per thread to run the nccl test.',
) )
self._parser.add_argument( self._parser.add_argument(
...@@ -100,10 +99,10 @@ def _preprocess(self): ...@@ -100,10 +99,10 @@ def _preprocess(self):
return False return False
# Format the arguments # Format the arguments
self._args.operations = [p.lower() for p in self._args.operations] self._args.operation = self._args.operation.lower()
# Check the arguments and generate the commands # Check the arguments and generate the commands
for op in self._args.operations: op = self._args.operation
if op not in self.__operations: if op not in self.__operations:
self._result.set_return_code(ReturnCode.INVALID_ARGUMENT) self._result.set_return_code(ReturnCode.INVALID_ARGUMENT)
logger.error( logger.error(
...@@ -144,7 +143,7 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901 ...@@ -144,7 +143,7 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901
if rank > 0: if rank > 0:
return True return True
self._result.add_raw_data('raw_output_' + self._args.operations[cmd_idx], raw_output) self._result.add_raw_data('raw_output_' + self._args.operation, raw_output)
content = raw_output.splitlines() content = raw_output.splitlines()
size = -1 size = -1
...@@ -189,15 +188,9 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901 ...@@ -189,15 +188,9 @@ def _process_raw_result(self, cmd_idx, raw_output): # noqa: C901
busbw_out = float(line[busbw_index]) busbw_out = float(line[busbw_index])
time_out = float(line[time_index]) time_out = float(line[time_index])
algbw_out = float(line[algbw_index]) algbw_out = float(line[algbw_index])
self._result.add_result( self._result.add_result(self._args.operation + '_' + str(size) + '_busbw', busbw_out)
'NCCL_' + self._args.operations[cmd_idx] + '_' + str(size) + '_busbw', busbw_out self._result.add_result(self._args.operation + '_' + str(size) + '_algbw', algbw_out)
) self._result.add_result(self._args.operation + '_' + str(size) + '_time', time_out)
self._result.add_result(
'NCCL_' + self._args.operations[cmd_idx] + '_' + str(size) + '_algbw', algbw_out
)
self._result.add_result(
'NCCL_' + self._args.operations[cmd_idx] + '_' + str(size) + '_time', time_out
)
except BaseException as e: except BaseException as e:
logger.error( logger.error(
'The result format is invalid - round: {}, benchmark: {}, raw output: {}, message: {}.'.format( 'The result format is invalid - round: {}, benchmark: {}, raw output: {}, message: {}.'.format(
......
...@@ -39,7 +39,7 @@ def test_nccl_bw_performance(self): ...@@ -39,7 +39,7 @@ def test_nccl_bw_performance(self):
predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CUDA) predefine_params) = BenchmarkRegistry._BenchmarkRegistry__select_benchmark(benchmark_name, Platform.CUDA)
assert (benchmark_class) assert (benchmark_class)
benchmark = benchmark_class(benchmark_name) benchmark = benchmark_class(benchmark_name, parameters='--ngpus 8')
ret = benchmark._preprocess() ret = benchmark._preprocess()
assert (ret is True) assert (ret is True)
...@@ -51,11 +51,7 @@ def test_nccl_bw_performance(self): ...@@ -51,11 +51,7 @@ def test_nccl_bw_performance(self):
assert (benchmark.type == BenchmarkType.MICRO) assert (benchmark.type == BenchmarkType.MICRO)
# Check parameters specified in BenchmarkContext. # Check parameters specified in BenchmarkContext.
assert ( assert (benchmark._args.operation == 'allreduce')
benchmark._args.operations == [
'allreduce', 'allgather', 'broadcast', 'reduce', 'reducescatter', 'alltoall'
]
)
assert (benchmark._args.ngpus == 8) assert (benchmark._args.ngpus == 8)
assert (benchmark._args.minbytes == '8') assert (benchmark._args.minbytes == '8')
assert (benchmark._args.maxbytes == '8G') assert (benchmark._args.maxbytes == '8G')
...@@ -70,9 +66,8 @@ def test_nccl_bw_performance(self): ...@@ -70,9 +66,8 @@ def test_nccl_bw_performance(self):
'alltoall_perf' 'alltoall_perf'
] ]
for i in range(len(benchmark._args.operations)): command = bin_names[0] + benchmark._commands[0].split(bin_names[0])[1]
command = bin_names[i] + benchmark._commands[i].split(bin_names[i])[1] expected_command = '{} -b 8 -e 8G -f 2 -g 8 -c 0 -n 20 -w 5'.format(bin_names[0])
expected_command = '{} -b 8 -e 8G -f 2 -g 8 -c 0 -n 20 -w 5'.format(bin_names[i])
assert (command == expected_command) assert (command == expected_command)
# Check results and metrics. # Check results and metrics.
...@@ -411,18 +406,20 @@ def test_nccl_bw_performance(self): ...@@ -411,18 +406,20 @@ def test_nccl_bw_performance(self):
""" """
for i, op in enumerate(benchmark._args.operations): for op in raw_output.keys():
assert (benchmark._process_raw_result(i, raw_output[op])) benchmark._args.operation = op
assert (benchmark._process_raw_result(0, raw_output[op]))
for name in ['time', 'algbw', 'busbw']: for name in ['time', 'algbw', 'busbw']:
for size in ['8589934592', '4294967296', '2147483648', '1073741824', '536870912', '32']: for size in ['8589934592', '4294967296', '2147483648', '1073741824', '536870912', '32']:
metric = 'NCCL_' + op + '_' + size + '_' + name metric = op + '_' + size + '_' + name
assert (metric in benchmark.result) assert (metric in benchmark.result)
assert (len(benchmark.result[metric]) == 1) assert (len(benchmark.result[metric]) == 1)
assert (isinstance(benchmark.result[metric][0], numbers.Number)) assert (isinstance(benchmark.result[metric][0], numbers.Number))
assert (benchmark.result['NCCL_allreduce_8589934592_time'][0] == 63896.0) assert (benchmark.result['allreduce_8589934592_time'][0] == 63896.0)
assert (benchmark.result['NCCL_allreduce_8589934592_algbw'][0] == 134.44) assert (benchmark.result['allreduce_8589934592_algbw'][0] == 134.44)
assert (benchmark.result['NCCL_allreduce_8589934592_busbw'][0] == 235.26) assert (benchmark.result['allreduce_8589934592_busbw'][0] == 235.26)
assert (benchmark.result['NCCL_alltoall_8589934592_time'][0] == 33508.0) assert (benchmark.result['alltoall_8589934592_time'][0] == 33508.0)
assert (benchmark.result['NCCL_alltoall_8589934592_algbw'][0] == 256.36) assert (benchmark.result['alltoall_8589934592_algbw'][0] == 256.36)
assert (benchmark.result['NCCL_alltoall_8589934592_busbw'][0] == 224.31) assert (benchmark.result['alltoall_8589934592_busbw'][0] == 224.31)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment