Unverified Commit 682b2c12 authored by Ziyue Yang's avatar Ziyue Yang Committed by GitHub
Browse files

Benchmarks: Revise Code - Make data checking in gpu_copy optional (#301)

This commit makes data checking in gpu_copy optional, because it can take too long when the message size is large.
parent 85389055
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
# ) # )
# For bidirectional test, please specify parameters as the following. # For bidirectional test, please specify parameters as the following.
# parameters='--mem_type htod dtod --copy_type sm dma --bidirectional' # parameters='--mem_type htod dtod --copy_type sm dma --bidirectional'
# To enable data checking, please add '--check_data'.
benchmark = BenchmarkRegistry.launch_benchmark(context) benchmark = BenchmarkRegistry.launch_benchmark(context)
if benchmark: if benchmark:
......
...@@ -75,6 +75,12 @@ def add_parser_arguments(self): ...@@ -75,6 +75,12 @@ def add_parser_arguments(self):
help='Enable bidirectional test', help='Enable bidirectional test',
) )
self._parser.add_argument(
'--check_data',
action='store_true',
help='Enable data checking',
)
def _preprocess(self): def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking. """Preprocess/preparation operations before the benchmarking.
...@@ -97,6 +103,9 @@ def _preprocess(self): ...@@ -97,6 +103,9 @@ def _preprocess(self):
if self._args.bidirectional: if self._args.bidirectional:
args += ' --bidirectional' args += ' --bidirectional'
if self._args.check_data:
args += ' --check_data'
self._commands = ['%s %s' % (self.__bin_path, args)] self._commands = ['%s %s' % (self.__bin_path, args)]
return True return True
......
...@@ -83,6 +83,9 @@ struct BenchArgs { ...@@ -83,6 +83,9 @@ struct BenchArgs {
// Uses SM copy, otherwise DMA copy. // Uses SM copy, otherwise DMA copy.
bool is_sm_copy = false; bool is_sm_copy = false;
// Whether check data after copy.
bool check_data = false;
// Sub-benchmarks in parallel. // Sub-benchmarks in parallel.
SubBenchArgs subs[kMaxNumSubs]; SubBenchArgs subs[kMaxNumSubs];
}; };
...@@ -115,6 +118,9 @@ struct Opts { ...@@ -115,6 +118,9 @@ struct Opts {
// Whether bidirectional transfer is enabled. // Whether bidirectional transfer is enabled.
bool bidirectional_enabled = false; bool bidirectional_enabled = false;
// Whether check data after copy.
bool check_data = false;
}; };
// Print usage of this program. // Print usage of this program.
...@@ -128,7 +134,8 @@ void PrintUsage() { ...@@ -128,7 +134,8 @@ void PrintUsage() {
"[--htod] " "[--htod] "
"[--dtoh] " "[--dtoh] "
"[--dtod] " "[--dtod] "
"[--bidirectional]\n"); "[--bidirectional] "
"[--check_data]\n");
} }
// Parse options of this program. // Parse options of this program.
...@@ -142,7 +149,8 @@ int ParseOpts(int argc, char **argv, Opts *opts) { ...@@ -142,7 +149,8 @@ int ParseOpts(int argc, char **argv, Opts *opts) {
kEnableHToD, kEnableHToD,
kEnableDToH, kEnableDToH,
kEnableDToD, kEnableDToD,
kEnableBidirectional kEnableBidirectional,
kEnableCheckData
}; };
const struct option options[] = { const struct option options[] = {
{"size", required_argument, nullptr, static_cast<int>(OptIdx::kSize)}, {"size", required_argument, nullptr, static_cast<int>(OptIdx::kSize)},
...@@ -153,7 +161,8 @@ int ParseOpts(int argc, char **argv, Opts *opts) { ...@@ -153,7 +161,8 @@ int ParseOpts(int argc, char **argv, Opts *opts) {
{"htod", no_argument, nullptr, static_cast<int>(OptIdx::kEnableHToD)}, {"htod", no_argument, nullptr, static_cast<int>(OptIdx::kEnableHToD)},
{"dtoh", no_argument, nullptr, static_cast<int>(OptIdx::kEnableDToH)}, {"dtoh", no_argument, nullptr, static_cast<int>(OptIdx::kEnableDToH)},
{"dtod", no_argument, nullptr, static_cast<int>(OptIdx::kEnableDToD)}, {"dtod", no_argument, nullptr, static_cast<int>(OptIdx::kEnableDToD)},
{"bidirectional", no_argument, nullptr, static_cast<int>(OptIdx::kEnableBidirectional)}}; {"bidirectional", no_argument, nullptr, static_cast<int>(OptIdx::kEnableBidirectional)},
{"check_data", no_argument, nullptr, static_cast<int>(OptIdx::kEnableCheckData)}};
int getopt_ret = 0; int getopt_ret = 0;
int opt_idx = 0; int opt_idx = 0;
bool size_specified = false; bool size_specified = false;
...@@ -214,6 +223,9 @@ int ParseOpts(int argc, char **argv, Opts *opts) { ...@@ -214,6 +223,9 @@ int ParseOpts(int argc, char **argv, Opts *opts) {
case static_cast<int>(OptIdx::kEnableBidirectional): case static_cast<int>(OptIdx::kEnableBidirectional):
opts->bidirectional_enabled = true; opts->bidirectional_enabled = true;
break; break;
case static_cast<int>(OptIdx::kEnableCheckData):
opts->check_data = true;
break;
default: default:
parse_err = true; parse_err = true;
} }
...@@ -258,12 +270,14 @@ int PrepareBufAndStream(BenchArgs *args) { ...@@ -258,12 +270,14 @@ int PrepareBufAndStream(BenchArgs *args) {
// Generate data to copy // Generate data to copy
sub.data_buf = static_cast<uint8_t *>(numa_alloc_onnode(args->size, args->numa_id)); sub.data_buf = static_cast<uint8_t *>(numa_alloc_onnode(args->size, args->numa_id));
if (args->check_data) {
for (int j = 0; j < args->size; j++) { for (int j = 0; j < args->size; j++) {
sub.data_buf[j] = static_cast<uint8_t>(j % uint8_mod); sub.data_buf[j] = static_cast<uint8_t>(j % uint8_mod);
} }
// Allocate check buffer // Allocate check buffer
sub.check_buf = static_cast<uint8_t *>(numa_alloc_onnode(args->size, args->numa_id)); sub.check_buf = static_cast<uint8_t *>(numa_alloc_onnode(args->size, args->numa_id));
}
// Allocate buffers for src/dst devices // Allocate buffers for src/dst devices
constexpr int num_devices = 2; constexpr int num_devices = 2;
...@@ -668,7 +682,7 @@ int RunBench(BenchArgs *args) { ...@@ -668,7 +682,7 @@ int RunBench(BenchArgs *args) {
goto destroy_event; goto destroy_event;
} }
ret = RunCopy(args); ret = RunCopy(args);
if (ret == 0) { if (ret == 0 && args->check_data) {
ret = CheckBuf(args); ret = CheckBuf(args);
} }
destroy_event: destroy_event:
...@@ -750,6 +764,7 @@ int main(int argc, char **argv) { ...@@ -750,6 +764,7 @@ int main(int argc, char **argv) {
args.num_warm_up = opts.num_warm_up; args.num_warm_up = opts.num_warm_up;
args.num_loops = opts.num_loops; args.num_loops = opts.num_loops;
args.size = opts.size; args.size = opts.size;
args.check_data = opts.check_data;
// Get number of NUMA nodes // Get number of NUMA nodes
if (numa_available()) { if (numa_available()) {
......
...@@ -86,7 +86,24 @@ superbench: ...@@ -86,7 +86,24 @@ superbench:
parallel: no parallel: no
parameters: parameters:
block_devices: [] block_devices: []
gpu-copy-bw: gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true enable: true
modes: modes:
- name: local - name: local
......
...@@ -87,7 +87,24 @@ superbench: ...@@ -87,7 +87,24 @@ superbench:
parallel: no parallel: no
parameters: parameters:
block_devices: [] block_devices: []
gpu-copy-bw: gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true enable: true
modes: modes:
- name: local - name: local
......
...@@ -102,7 +102,24 @@ superbench: ...@@ -102,7 +102,24 @@ superbench:
rand_read_runtime: 60 rand_read_runtime: 60
rand_write_runtime: 60 rand_write_runtime: 60
rand_readwrite_runtime: 60 rand_readwrite_runtime: 60
gpu-copy-bw: gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true enable: true
modes: modes:
- name: local - name: local
......
...@@ -98,7 +98,24 @@ superbench: ...@@ -98,7 +98,24 @@ superbench:
rand_read_runtime: 60 rand_read_runtime: 60
rand_write_runtime: 60 rand_write_runtime: 60
rand_readwrite_runtime: 60 rand_readwrite_runtime: 60
gpu-copy-bw: gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true enable: true
modes: modes:
- name: local - name: local
......
...@@ -79,7 +79,24 @@ superbench: ...@@ -79,7 +79,24 @@ superbench:
proc_num: 8 proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -N $(({proc_rank}/2)) prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -N $(({proc_rank}/2))
parallel: no parallel: no
gpu-copy-bw: gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true enable: true
modes: modes:
- name: local - name: local
......
...@@ -34,7 +34,7 @@ def _test_gpu_copy_bw_performance_command_generation(self, platform): ...@@ -34,7 +34,7 @@ def _test_gpu_copy_bw_performance_command_generation(self, platform):
copy_types = ['sm', 'dma'] copy_types = ['sm', 'dma']
parameters = '--mem_type %s --copy_type %s --size %d ' \ parameters = '--mem_type %s --copy_type %s --size %d ' \
'--num_warm_up %d --num_loops %d --bidirectional' % \ '--num_warm_up %d --num_loops %d --bidirectional --check_data' % \
(' '.join(mem_types), ' '.join(copy_types), size, num_warm_up, num_loops) (' '.join(mem_types), ' '.join(copy_types), size, num_warm_up, num_loops)
benchmark = benchmark_class(benchmark_name, parameters=parameters) benchmark = benchmark_class(benchmark_name, parameters=parameters)
...@@ -53,6 +53,7 @@ def _test_gpu_copy_bw_performance_command_generation(self, platform): ...@@ -53,6 +53,7 @@ def _test_gpu_copy_bw_performance_command_generation(self, platform):
assert (benchmark._args.num_warm_up == num_warm_up) assert (benchmark._args.num_warm_up == num_warm_up)
assert (benchmark._args.num_loops == num_loops) assert (benchmark._args.num_loops == num_loops)
assert (benchmark._args.bidirectional) assert (benchmark._args.bidirectional)
assert (benchmark._args.check_data)
# Check command # Check command
assert (1 == len(benchmark._commands)) assert (1 == len(benchmark._commands))
...@@ -65,6 +66,7 @@ def _test_gpu_copy_bw_performance_command_generation(self, platform): ...@@ -65,6 +66,7 @@ def _test_gpu_copy_bw_performance_command_generation(self, platform):
assert ('--num_warm_up %d' % num_warm_up in benchmark._commands[0]) assert ('--num_warm_up %d' % num_warm_up in benchmark._commands[0])
assert ('--num_loops %d' % num_loops in benchmark._commands[0]) assert ('--num_loops %d' % num_loops in benchmark._commands[0])
assert ('--bidirectional' in benchmark._commands[0]) assert ('--bidirectional' in benchmark._commands[0])
assert ('--check_data' in benchmark._commands[0])
@decorator.cuda_test @decorator.cuda_test
def test_gpu_copy_bw_performance_command_generation_cuda(self): def test_gpu_copy_bw_performance_command_generation_cuda(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment