Unverified Commit 682b2c12 authored by Ziyue Yang's avatar Ziyue Yang Committed by GitHub
Browse files

Benchmarks: Revise Code - Make data checking in gpu_copy optional (#301)

This commit makes data checking in gpu_copy optional, because checking can take too long when the message size is large.
parent 85389055
......@@ -20,6 +20,7 @@
# )
# For bidirectional test, please specify parameters as the following.
# parameters='--mem_type htod dtod --copy_type sm dma --bidirectional'
# To enable data checking, please add '--check_data'.
benchmark = BenchmarkRegistry.launch_benchmark(context)
if benchmark:
......
......@@ -75,6 +75,12 @@ def add_parser_arguments(self):
help='Enable bidirectional test',
)
self._parser.add_argument(
'--check_data',
action='store_true',
help='Enable data checking',
)
def _preprocess(self):
"""Preprocess/preparation operations before the benchmarking.
......@@ -97,6 +103,9 @@ def _preprocess(self):
if self._args.bidirectional:
args += ' --bidirectional'
if self._args.check_data:
args += ' --check_data'
self._commands = ['%s %s' % (self.__bin_path, args)]
return True
......
......@@ -83,6 +83,9 @@ struct BenchArgs {
// Uses SM copy, otherwise DMA copy.
bool is_sm_copy = false;
// Whether to check data after copy.
bool check_data = false;
// Sub-benchmarks in parallel.
SubBenchArgs subs[kMaxNumSubs];
};
......@@ -115,6 +118,9 @@ struct Opts {
// Whether bidirectional transfer is enabled.
bool bidirectional_enabled = false;
// Whether to check data after copy.
bool check_data = false;
};
// Print usage of this program.
......@@ -128,7 +134,8 @@ void PrintUsage() {
"[--htod] "
"[--dtoh] "
"[--dtod] "
"[--bidirectional]\n");
"[--bidirectional] "
"[--check_data]\n");
}
// Parse options of this program.
......@@ -142,7 +149,8 @@ int ParseOpts(int argc, char **argv, Opts *opts) {
kEnableHToD,
kEnableDToH,
kEnableDToD,
kEnableBidirectional
kEnableBidirectional,
kEnableCheckData
};
const struct option options[] = {
{"size", required_argument, nullptr, static_cast<int>(OptIdx::kSize)},
......@@ -153,7 +161,8 @@ int ParseOpts(int argc, char **argv, Opts *opts) {
{"htod", no_argument, nullptr, static_cast<int>(OptIdx::kEnableHToD)},
{"dtoh", no_argument, nullptr, static_cast<int>(OptIdx::kEnableDToH)},
{"dtod", no_argument, nullptr, static_cast<int>(OptIdx::kEnableDToD)},
{"bidirectional", no_argument, nullptr, static_cast<int>(OptIdx::kEnableBidirectional)}};
{"bidirectional", no_argument, nullptr, static_cast<int>(OptIdx::kEnableBidirectional)},
{"check_data", no_argument, nullptr, static_cast<int>(OptIdx::kEnableCheckData)}};
int getopt_ret = 0;
int opt_idx = 0;
bool size_specified = false;
......@@ -214,6 +223,9 @@ int ParseOpts(int argc, char **argv, Opts *opts) {
case static_cast<int>(OptIdx::kEnableBidirectional):
opts->bidirectional_enabled = true;
break;
case static_cast<int>(OptIdx::kEnableCheckData):
opts->check_data = true;
break;
default:
parse_err = true;
}
......@@ -258,12 +270,14 @@ int PrepareBufAndStream(BenchArgs *args) {
// Generate data to copy
sub.data_buf = static_cast<uint8_t *>(numa_alloc_onnode(args->size, args->numa_id));
for (int j = 0; j < args->size; j++) {
sub.data_buf[j] = static_cast<uint8_t>(j % uint8_mod);
}
// Allocate check buffer
sub.check_buf = static_cast<uint8_t *>(numa_alloc_onnode(args->size, args->numa_id));
if (args->check_data) {
for (int j = 0; j < args->size; j++) {
sub.data_buf[j] = static_cast<uint8_t>(j % uint8_mod);
}
// Allocate check buffer
sub.check_buf = static_cast<uint8_t *>(numa_alloc_onnode(args->size, args->numa_id));
}
// Allocate buffers for src/dst devices
constexpr int num_devices = 2;
......@@ -668,7 +682,7 @@ int RunBench(BenchArgs *args) {
goto destroy_event;
}
ret = RunCopy(args);
if (ret == 0) {
if (ret == 0 && args->check_data) {
ret = CheckBuf(args);
}
destroy_event:
......@@ -750,6 +764,7 @@ int main(int argc, char **argv) {
args.num_warm_up = opts.num_warm_up;
args.num_loops = opts.num_loops;
args.size = opts.size;
args.check_data = opts.check_data;
// Get number of NUMA nodes
if (numa_available()) {
......
......@@ -86,7 +86,24 @@ superbench:
parallel: no
parameters:
block_devices: []
gpu-copy-bw:
gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true
modes:
- name: local
......
......@@ -87,7 +87,24 @@ superbench:
parallel: no
parameters:
block_devices: []
gpu-copy-bw:
gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true
modes:
- name: local
......
......@@ -102,7 +102,24 @@ superbench:
rand_read_runtime: 60
rand_write_runtime: 60
rand_readwrite_runtime: 60
gpu-copy-bw:
gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true
modes:
- name: local
......
......@@ -98,7 +98,24 @@ superbench:
rand_read_runtime: 60
rand_write_runtime: 60
rand_readwrite_runtime: 60
gpu-copy-bw:
gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true
modes:
- name: local
......
......@@ -79,7 +79,24 @@ superbench:
proc_num: 8
prefix: CUDA_VISIBLE_DEVICES={proc_rank} numactl -N $(({proc_rank}/2))
parallel: no
gpu-copy-bw:
gpu-copy-bw:correctness:
enable: true
modes:
- name: local
parallel: no
parameters:
mem_type:
- htod
- dtoh
- dtod
copy_type:
- sm
- dma
size: 4096
num_warm_up: 0
num_loops: 1
check_data: true
gpu-copy-bw:perf:
enable: true
modes:
- name: local
......
......@@ -34,7 +34,7 @@ def _test_gpu_copy_bw_performance_command_generation(self, platform):
copy_types = ['sm', 'dma']
parameters = '--mem_type %s --copy_type %s --size %d ' \
'--num_warm_up %d --num_loops %d --bidirectional' % \
'--num_warm_up %d --num_loops %d --bidirectional --check_data' % \
(' '.join(mem_types), ' '.join(copy_types), size, num_warm_up, num_loops)
benchmark = benchmark_class(benchmark_name, parameters=parameters)
......@@ -53,6 +53,7 @@ def _test_gpu_copy_bw_performance_command_generation(self, platform):
assert (benchmark._args.num_warm_up == num_warm_up)
assert (benchmark._args.num_loops == num_loops)
assert (benchmark._args.bidirectional)
assert (benchmark._args.check_data)
# Check command
assert (1 == len(benchmark._commands))
......@@ -65,6 +66,7 @@ def _test_gpu_copy_bw_performance_command_generation(self, platform):
assert ('--num_warm_up %d' % num_warm_up in benchmark._commands[0])
assert ('--num_loops %d' % num_loops in benchmark._commands[0])
assert ('--bidirectional' in benchmark._commands[0])
assert ('--check_data' in benchmark._commands[0])
@decorator.cuda_test
def test_gpu_copy_bw_performance_command_generation_cuda(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment