Commit 88d4ad20 authored by Ramesh Errabolu
Browse files

Refactor validation signal treatment in a device-agnostic manner

parent d1ac47c6
......@@ -641,17 +641,25 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
}
}
// Get Cpu min and mean times for copy
// Push them into the Cpu time list
trans.cpu_min_time_.push_back(GetMinTime(cpu_time));
trans.cpu_avg_time_.push_back(GetMeanTime(cpu_time));
// Collecting Cpu time. Capture verify failures if any
// Get min and mean copy times and collect them into Cpu
// time list
double min_time = 0;
double mean_time = 0;
if (print_cpu_time_) {
min_time = (verify) ? GetMinTime(cpu_time) : VALIDATE_COPY_OP_FAILURE;
mean_time = (verify) ? GetMeanTime(cpu_time) : VALIDATE_COPY_OP_FAILURE;
trans.cpu_min_time_.push_back(min_time);
trans.cpu_avg_time_.push_back(mean_time);
}
// Collecting Gpu time. Capture verify failures if any
// Get min and mean copy times and collect them into Gpu
// time list
if (print_cpu_time_ == false) {
if (trans.copy.uses_gpu_) {
// Get Gpu min and mean copy times
// Push them into the Gpu time list
double min_time = (verify) ? GetMinTime(gpu_time) : VALIDATE_COPY_OP_FAILURE;
double mean_time = (verify) ? GetMeanTime(gpu_time) : VALIDATE_COPY_OP_FAILURE;
min_time = (verify) ? GetMinTime(gpu_time) : VALIDATE_COPY_OP_FAILURE;
mean_time = (verify) ? GetMeanTime(gpu_time) : VALIDATE_COPY_OP_FAILURE;
trans.gpu_min_time_.push_back(min_time);
trans.gpu_avg_time_.push_back(mean_time);
}
......@@ -796,7 +804,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
// Initialize version of the test
version_.major_id = 2;
version_.minor_id = 3;
version_.step_id = 7;
version_.step_id = 9;
version_.reserved = 0;
bw_iter_cnt_ = getenv("ROCM_BW_ITER_CNT");
......
......@@ -147,9 +147,7 @@ void RocmBandwidthTest::ValidateCopyUnidirFlags(uint32_t copy_mask,
// It is illegal to specify Latency and another
// secondary flag that affects a copy operation
if ((copy_ctrl_mask & DEV_COPY_LATENCY) &&
((copy_ctrl_mask & USR_BUFFER_INIT) ||
(copy_ctrl_mask & CPU_VISIBLE_TIME) ||
(copy_ctrl_mask & VALIDATE_COPY_OP))) {
(copy_ctrl_mask & VALIDATE_COPY_OP)) {
PrintHelpScreen();
exit(0);
}
......
......@@ -70,13 +70,10 @@ void RocmBandwidthTest::PrintHelpScreen() {
std::cout << std::endl;
std::cout << "\t NOTE: Mixing following options is illegal/unsupported" << std::endl;
std::cout << "\t\t Case 1: rocm_bandwidth_test -a or -A with -c" << std::endl;
std::cout << "\t\t Case 2: rocm_bandwidth_test -b or -A with -m" << std::endl;
std::cout << "\t\t Case 3: rocm_bandwidth_test -b or -A with -l" << std::endl;
std::cout << "\t\t Case 4: rocm_bandwidth_test -b or -A with -v" << std::endl;
std::cout << "\t\t Case 5: rocm_bandwidth_test -a or -s x -d y with -l and -c" << std::endl;
std::cout << "\t\t Case 6: rocm_bandwidth_test -a or -s x -d y with -l and -m" << std::endl;
std::cout << "\t\t Case 7: rocm_bandwidth_test -a or -s x -d y with -l and -v" << std::endl;
std::cout << "\t\t Case 1: rocm_bandwidth_test -a with {lm}{1,}" << std::endl;
std::cout << "\t\t Case 2: rocm_bandwidth_test -b with {clv}{1,}" << std::endl;
std::cout << "\t\t Case 3: rocm_bandwidth_test -A with {clmv}{1,}" << std::endl;
std::cout << "\t\t Case 4: rocm_bandwidth_test -s x -d y with {lmv}{2,}" << std::endl;
std::cout << std::endl;
std::cout << std::endl;
......@@ -152,11 +149,19 @@ void RocmBandwidthTest::PrintTopology() {
if (HSA_DEVICE_TYPE_CPU == node.agent.device_type_) {
std::cout << " Device Type: CPU" << std::endl;
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << " Device Name: " << node.agent.name_ << std::endl;
} else if (HSA_DEVICE_TYPE_GPU == node.agent.device_type_) {
std::cout << " Device Type: GPU" << std::endl;
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << " Device Name: " << node.agent.name_ << std::endl;
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << " Device BDF: " << node.agent.bdf_id_ << std::endl;
}
......
......@@ -418,44 +418,45 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
data_size += data_size;
}
// Copy operation does not involve a Gpu device
// Divide bandwidth with 10^9 to get size in GigaBytes (10^9)
if (trans.copy.uses_gpu_ != true) {
// Get time taken by copy operation
if ((print_cpu_time_) ||
(trans.copy.uses_gpu_ != true)) {
avg_time = trans.cpu_avg_time_[idx];
min_time = trans.cpu_min_time_[idx];
avg_bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000;
peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000;
} else {
if (print_cpu_time_) {
avg_time = trans.cpu_avg_time_[idx];
min_time = trans.cpu_min_time_[idx];
} else {
avg_time = trans.gpu_avg_time_[idx] / sys_freq;
min_time = trans.gpu_min_time_[idx] / sys_freq;
}
avg_bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000;
peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000;
avg_time = trans.gpu_avg_time_[idx];
min_time = trans.gpu_min_time_[idx];
}
trans.min_time_.push_back(min_time);
trans.avg_time_.push_back(avg_time);
// Determine if there was a validation failure
// @note: Value is set to VALIDATE_COPY_OP_FAILURE
// if copy transaction was validated and it failed
hsa_status_t verify_status = HSA_STATUS_ERROR;
if ((avg_time != VALIDATE_COPY_OP_FAILURE) &&
(min_time != VALIDATE_COPY_OP_FAILURE)) {
verify_status = HSA_STATUS_SUCCESS;
}
// Check validation failures as that signal is
// captured via Min and Avg time values. If there
// is a failure propagate that value as computed
// bandwidth
if (validate_) {
avg_time = trans.gpu_avg_time_[idx];
min_time = trans.gpu_min_time_[idx];
if ((avg_time == VALIDATE_COPY_OP_FAILURE) &&
(min_time == VALIDATE_COPY_OP_FAILURE)) {
trans.avg_bandwidth_.push_back(avg_time);
trans.peak_bandwidth_.push_back(min_time);
continue;
}
// Adjust Gpu time if there is no validation error
if ((trans.copy.uses_gpu_) &&
(print_cpu_time_ == false) &&
(verify_status == HSA_STATUS_SUCCESS)) {
avg_time = avg_time / sys_freq;
min_time = min_time / sys_freq;
}
// Compute bandwidth - divide by 10^9, not
// 1024^3, to express size in GigaBytes
// @note: For validation failures bandwidth
// is encoded by VALIDATE_COPY_OP_FAILURE
if (verify_status == HSA_STATUS_SUCCESS) {
avg_bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000;
peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000;
}
// Update computed bandwidth for the transaction
trans.min_time_.push_back(min_time);
trans.avg_time_.push_back(avg_time);
trans.avg_bandwidth_.push_back(avg_bandwidth);
trans.peak_bandwidth_.push_back(peak_bandwidth);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment