Commit a4e18b37 authored by Ramesh Errabolu
Browse files

Propagate validation error signal clearly

parent ae50c093
......@@ -117,19 +117,20 @@ void RocmBandwidthTest::InitializeSrcBuffer(size_t size, void* buf_cpy,
ErrorCheck(err_);
}
// If Copy device is a Gpu setup buffer access
// If copying agent is a CPU, use memcpy to initialize copy buffer
hsa_device_type_t cpy_dev_type = agent_list_[cpy_dev_idx].device_type_;
if (cpy_dev_type == HSA_DEVICE_TYPE_GPU) {
AcquireAccess(cpy_agent, init_src_);
hsa_signal_store_relaxed(init_signal_, 1);
copy_buffer(buf_cpy, cpy_agent,
init_src_, cpu_agent_,
size, init_signal_);
if (cpy_dev_type == HSA_DEVICE_TYPE_CPU) {
memcpy(buf_cpy, init_src_, size);
return;
}
// Copy initialization buffer into copy buffer
memcpy(buf_cpy, init_src_, size);
// Copying device is a Gpu, setup buffer access
// before copying initialization buffer
AcquireAccess(cpy_agent, init_src_);
hsa_signal_store_relaxed(init_signal_, 1);
copy_buffer(buf_cpy, cpy_agent,
init_src_, cpu_agent_,
size, init_signal_);
return;
}
......@@ -158,7 +159,7 @@ bool RocmBandwidthTest::ValidateDstBuffer(size_t max_size, size_t curr_size, voi
memcpy(validate_dst_, buf_cpy, curr_size);
}
// Copy initialization buffer into copy buffer
// Compare initialization buffer with validation buffer
err_ = (hsa_status_t)memcmp(init_src_, validate_dst_, curr_size);
if (err_ != HSA_STATUS_SUCCESS) {
exit_value_ = err_;
......@@ -637,16 +638,17 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
}
}
// Get Cpu min copy time
// Get Cpu min and mean times for copy
// Push them into the Cpu time list
trans.cpu_min_time_.push_back(GetMinTime(cpu_time));
// Get Cpu mean copy time and store to the array
trans.cpu_avg_time_.push_back(GetMeanTime(cpu_time));
if (print_cpu_time_ == false) {
if (trans.copy.uses_gpu_) {
// Get Gpu min and mean copy times
double min_time = (verify) ? GetMinTime(gpu_time) : std::numeric_limits<double>::max();
double mean_time = (verify) ? GetMeanTime(gpu_time) : std::numeric_limits<double>::max();
// Push them into the Gpu time list
double min_time = (verify) ? GetMinTime(gpu_time) : VALIDATE_COPY_OP_FAILURE;
double mean_time = (verify) ? GetMeanTime(gpu_time) : VALIDATE_COPY_OP_FAILURE;
trans.gpu_min_time_.push_back(min_time);
trans.gpu_avg_time_.push_back(mean_time);
}
......@@ -791,7 +793,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
// Initialize version of the test
version_.major_id = 2;
version_.minor_id = 3;
version_.step_id = 5;
version_.step_id = 6;
version_.reserved = 0;
bw_iter_cnt_ = getenv("ROCM_BW_ITER_CNT");
......
......@@ -472,6 +472,9 @@ class RocmBandwidthTest : public BaseTest {
static const uint32_t LINK_PROP_TYPE = 0x01;
static const uint32_t LINK_PROP_WEIGHT = 0x02;
static const uint32_t LINK_PROP_ACCESS = 0x03;
// Sentinel that encodes a validation failure. It is stored in place of the
// Gpu min/mean copy time when verification of a copy fails, is propagated
// unchanged as the computed bandwidth, and is what the perf matrix printer
// compares against to report "FAIL".
//
// Declared constexpr: standard C++ permits an in-class initializer on a
// static data member of floating-point type only when the member is
// constexpr; a plain `static const double` with an initializer is
// ill-formed and builds only where a compiler extension allows it.
//
// Note: the hex literal carries more significant bits than a double can
// hold, so the stored value is the nearest double, 2^32 (4294967296.0).
static constexpr double VALIDATE_COPY_OP_FAILURE = 0xFFFFFFFF.FFFFFFFFp0;
// List used to store transactions per user request
vector<async_trans_t> trans_list_;
......
......@@ -318,7 +318,7 @@ void RocmBandwidthTest::PrintPerfMatrix(bool validate, bool peak, double* perf_m
if (validate) {
if (value == 0) {
std::cout << "N/A";
} else if (value < 1) {
} else if (value == VALIDATE_COPY_OP_FAILURE) {
std::cout << "FAIL";
} else {
std::cout << "PASS";
......
......@@ -394,7 +394,7 @@ void RocmBandwidthTest::ComputeCopyTime(std::vector<async_trans_t>& trans_list)
}
void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
// Get the frequency of Gpu Timestamping
uint64_t sys_freq = 0;
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sys_freq);
......@@ -426,12 +426,12 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
avg_bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000;
peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000;
} else {
if (print_cpu_time_ == false) {
avg_time = trans.gpu_avg_time_[idx] / sys_freq;
min_time = trans.gpu_min_time_[idx] / sys_freq;
} else {
if (print_cpu_time_) {
avg_time = trans.cpu_avg_time_[idx];
min_time = trans.cpu_min_time_[idx];
} else {
avg_time = trans.gpu_avg_time_[idx] / sys_freq;
min_time = trans.gpu_min_time_[idx] / sys_freq;
}
avg_bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000;
peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000;
......@@ -439,6 +439,23 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
trans.min_time_.push_back(min_time);
trans.avg_time_.push_back(avg_time);
// A validation failure is signalled through the Gpu min
// and avg time values. When both of them carry the
// failure sentinel, propagate that sentinel unchanged
// as the computed avg and peak bandwidth
if (validate_) {
avg_time = trans.gpu_avg_time_[idx];
min_time = trans.gpu_min_time_[idx];
if ((avg_time == VALIDATE_COPY_OP_FAILURE) &&
(min_time == VALIDATE_COPY_OP_FAILURE)) {
trans.avg_bandwidth_.push_back(avg_time);
trans.peak_bandwidth_.push_back(min_time);
continue;
}
}
// Update computed bandwidth for the transaction
trans.avg_bandwidth_.push_back(avg_bandwidth);
trans.peak_bandwidth_.push_back(peak_bandwidth);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment