Commit a4e18b37 authored by Ramesh Errabolu
Browse files

Propagate validation error signal clearly

parent ae50c093
...@@ -117,19 +117,20 @@ void RocmBandwidthTest::InitializeSrcBuffer(size_t size, void* buf_cpy, ...@@ -117,19 +117,20 @@ void RocmBandwidthTest::InitializeSrcBuffer(size_t size, void* buf_cpy,
ErrorCheck(err_); ErrorCheck(err_);
} }
// If Copy device is a Gpu setup buffer access // If copying agent is a CPU, use memcpy to initialize copy buffer
hsa_device_type_t cpy_dev_type = agent_list_[cpy_dev_idx].device_type_; hsa_device_type_t cpy_dev_type = agent_list_[cpy_dev_idx].device_type_;
if (cpy_dev_type == HSA_DEVICE_TYPE_GPU) { if (cpy_dev_type == HSA_DEVICE_TYPE_CPU) {
AcquireAccess(cpy_agent, init_src_); memcpy(buf_cpy, init_src_, size);
hsa_signal_store_relaxed(init_signal_, 1);
copy_buffer(buf_cpy, cpy_agent,
init_src_, cpu_agent_,
size, init_signal_);
return; return;
} }
// Copy initialization buffer into copy buffer // Copying device is a Gpu, setup buffer access
memcpy(buf_cpy, init_src_, size); // before copying initialization buffer
AcquireAccess(cpy_agent, init_src_);
hsa_signal_store_relaxed(init_signal_, 1);
copy_buffer(buf_cpy, cpy_agent,
init_src_, cpu_agent_,
size, init_signal_);
return; return;
} }
...@@ -158,7 +159,7 @@ bool RocmBandwidthTest::ValidateDstBuffer(size_t max_size, size_t curr_size, voi ...@@ -158,7 +159,7 @@ bool RocmBandwidthTest::ValidateDstBuffer(size_t max_size, size_t curr_size, voi
memcpy(validate_dst_, buf_cpy, curr_size); memcpy(validate_dst_, buf_cpy, curr_size);
} }
// Copy initialization buffer into copy buffer // Compare initialization buffer with validation buffer
err_ = (hsa_status_t)memcmp(init_src_, validate_dst_, curr_size); err_ = (hsa_status_t)memcmp(init_src_, validate_dst_, curr_size);
if (err_ != HSA_STATUS_SUCCESS) { if (err_ != HSA_STATUS_SUCCESS) {
exit_value_ = err_; exit_value_ = err_;
...@@ -637,16 +638,17 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -637,16 +638,17 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
} }
} }
// Get Cpu min copy time // Get Cpu min and mean times for copy
// Push them into the Cpu time list
trans.cpu_min_time_.push_back(GetMinTime(cpu_time)); trans.cpu_min_time_.push_back(GetMinTime(cpu_time));
// Get Cpu mean copy time and store to the array
trans.cpu_avg_time_.push_back(GetMeanTime(cpu_time)); trans.cpu_avg_time_.push_back(GetMeanTime(cpu_time));
if (print_cpu_time_ == false) { if (print_cpu_time_ == false) {
if (trans.copy.uses_gpu_) { if (trans.copy.uses_gpu_) {
// Get Gpu min and mean copy times // Get Gpu min and mean copy times
double min_time = (verify) ? GetMinTime(gpu_time) : std::numeric_limits<double>::max(); // Push them into the Gpu time list
double mean_time = (verify) ? GetMeanTime(gpu_time) : std::numeric_limits<double>::max(); double min_time = (verify) ? GetMinTime(gpu_time) : VALIDATE_COPY_OP_FAILURE;
double mean_time = (verify) ? GetMeanTime(gpu_time) : VALIDATE_COPY_OP_FAILURE;
trans.gpu_min_time_.push_back(min_time); trans.gpu_min_time_.push_back(min_time);
trans.gpu_avg_time_.push_back(mean_time); trans.gpu_avg_time_.push_back(mean_time);
} }
...@@ -791,7 +793,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() { ...@@ -791,7 +793,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
// Initialize version of the test // Initialize version of the test
version_.major_id = 2; version_.major_id = 2;
version_.minor_id = 3; version_.minor_id = 3;
version_.step_id = 5; version_.step_id = 6;
version_.reserved = 0; version_.reserved = 0;
bw_iter_cnt_ = getenv("ROCM_BW_ITER_CNT"); bw_iter_cnt_ = getenv("ROCM_BW_ITER_CNT");
......
...@@ -472,6 +472,9 @@ class RocmBandwidthTest : public BaseTest { ...@@ -472,6 +472,9 @@ class RocmBandwidthTest : public BaseTest {
static const uint32_t LINK_PROP_TYPE = 0x01; static const uint32_t LINK_PROP_TYPE = 0x01;
static const uint32_t LINK_PROP_WEIGHT = 0x02; static const uint32_t LINK_PROP_WEIGHT = 0x02;
static const uint32_t LINK_PROP_ACCESS = 0x03; static const uint32_t LINK_PROP_ACCESS = 0x03;
// Encodes validation failure
static const double VALIDATE_COPY_OP_FAILURE = 0xFFFFFFFF.FFFFFFFFp0;
// List used to store transactions per user request // List used to store transactions per user request
vector<async_trans_t> trans_list_; vector<async_trans_t> trans_list_;
......
...@@ -318,7 +318,7 @@ void RocmBandwidthTest::PrintPerfMatrix(bool validate, bool peak, double* perf_m ...@@ -318,7 +318,7 @@ void RocmBandwidthTest::PrintPerfMatrix(bool validate, bool peak, double* perf_m
if (validate) { if (validate) {
if (value == 0) { if (value == 0) {
std::cout << "N/A"; std::cout << "N/A";
} else if (value < 1) { } else if (value == VALIDATE_COPY_OP_FAILURE) {
std::cout << "FAIL"; std::cout << "FAIL";
} else { } else {
std::cout << "PASS"; std::cout << "PASS";
......
...@@ -394,7 +394,7 @@ void RocmBandwidthTest::ComputeCopyTime(std::vector<async_trans_t>& trans_list) ...@@ -394,7 +394,7 @@ void RocmBandwidthTest::ComputeCopyTime(std::vector<async_trans_t>& trans_list)
} }
void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) { void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
// Get the frequency of Gpu Timestamping // Get the frequency of Gpu Timestamping
uint64_t sys_freq = 0; uint64_t sys_freq = 0;
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sys_freq); hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sys_freq);
...@@ -426,12 +426,12 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) { ...@@ -426,12 +426,12 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
avg_bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000; avg_bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000;
peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000; peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000;
} else { } else {
if (print_cpu_time_ == false) { if (print_cpu_time_) {
avg_time = trans.gpu_avg_time_[idx] / sys_freq;
min_time = trans.gpu_min_time_[idx] / sys_freq;
} else {
avg_time = trans.cpu_avg_time_[idx]; avg_time = trans.cpu_avg_time_[idx];
min_time = trans.cpu_min_time_[idx]; min_time = trans.cpu_min_time_[idx];
} else {
avg_time = trans.gpu_avg_time_[idx] / sys_freq;
min_time = trans.gpu_min_time_[idx] / sys_freq;
} }
avg_bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000; avg_bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000;
peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000; peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000;
...@@ -439,6 +439,23 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) { ...@@ -439,6 +439,23 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
trans.min_time_.push_back(min_time); trans.min_time_.push_back(min_time);
trans.avg_time_.push_back(avg_time); trans.avg_time_.push_back(avg_time);
// Check validation failures as that signal is
// captured via Min and Avg time values. If there
// is a failure propagate that value as computed
// bandwidth
if (validate_) {
avg_time = trans.gpu_avg_time_[idx];
min_time = trans.gpu_min_time_[idx];
if ((avg_time == VALIDATE_COPY_OP_FAILURE) &&
(min_time == VALIDATE_COPY_OP_FAILURE)) {
trans.avg_bandwidth_.push_back(avg_time);
trans.peak_bandwidth_.push_back(min_time);
continue;
}
}
// Update computed bandwidth for the transaction
trans.avg_bandwidth_.push_back(avg_bandwidth); trans.avg_bandwidth_.push_back(avg_bandwidth);
trans.peak_bandwidth_.push_back(peak_bandwidth); trans.peak_bandwidth_.push_back(peak_bandwidth);
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment