Unverified commit 48f82ce6, authored by rerrabolu, committed by GitHub
Browse files

Merge pull request #3 from RadeonOpenCompute/addValidationMode

Update test to support runs in Validation Mode
parents d460bf9b 27996113
...@@ -62,7 +62,7 @@ const uint32_t RocmBandwidthTest::SIZE_LIST[] = { 1 * 1024, ...@@ -62,7 +62,7 @@ const uint32_t RocmBandwidthTest::SIZE_LIST[] = { 1 * 1024,
256 * 1024 * 1024, 512 * 1024 * 1024 }; 256 * 1024 * 1024, 512 * 1024 * 1024 };
uint32_t RocmBandwidthTest::GetIterationNum() { uint32_t RocmBandwidthTest::GetIterationNum() {
return num_iteration_ * 1.2 + 1; return (validate_) ? 1 : (num_iteration_ * 1.2 + 1);
} }
void RocmBandwidthTest::AcquireAccess(hsa_agent_t agent, void* ptr) { void RocmBandwidthTest::AcquireAccess(hsa_agent_t agent, void* ptr) {
...@@ -222,7 +222,6 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -222,7 +222,6 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
// Initialize size of buffer to equal the largest element of allocation // Initialize size of buffer to equal the largest element of allocation
uint32_t size_len = size_list_.size(); uint32_t size_len = size_list_.size();
uint32_t max_size = size_list_.back(); uint32_t max_size = size_list_.back();
// uint32_t max_size = size_list_.back() * 1024 * 1024;
// Bind to resources such as pool and agents that are involved // Bind to resources such as pool and agents that are involved
// in both forward and reverse copy operations // in both forward and reverse copy operations
...@@ -230,14 +229,11 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -230,14 +229,11 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
void* buf_dst_fwd; void* buf_dst_fwd;
void* buf_src_rev; void* buf_src_rev;
void* buf_dst_rev; void* buf_dst_rev;
void* host_src_fwd; void* validation_dst;
void* host_dst_fwd; void* validation_src;
void* host_src_rev;
void* host_dst_rev;
hsa_signal_t signal_fwd; hsa_signal_t signal_fwd;
hsa_signal_t signal_rev; hsa_signal_t signal_rev;
hsa_signal_t host_signal_fwd; hsa_signal_t validation_signal;
hsa_signal_t host_signal_rev;
uint32_t src_idx = trans.copy.src_idx_; uint32_t src_idx = trans.copy.src_idx_;
uint32_t dst_idx = trans.copy.dst_idx_; uint32_t dst_idx = trans.copy.dst_idx_;
uint32_t src_dev_idx_fwd = pool_list_[src_idx].agent_index_; uint32_t src_dev_idx_fwd = pool_list_[src_idx].agent_index_;
...@@ -272,36 +268,19 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -272,36 +268,19 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
signal_rev); signal_rev);
} }
if (verify_) { if (validate_) {
AllocateHostBuffers(max_size, AllocateHostBuffers(max_size,
src_dev_idx_fwd, src_dev_idx_fwd,
dst_dev_idx_fwd, dst_dev_idx_fwd,
host_src_fwd, host_dst_fwd, validation_src, validation_dst,
buf_src_fwd, buf_dst_fwd, buf_src_fwd, buf_dst_fwd,
src_agent_fwd, dst_agent_fwd, src_agent_fwd, dst_agent_fwd,
host_signal_fwd); validation_signal);
if (bidir) {
AllocateHostBuffers(max_size,
src_dev_idx_rev,
dst_dev_idx_rev,
host_src_rev, host_dst_rev,
buf_src_rev, buf_dst_rev,
src_agent_rev, dst_agent_rev,
host_signal_rev);
}
// Initialize source buffer with values from verification buffer // Initialize source buffer with values from verification buffer
copy_buffer(buf_src_fwd, src_agent_fwd, copy_buffer(buf_src_fwd, src_agent_fwd,
host_src_fwd, cpu_agent_, validation_src, cpu_agent_,
max_size, host_signal_fwd); max_size, validation_signal);
ErrorCheck(err_);
if (bidir) {
copy_buffer(buf_src_rev, src_agent_rev,
host_src_rev, cpu_agent_,
max_size, host_signal_rev);
ErrorCheck(err_);
}
} }
// Bind the number of iterations // Bind the number of iterations
...@@ -313,7 +292,6 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -313,7 +292,6 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
// This should not be happening // This should not be happening
uint32_t curr_size = size_list_[idx]; uint32_t curr_size = size_list_[idx];
// uint32_t curr_size = size_list_[idx] * 1024 * 1024;
if (curr_size > max_size) { if (curr_size > max_size) {
break; break;
} }
...@@ -331,17 +309,11 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -331,17 +309,11 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
hsa_signal_store_relaxed(signal_rev, 1); hsa_signal_store_relaxed(signal_rev, 1);
} }
if (verify_) { if (validate_) {
AcquirePoolAcceses(src_dev_idx_fwd, AcquirePoolAcceses(src_dev_idx_fwd,
src_agent_fwd, buf_src_fwd, src_agent_fwd, buf_src_fwd,
dst_dev_idx_fwd, dst_dev_idx_fwd,
dst_agent_fwd, buf_dst_fwd); dst_agent_fwd, buf_dst_fwd);
if (bidir) {
AcquirePoolAcceses(src_dev_idx_rev,
src_agent_rev, buf_src_rev,
dst_dev_idx_rev,
dst_agent_rev, buf_dst_rev);
}
} }
// Create a timer object and reset signals // Create a timer object and reset signals
...@@ -403,41 +375,25 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -403,41 +375,25 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
} }
} }
if (verify_) { if (validate_) {
// Re-Establish access to destination buffer and host buffer // Re-Establish access to destination buffer and host buffer
AcquirePoolAcceses(dst_dev_idx_fwd, AcquirePoolAcceses(dst_dev_idx_fwd,
dst_agent_fwd, buf_dst_fwd, dst_agent_fwd, buf_dst_fwd,
cpu_index_, cpu_agent_, host_dst_fwd); cpu_index_, cpu_agent_, validation_dst);
// Init dst buffer with values from outbuffer of copy operation // Init dst buffer with values from outbuffer of copy operation
hsa_signal_store_relaxed(host_signal_fwd, 1); hsa_signal_store_relaxed(validation_signal, 1);
copy_buffer(host_dst_fwd, cpu_agent_, copy_buffer(validation_dst, cpu_agent_,
buf_dst_fwd, dst_agent_fwd, buf_dst_fwd, dst_agent_fwd,
curr_size, host_signal_fwd); curr_size, validation_signal);
ErrorCheck(err_);
// Compare output equals input // Compare output equals input
err_ = (hsa_status_t)memcmp(host_src_fwd, host_dst_fwd, curr_size); err_ = (hsa_status_t)memcmp(validation_src, validation_dst, curr_size);
ErrorCheck(err_); if (err_ != HSA_STATUS_SUCCESS) {
PrintCopyAccessError(src_idx, dst_idx);
if (bidir) {
// Re-Establish access to destination buffer and host buffer
AcquirePoolAcceses(dst_dev_idx_rev,
dst_agent_rev, buf_dst_rev,
cpu_index_, cpu_agent_, host_dst_rev);
hsa_signal_store_relaxed(host_signal_rev, 1);
copy_buffer(host_dst_rev, cpu_agent_,
buf_dst_rev, dst_agent_rev,
curr_size, host_signal_rev);
ErrorCheck(err_);
// Compare output equals input
err_ = (hsa_status_t)memcmp(host_src_rev, host_dst_rev, curr_size);
ErrorCheck(err_);
} }
ErrorCheck(err_);
} }
} }
...@@ -464,9 +420,10 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -464,9 +420,10 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
ReleaseBuffers(bidir, buf_src_fwd, buf_src_rev, ReleaseBuffers(bidir, buf_src_fwd, buf_src_rev,
buf_dst_fwd, buf_dst_rev, signal_fwd, signal_rev); buf_dst_fwd, buf_dst_rev, signal_fwd, signal_rev);
if (verify_) { if (validate_) {
ReleaseBuffers(bidir, host_src_fwd, host_src_rev, hsa_signal_t fake_signal{0};
host_dst_fwd, host_dst_rev, host_signal_fwd, host_signal_rev); ReleaseBuffers(false, validation_src, NULL,
validation_dst, NULL, validation_signal, fake_signal);
} }
} }
...@@ -560,7 +517,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() { ...@@ -560,7 +517,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
access_matrix_ = NULL; access_matrix_ = NULL;
active_agents_list_ = NULL; active_agents_list_ = NULL;
verify_ = false; validate_ = false;
print_cpu_time_ = false; print_cpu_time_ = false;
bw_default_run_ = getenv("ROCM_BW_DEFAULT_RUN"); bw_default_run_ = getenv("ROCM_BW_DEFAULT_RUN");
......
...@@ -250,6 +250,7 @@ class RocmBandwidthTest : public BaseTest { ...@@ -250,6 +250,7 @@ class RocmBandwidthTest : public BaseTest {
void DisplayIOTime(async_trans_t& trans) const; void DisplayIOTime(async_trans_t& trans) const;
void DisplayCopyTime(async_trans_t& trans) const; void DisplayCopyTime(async_trans_t& trans) const;
void DisplayCopyTimeMatrix(bool peak) const; void DisplayCopyTimeMatrix(bool peak) const;
void DisplayValidationMatrix() const;
private: private:
...@@ -410,10 +411,10 @@ class RocmBandwidthTest : public BaseTest { ...@@ -410,10 +411,10 @@ class RocmBandwidthTest : public BaseTest {
// Flag to print Cpu time // Flag to print Cpu time
bool print_cpu_time_; bool print_cpu_time_;
// Determines if user has requested verification // Determines if user has requested validation
bool verify_; bool validate_;
// CPU agent used for verification // CPU agent used for validation
int32_t cpu_index_; int32_t cpu_index_;
hsa_agent_t cpu_agent_; hsa_agent_t cpu_agent_;
......
...@@ -109,9 +109,10 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -109,9 +109,10 @@ void RocmBandwidthTest::ParseArguments() {
print_topology = true; print_topology = true;
break; break;
// Set verification flag to true // Set validation mode flag to true
case 'v': case 'v':
verify_ = true; validate_ = true;
req_copy_all_unidir_ = REQ_COPY_ALL_UNIDIR;
break; break;
// Collect list of agents involved in bidirectional copy operation // Collect list of agents involved in bidirectional copy operation
...@@ -227,7 +228,7 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -227,7 +228,7 @@ void RocmBandwidthTest::ParseArguments() {
} }
// Initialize buffer list if full copying in unidirectional mode is enabled // Initialize buffer list if full copying in unidirectional mode is enabled
if (copy_all_uni) { if ((copy_all_uni) || (validate_)) {
uint32_t size = pool_list_.size(); uint32_t size = pool_list_.size();
for (uint32_t idx = 0; idx < size; idx++) { for (uint32_t idx = 0; idx < size; idx++) {
src_list_.push_back(idx); src_list_.push_back(idx);
...@@ -248,7 +249,7 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -248,7 +249,7 @@ void RocmBandwidthTest::ParseArguments() {
if (size_list_.size() == 0) { if (size_list_.size() == 0) {
uint32_t size_len = sizeof(SIZE_LIST)/sizeof(uint32_t); uint32_t size_len = sizeof(SIZE_LIST)/sizeof(uint32_t);
for (uint32_t idx = 0; idx < size_len; idx++) { for (uint32_t idx = 0; idx < size_len; idx++) {
if ((copy_all_bi) || (copy_all_uni)) { if ((copy_all_bi) || (copy_all_uni) || (validate_)) {
if (idx == 16) { if (idx == 16) {
size_list_.push_back(SIZE_LIST[idx]); size_list_.push_back(SIZE_LIST[idx]);
} }
......
...@@ -278,12 +278,16 @@ void RocmBandwidthTest::PrintCopyAccessError(uint32_t src_idx, uint32_t dst_idx) ...@@ -278,12 +278,16 @@ void RocmBandwidthTest::PrintCopyAccessError(uint32_t src_idx, uint32_t dst_idx)
hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_; hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;
std::cout << std::endl; std::cout << std::endl;
std::cout << "Index of Src Memory: " << src_idx << std::endl; //std::cout << "Index of Src Memory: " << src_idx << std::endl;
std::cout << "Index of Dst Memory: " << dst_idx << std::endl; //std::cout << "Index of Dst Memory: " << dst_idx << std::endl;
std::cout << "Index of Src Device: " << src_dev_idx << std::endl; std::cout << "Src Device: Index "
std::cout << "Index of Dst Device: " << dst_dev_idx << std::endl; << src_dev_idx
std::cout << "Device Type of Src Device: " << src_dev_type << std::endl; << ", Type: "
std::cout << "Device Type of Dst Device: " << dst_dev_type << std::endl; << ((src_dev_type == HSA_DEVICE_TYPE_CPU) ? "CPU" : "GPU") << std::endl;
std::cout << "Dst Device: Index "
<< dst_dev_idx
<< ", Type: "
<< ((dst_dev_type == HSA_DEVICE_TYPE_CPU) ? "CPU" : "GPU") << std::endl;
std::cout << "Rocm Device hosting Src Memory cannot ACCESS Dst Memory" << std::endl; std::cout << "Rocm Device hosting Src Memory cannot ACCESS Dst Memory" << std::endl;
std::cout << std::endl; std::cout << std::endl;
} }
......
...@@ -147,12 +147,10 @@ void RocmBandwidthTest::Display() const { ...@@ -147,12 +147,10 @@ void RocmBandwidthTest::Display() const {
return; return;
} }
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) { if (validate_) {
if (bw_default_run_ == NULL) { DisplayDevInfo();
DisplayDevInfo(); PrintAccessMatrix();
PrintAccessMatrix(); DisplayValidationMatrix();
}
DisplayCopyTimeMatrix(true);
return; return;
} }
...@@ -164,6 +162,15 @@ void RocmBandwidthTest::Display() const { ...@@ -164,6 +162,15 @@ void RocmBandwidthTest::Display() const {
return; return;
} }
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
if (bw_default_run_ == NULL) {
DisplayDevInfo();
PrintAccessMatrix();
}
DisplayCopyTimeMatrix(true);
return;
}
for (uint32_t idx = 0; idx < trans_size; idx++) { for (uint32_t idx = 0; idx < trans_size; idx++) {
async_trans_t trans = trans_list_[idx]; async_trans_t trans = trans_list_[idx];
if ((trans.req_type_ == REQ_COPY_BIDIR) || if ((trans.req_type_ == REQ_COPY_BIDIR) ||
...@@ -283,6 +290,71 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const { ...@@ -283,6 +290,71 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
std::cout << std::endl; std::cout << std::endl;
} }
void RocmBandwidthTest::DisplayValidationMatrix() const {
double* perf_matrix = new double[agent_index_ * agent_index_]();
uint32_t trans_size = trans_list_.size();
for (uint32_t idx = 0; idx < trans_size; idx++) {
async_trans_t trans = trans_list_[idx];
uint32_t src_idx = trans.copy.src_idx_;
uint32_t dst_idx = trans.copy.dst_idx_;
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
perf_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.peak_bandwidth_[0];
}
uint32_t format = 10;
std::cout.setf(ios::left);
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << "Data Path Validation";
std::cout << std::endl;
std::cout << std::endl;
std::cout.precision(6);
std::cout << std::fixed;
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << "D/D";
format = 12;
for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
std::cout.width(format);
std::stringstream agent_id;
agent_id << idx0;
std::cout << agent_id.str();
}
std::cout << std::endl;
std::cout << std::endl;
for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
format = 10;
std::cout.width(format);
std::cout << "";
std::stringstream agent_id;
agent_id << idx0;
std::cout.width(format);
std::cout << agent_id.str();
for (uint32_t idx1 = 0; idx1 < agent_index_; idx1++) {
format = 12;
std::cout.width(format);
double value = perf_matrix[(idx0 * agent_index_) + idx1];
if (value == 0) {
std::cout << "N/A";
} else {
std::cout << "PASS";
// std::cout << perf_matrix[(idx0 * agent_index_) + idx1];
}
}
std::cout << std::endl;
std::cout << std::endl;
}
std::cout << std::endl;
}
void RocmBandwidthTest::DisplayDevInfo() const { void RocmBandwidthTest::DisplayDevInfo() const {
uint32_t format = 10; uint32_t format = 10;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment