Commit 27996113 authored by Ramesh Errabolu
Browse files

Update help screen with Validation Mode

parent d460bf9b
......@@ -62,7 +62,7 @@ const uint32_t RocmBandwidthTest::SIZE_LIST[] = { 1 * 1024,
256 * 1024 * 1024, 512 * 1024 * 1024 };
// Returns the number of copy iterations to run.
// In validation mode (validate_ set) exactly one iteration is used;
// otherwise the configured iteration count is scaled by 1.2 and
// incremented by one.
uint32_t RocmBandwidthTest::GetIterationNum() {
  // The previous unconditional return preceded this statement and made the
  // validation-mode branch unreachable; only the mode-aware return remains.
  return (validate_) ? 1 : (num_iteration_ * 1.2 + 1);
}
void RocmBandwidthTest::AcquireAccess(hsa_agent_t agent, void* ptr) {
......@@ -222,7 +222,6 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
// Initialize size of buffer to equal the largest element of allocation
uint32_t size_len = size_list_.size();
uint32_t max_size = size_list_.back();
// uint32_t max_size = size_list_.back() * 1024 * 1024;
// Bind to resources such as pool and agents that are involved
// in both forward and reverse copy operations
......@@ -230,14 +229,11 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
void* buf_dst_fwd;
void* buf_src_rev;
void* buf_dst_rev;
void* host_src_fwd;
void* host_dst_fwd;
void* host_src_rev;
void* host_dst_rev;
void* validation_dst;
void* validation_src;
hsa_signal_t signal_fwd;
hsa_signal_t signal_rev;
hsa_signal_t host_signal_fwd;
hsa_signal_t host_signal_rev;
hsa_signal_t validation_signal;
uint32_t src_idx = trans.copy.src_idx_;
uint32_t dst_idx = trans.copy.dst_idx_;
uint32_t src_dev_idx_fwd = pool_list_[src_idx].agent_index_;
......@@ -272,36 +268,19 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
signal_rev);
}
if (verify_) {
if (validate_) {
AllocateHostBuffers(max_size,
src_dev_idx_fwd,
dst_dev_idx_fwd,
host_src_fwd, host_dst_fwd,
validation_src, validation_dst,
buf_src_fwd, buf_dst_fwd,
src_agent_fwd, dst_agent_fwd,
host_signal_fwd);
if (bidir) {
AllocateHostBuffers(max_size,
src_dev_idx_rev,
dst_dev_idx_rev,
host_src_rev, host_dst_rev,
buf_src_rev, buf_dst_rev,
src_agent_rev, dst_agent_rev,
host_signal_rev);
}
validation_signal);
// Initialize source buffer with values from verification buffer
copy_buffer(buf_src_fwd, src_agent_fwd,
host_src_fwd, cpu_agent_,
max_size, host_signal_fwd);
ErrorCheck(err_);
if (bidir) {
copy_buffer(buf_src_rev, src_agent_rev,
host_src_rev, cpu_agent_,
max_size, host_signal_rev);
ErrorCheck(err_);
}
validation_src, cpu_agent_,
max_size, validation_signal);
}
// Bind the number of iterations
......@@ -313,7 +292,6 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
// This should not be happening
uint32_t curr_size = size_list_[idx];
// uint32_t curr_size = size_list_[idx] * 1024 * 1024;
if (curr_size > max_size) {
break;
}
......@@ -331,17 +309,11 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
hsa_signal_store_relaxed(signal_rev, 1);
}
if (verify_) {
if (validate_) {
AcquirePoolAcceses(src_dev_idx_fwd,
src_agent_fwd, buf_src_fwd,
dst_dev_idx_fwd,
dst_agent_fwd, buf_dst_fwd);
if (bidir) {
AcquirePoolAcceses(src_dev_idx_rev,
src_agent_rev, buf_src_rev,
dst_dev_idx_rev,
dst_agent_rev, buf_dst_rev);
}
}
// Create a timer object and reset signals
......@@ -403,41 +375,25 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
}
}
if (verify_) {
if (validate_) {
// Re-Establish access to destination buffer and host buffer
AcquirePoolAcceses(dst_dev_idx_fwd,
dst_agent_fwd, buf_dst_fwd,
cpu_index_, cpu_agent_, host_dst_fwd);
cpu_index_, cpu_agent_, validation_dst);
// Init dst buffer with values from outbuffer of copy operation
hsa_signal_store_relaxed(host_signal_fwd, 1);
copy_buffer(host_dst_fwd, cpu_agent_,
hsa_signal_store_relaxed(validation_signal, 1);
copy_buffer(validation_dst, cpu_agent_,
buf_dst_fwd, dst_agent_fwd,
curr_size, host_signal_fwd);
ErrorCheck(err_);
curr_size, validation_signal);
// Compare output equals input
err_ = (hsa_status_t)memcmp(host_src_fwd, host_dst_fwd, curr_size);
ErrorCheck(err_);
if (bidir) {
// Re-Establish access to destination buffer and host buffer
AcquirePoolAcceses(dst_dev_idx_rev,
dst_agent_rev, buf_dst_rev,
cpu_index_, cpu_agent_, host_dst_rev);
hsa_signal_store_relaxed(host_signal_rev, 1);
copy_buffer(host_dst_rev, cpu_agent_,
buf_dst_rev, dst_agent_rev,
curr_size, host_signal_rev);
ErrorCheck(err_);
// Compare output equals input
err_ = (hsa_status_t)memcmp(host_src_rev, host_dst_rev, curr_size);
ErrorCheck(err_);
err_ = (hsa_status_t)memcmp(validation_src, validation_dst, curr_size);
if (err_ != HSA_STATUS_SUCCESS) {
PrintCopyAccessError(src_idx, dst_idx);
}
ErrorCheck(err_);
}
}
......@@ -464,9 +420,10 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
ReleaseBuffers(bidir, buf_src_fwd, buf_src_rev,
buf_dst_fwd, buf_dst_rev, signal_fwd, signal_rev);
if (verify_) {
ReleaseBuffers(bidir, host_src_fwd, host_src_rev,
host_dst_fwd, host_dst_rev, host_signal_fwd, host_signal_rev);
if (validate_) {
hsa_signal_t fake_signal{0};
ReleaseBuffers(false, validation_src, NULL,
validation_dst, NULL, validation_signal, fake_signal);
}
}
......@@ -560,7 +517,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
access_matrix_ = NULL;
active_agents_list_ = NULL;
verify_ = false;
validate_ = false;
print_cpu_time_ = false;
bw_default_run_ = getenv("ROCM_BW_DEFAULT_RUN");
......
......@@ -250,6 +250,7 @@ class RocmBandwidthTest : public BaseTest {
void DisplayIOTime(async_trans_t& trans) const;
void DisplayCopyTime(async_trans_t& trans) const;
void DisplayCopyTimeMatrix(bool peak) const;
void DisplayValidationMatrix() const;
private:
......@@ -410,10 +411,10 @@ class RocmBandwidthTest : public BaseTest {
// Flag to print Cpu time
bool print_cpu_time_;
// Determines if user has requested verification
bool verify_;
// Determines if user has requested validation
bool validate_;
// CPU agent used for verification
// CPU agent used for validation
int32_t cpu_index_;
hsa_agent_t cpu_agent_;
......
......@@ -109,9 +109,10 @@ void RocmBandwidthTest::ParseArguments() {
print_topology = true;
break;
// Set verification flag to true
// Set validation mode flag to true
case 'v':
verify_ = true;
validate_ = true;
req_copy_all_unidir_ = REQ_COPY_ALL_UNIDIR;
break;
// Collect list of agents involved in bidirectional copy operation
......@@ -227,7 +228,7 @@ void RocmBandwidthTest::ParseArguments() {
}
// Initialize buffer list if full copying in unidirectional mode is enabled
if (copy_all_uni) {
if ((copy_all_uni) || (validate_)) {
uint32_t size = pool_list_.size();
for (uint32_t idx = 0; idx < size; idx++) {
src_list_.push_back(idx);
......@@ -248,7 +249,7 @@ void RocmBandwidthTest::ParseArguments() {
if (size_list_.size() == 0) {
uint32_t size_len = sizeof(SIZE_LIST)/sizeof(uint32_t);
for (uint32_t idx = 0; idx < size_len; idx++) {
if ((copy_all_bi) || (copy_all_uni)) {
if ((copy_all_bi) || (copy_all_uni) || (validate_)) {
if (idx == 16) {
size_list_.push_back(SIZE_LIST[idx]);
}
......
......@@ -278,12 +278,16 @@ void RocmBandwidthTest::PrintCopyAccessError(uint32_t src_idx, uint32_t dst_idx)
hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;
std::cout << std::endl;
std::cout << "Index of Src Memory: " << src_idx << std::endl;
std::cout << "Index of Dst Memory: " << dst_idx << std::endl;
std::cout << "Index of Src Device: " << src_dev_idx << std::endl;
std::cout << "Index of Dst Device: " << dst_dev_idx << std::endl;
std::cout << "Device Type of Src Device: " << src_dev_type << std::endl;
std::cout << "Device Type of Dst Device: " << dst_dev_type << std::endl;
//std::cout << "Index of Src Memory: " << src_idx << std::endl;
//std::cout << "Index of Dst Memory: " << dst_idx << std::endl;
std::cout << "Src Device: Index "
<< src_dev_idx
<< ", Type: "
<< ((src_dev_type == HSA_DEVICE_TYPE_CPU) ? "CPU" : "GPU") << std::endl;
std::cout << "Dst Device: Index "
<< dst_dev_idx
<< ", Type: "
<< ((dst_dev_type == HSA_DEVICE_TYPE_CPU) ? "CPU" : "GPU") << std::endl;
std::cout << "Rocm Device hosting Src Memory cannot ACCESS Dst Memory" << std::endl;
std::cout << std::endl;
}
......
......@@ -147,12 +147,10 @@ void RocmBandwidthTest::Display() const {
return;
}
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
if (bw_default_run_ == NULL) {
DisplayDevInfo();
PrintAccessMatrix();
}
DisplayCopyTimeMatrix(true);
if (validate_) {
DisplayDevInfo();
PrintAccessMatrix();
DisplayValidationMatrix();
return;
}
......@@ -164,6 +162,15 @@ void RocmBandwidthTest::Display() const {
return;
}
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
if (bw_default_run_ == NULL) {
DisplayDevInfo();
PrintAccessMatrix();
}
DisplayCopyTimeMatrix(true);
return;
}
for (uint32_t idx = 0; idx < trans_size; idx++) {
async_trans_t trans = trans_list_[idx];
if ((trans.req_type_ == REQ_COPY_BIDIR) ||
......@@ -283,6 +290,71 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
std::cout << std::endl;
}
void RocmBandwidthTest::DisplayValidationMatrix() const {
double* perf_matrix = new double[agent_index_ * agent_index_]();
uint32_t trans_size = trans_list_.size();
for (uint32_t idx = 0; idx < trans_size; idx++) {
async_trans_t trans = trans_list_[idx];
uint32_t src_idx = trans.copy.src_idx_;
uint32_t dst_idx = trans.copy.dst_idx_;
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
perf_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.peak_bandwidth_[0];
}
uint32_t format = 10;
std::cout.setf(ios::left);
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << "Data Path Validation";
std::cout << std::endl;
std::cout << std::endl;
std::cout.precision(6);
std::cout << std::fixed;
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << "D/D";
format = 12;
for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
std::cout.width(format);
std::stringstream agent_id;
agent_id << idx0;
std::cout << agent_id.str();
}
std::cout << std::endl;
std::cout << std::endl;
for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
format = 10;
std::cout.width(format);
std::cout << "";
std::stringstream agent_id;
agent_id << idx0;
std::cout.width(format);
std::cout << agent_id.str();
for (uint32_t idx1 = 0; idx1 < agent_index_; idx1++) {
format = 12;
std::cout.width(format);
double value = perf_matrix[(idx0 * agent_index_) + idx1];
if (value == 0) {
std::cout << "N/A";
} else {
std::cout << "PASS";
// std::cout << perf_matrix[(idx0 * agent_index_) + idx1];
}
}
std::cout << std::endl;
std::cout << std::endl;
}
std::cout << std::endl;
}
void RocmBandwidthTest::DisplayDevInfo() const {
uint32_t format = 10;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment