Commit f84877bb authored by Ramesh Errabolu
Browse files

Remove duplicate transactions from bidirectional copy requests

parent b644bc0d
...@@ -326,6 +326,9 @@ class RocmBandwidthTest : public BaseTest { ...@@ -326,6 +326,9 @@ class RocmBandwidthTest : public BaseTest {
hsa_device_type_t dev_type, hsa_device_type_t dev_type,
bool fine_grained); bool fine_grained);
// Returns true if a transaction copying in the reverse direction (from dst_idx to src_idx) is already present
bool FindMirrorRequest(uint32_t src_idx, uint32_t dst_idx);
// @brief: Check if agent and access memory pool, if so, set // @brief: Check if agent and access memory pool, if so, set
// access to the agent, if not, exit // access to the agent, if not, exit
void AcquireAccess(hsa_agent_t agent, void* ptr); void AcquireAccess(hsa_agent_t agent, void* ptr);
......
...@@ -231,10 +231,12 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const { ...@@ -231,10 +231,12 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
uint32_t dst_idx = trans.copy.dst_idx_; uint32_t dst_idx = trans.copy.dst_idx_;
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_; uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_; uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
if (peak) {
perf_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.peak_bandwidth_[0]; // For COPY_ALL_UNIDIR and COPY_ALL_BIDIR we use only one copy size
} else { double bandwidth = (peak) ? trans.peak_bandwidth_[0] : trans.avg_bandwidth_[0];
perf_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.avg_bandwidth_[0]; perf_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = bandwidth;
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
perf_matrix[(dst_dev_idx * agent_index_) + src_dev_idx] = bandwidth;
} }
} }
...@@ -314,6 +316,9 @@ void RocmBandwidthTest::DisplayValidationMatrix() const { ...@@ -314,6 +316,9 @@ void RocmBandwidthTest::DisplayValidationMatrix() const {
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_; uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_; uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
perf_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.peak_bandwidth_[0]; perf_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.peak_bandwidth_[0];
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
perf_matrix[(dst_dev_idx * agent_index_) + src_dev_idx] = trans.peak_bandwidth_[0];
}
} }
uint32_t format = 10; uint32_t format = 10;
......
...@@ -43,6 +43,20 @@ ...@@ -43,6 +43,20 @@
#include "common.hpp" #include "common.hpp"
#include "rocm_bandwidth_test.hpp" #include "rocm_bandwidth_test.hpp"
// Reports whether trans_list_ already contains a transaction that copies in
// the opposite direction of the given request, i.e. an entry whose source
// index equals dst_idx and whose destination index equals src_idx.
//
// @param src_idx Source index of the request being considered
// @param dst_idx Destination index of the request being considered
// @return true if a reverse-direction entry is found, false otherwise
bool RocmBandwidthTest::FindMirrorRequest(uint32_t src_idx, uint32_t dst_idx) {
  for (const auto& queued : trans_list_) {
    // A mirror entry has its endpoints swapped relative to this request
    const bool endpoints_swapped = (queued.copy.dst_idx_ == src_idx) &&
                                   (queued.copy.src_idx_ == dst_idx);
    if (endpoints_swapped) {
      return true;
    }
  }

  // No entry in the list runs in the reverse direction
  return false;
}
bool RocmBandwidthTest::BuildReadOrWriteTrans(uint32_t req_type, bool RocmBandwidthTest::BuildReadOrWriteTrans(uint32_t req_type,
vector<size_t>& in_list) { vector<size_t>& in_list) {
...@@ -157,6 +171,13 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type, ...@@ -157,6 +171,13 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
if (src_dev_idx == dst_dev_idx) { if (src_dev_idx == dst_dev_idx) {
continue; continue;
} }
if (src_dev_idx > dst_dev_idx) {
bool mirror = FindMirrorRequest(src_idx, dst_idx);
if (mirror) {
continue;
}
}
} }
// Determine if accessibility to dst pool for src agent is not denied // Determine if accessibility to dst pool for src agent is not denied
...@@ -171,6 +192,17 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type, ...@@ -171,6 +192,17 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
} }
} }
// For bidirectional copies, determine whether both access paths are valid.
// Both paths are valid when one of the devices is a CPU. This is
// not always true when both of the devices are GPUs.
if ((req_type == REQ_COPY_ALL_BIDIR) ||
(req_type == REQ_COPY_ALL_UNIDIR)) {
path_exists = access_matrix_[(dst_dev_idx * agent_index_) + src_dev_idx];
if (path_exists == 0) {
continue;
}
}
// Update the list of agents active in any copy operation // Update the list of agents active in any copy operation
if (active_agents_list_ == NULL) { if (active_agents_list_ == NULL) {
active_agents_list_ = new uint32_t[agent_index_](); active_agents_list_ = new uint32_t[agent_index_]();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment