Unverified Commit 4f4dcfbe authored by Ramesh Errabolu's avatar Ramesh Errabolu Committed by GitHub
Browse files

Merge pull request #26 from RadeonOpenCompute/rbtLatency

Enable copy overhead measurement
parents e809e43f c6f6ed57
...@@ -60,7 +60,17 @@ const uint32_t RocmBandwidthTest::SIZE_LIST[] = { 1 * 1024, ...@@ -60,7 +60,17 @@ const uint32_t RocmBandwidthTest::SIZE_LIST[] = { 1 * 1024,
4 * 1024 * 1024, 8 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024,
16 * 1024 * 1024, 32 * 1024 * 1024, 16 * 1024 * 1024, 32 * 1024 * 1024,
64 * 1024 * 1024, 128 * 1024 * 1024, 64 * 1024 * 1024, 128 * 1024 * 1024,
256 * 1024 * 1024, 512 * 1024 * 1024 }; 256 * 1024 * 1024, 512 * 1024 * 1024};
// Copy sizes, in bytes, used when collecting latency data: successive
// powers of two from 1 byte through 512 KB, twenty entries in all
const uint32_t RocmBandwidthTest::LATENCY_SIZE_LIST[] = {
    // 1 byte through 512 bytes
    1, 2, 4, 8, 16,
    32, 64, 128, 256, 512,
    // 1 KB through 512 KB
    1 * 1024, 2 * 1024, 4 * 1024, 8 * 1024, 16 * 1024,
    32 * 1024, 64 * 1024, 128 * 1024, 256 * 1024, 512 * 1024 };
uint32_t RocmBandwidthTest::GetIterationNum() { uint32_t RocmBandwidthTest::GetIterationNum() {
return (validate_) ? 1 : (num_iteration_ * 1.2 + 1); return (validate_) ? 1 : (num_iteration_ * 1.2 + 1);
...@@ -76,24 +86,15 @@ void RocmBandwidthTest::AcquirePoolAcceses(uint32_t src_dev_idx, ...@@ -76,24 +86,15 @@ void RocmBandwidthTest::AcquirePoolAcceses(uint32_t src_dev_idx,
uint32_t dst_dev_idx, uint32_t dst_dev_idx,
hsa_agent_t dst_agent, void* dst) { hsa_agent_t dst_agent, void* dst) {
if (access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] == 2) {
AcquireAccess(src_agent, dst);
AcquireAccess(dst_agent, src);
return;
}
// determine which one is a cpu and call acquire on the other agent // determine which one is a cpu and call acquire on the other agent
hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_; hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_;
hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_; if (src_dev_type == HSA_DEVICE_TYPE_GPU) {
if (src_dev_type == HSA_DEVICE_TYPE_CPU) {
AcquireAccess(dst_agent, src);
return;
}
if (dst_dev_type == HSA_DEVICE_TYPE_CPU) {
AcquireAccess(src_agent, dst); AcquireAccess(src_agent, dst);
return; } else {
AcquireAccess(dst_agent, src);
} }
assert(false && "Inconsistent state");
return;
} }
void RocmBandwidthTest::AllocateHostBuffers(uint32_t size, void RocmBandwidthTest::AllocateHostBuffers(uint32_t size,
...@@ -541,11 +542,12 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() { ...@@ -541,11 +542,12 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
access_matrix_ = NULL; access_matrix_ = NULL;
active_agents_list_ = NULL; active_agents_list_ = NULL;
latency_ = false;
validate_ = false; validate_ = false;
print_cpu_time_ = false; print_cpu_time_ = false;
// Initialize version of the test // Initialize version of the test
version_.major_id = 1; version_.major_id = 2;
version_.minor_id = 0; version_.minor_id = 0;
version_.step_id = 0; version_.step_id = 0;
version_.reserved = 0; version_.reserved = 0;
......
...@@ -233,6 +233,10 @@ class RocmBandwidthTest : public BaseTest { ...@@ -233,6 +233,10 @@ class RocmBandwidthTest : public BaseTest {
// build list of transactions // build list of transactions
void ParseArguments(); void ParseArguments();
// @brief Validate user input of primary operations
void ValidateInputFlags(uint32_t pf_cnt,
uint32_t copy_mask, uint32_t copy_ctrl_mask);
// @brief: Print the list of transactions // @brief: Print the list of transactions
void PrintTransList(); void PrintTransList();
...@@ -406,6 +410,14 @@ class RocmBandwidthTest : public BaseTest { ...@@ -406,6 +410,14 @@ class RocmBandwidthTest : public BaseTest {
uint32_t req_copy_unidir_; uint32_t req_copy_unidir_;
uint32_t req_copy_all_bidir_; uint32_t req_copy_all_bidir_;
uint32_t req_copy_all_unidir_; uint32_t req_copy_all_unidir_;
static const uint32_t USR_SRC_FLAG = 0x01;
static const uint32_t USR_DST_FLAG = 0x02;
static const uint32_t USR_BUFFER_SIZE = 0x01;
static const uint32_t USR_VISIBLE_TIME = 0x02;
static const uint32_t DEV_COPY_LATENCY = 0x04;
static const uint32_t VALIDATE_COPY_OP = 0x08;
// List used to store transactions per user request // List used to store transactions per user request
vector<async_trans_t> trans_list_; vector<async_trans_t> trans_list_;
...@@ -444,6 +456,9 @@ class RocmBandwidthTest : public BaseTest { ...@@ -444,6 +456,9 @@ class RocmBandwidthTest : public BaseTest {
// Determines if user has requested validation // Determines if user has requested validation
bool validate_; bool validate_;
// Determines the latency overhead of copy operations
bool latency_;
// CPU agent used for validation // CPU agent used for validation
int32_t cpu_index_; int32_t cpu_index_;
hsa_agent_t cpu_agent_; hsa_agent_t cpu_agent_;
...@@ -451,8 +466,8 @@ class RocmBandwidthTest : public BaseTest { ...@@ -451,8 +466,8 @@ class RocmBandwidthTest : public BaseTest {
// System region // System region
hsa_amd_memory_pool_t sys_pool_; hsa_amd_memory_pool_t sys_pool_;
// static const uint32_t SIZE_LIST[4];
static const uint32_t SIZE_LIST[20]; static const uint32_t SIZE_LIST[20];
static const uint32_t LATENCY_SIZE_LIST[20];
// Exit value to return in case of error // Exit value to return in case of error
int32_t exit_value_; int32_t exit_value_;
......
...@@ -77,13 +77,83 @@ static bool ParseOptionValue(char* value, vector<uint32_t>&value_list) { ...@@ -77,13 +77,83 @@ static bool ParseOptionValue(char* value, vector<uint32_t>&value_list) {
return true; return true;
} }
// @brief Validate the combination of flags collected from the command line.
// Any illegal combination prints the help screen and terminates the
// process via exit(0); the method returns only when the input is accepted.
//
// @param pf_cnt Number of primary flags counted while parsing arguments
//
// @param copy_mask Bitmask of USR_SRC_FLAG / USR_DST_FLAG recording which
// of the source and destination lists were given
//
// @param copy_ctrl_mask Bitmask of the secondary flags USR_BUFFER_SIZE,
// USR_VISIBLE_TIME, DEV_COPY_LATENCY and VALIDATE_COPY_OP
//
// Side effect: when no primary flag is present and validation is
// requested, the request is rewritten as an all-device unidirectional copy
void RocmBandwidthTest::ValidateInputFlags(uint32_t pf_cnt,
                           uint32_t copy_mask, uint32_t copy_ctrl_mask) {

  // Decode the secondary flags once so the rules below read as predicates
  const bool usr_size = (copy_ctrl_mask & USR_BUFFER_SIZE) != 0;
  const bool usr_cpu_time = (copy_ctrl_mask & USR_VISIBLE_TIME) != 0;
  const bool usr_latency = (copy_ctrl_mask & DEV_COPY_LATENCY) != 0;
  const bool usr_validate = (copy_ctrl_mask & VALIDATE_COPY_OP) != 0;

  // Primary flags: at most two are allowed, and exactly two is legal
  // only when they are the source and destination lists of a
  // unidirectional copy among a subset of devices
  const bool too_many_primary = (pf_cnt > 2);
  const bool bad_primary_pair =
               (pf_cnt == 2) && (copy_mask != (USR_SRC_FLAG | USR_DST_FLAG));
  if (too_many_primary || bad_primary_pair) {
    PrintHelpScreen();
    exit(0);
  }

  // Validation requested without any primary flag is rewritten as a
  // unidirectional copy among all devices. This must happen before the
  // secondary rules are evaluated since they consult the rewritten request
  if ((pf_cnt == 0) && usr_validate) {
    req_copy_all_unidir_ = REQ_COPY_ALL_UNIDIR;
  }

  // Snapshot the kind of copy being requested
  const bool all_bidir = (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR);
  const bool all_unidir = (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR);
  const bool some_bidir = (req_copy_bidir_ == REQ_COPY_BIDIR);

  // Rule 1: A copy size may not accompany copy operations that
  // involve all devices
  const bool size_with_all = (all_bidir || all_unidir) && usr_size;

  // Rule 2: Latency may not accompany bidirectional copies, nor
  // unidirectional copies that involve all devices
  // NOTE(review): ParseArguments carries a latency branch for the
  // all-device unidirectional case which this rule rejects up front -
  // confirm which of the two reflects the intended behavior
  const bool latency_with_copy =
               (some_bidir || all_bidir || all_unidir) && usr_latency;

  // Rule 3: Latency may not be combined with any other secondary
  // flag that affects a copy operation
  const bool latency_with_ctrl =
               usr_latency && (usr_size || usr_cpu_time || usr_validate);

  // Rule 4: Validation may be combined with neither a user copy
  // size nor a request for Cpu time
  const bool validate_with_ctrl = usr_validate && (usr_size || usr_cpu_time);

  if (size_with_all || latency_with_copy ||
      latency_with_ctrl || validate_with_ctrl) {
    PrintHelpScreen();
    exit(0);
  }
}
void RocmBandwidthTest::ParseArguments() { void RocmBandwidthTest::ParseArguments() {
bool print_help = false; bool print_help = false;
bool copy_all_bi = false;
bool copy_all_uni = false;
bool print_version = false; bool print_version = false;
bool print_topology = false; bool print_topology = false;
uint32_t copy_mask = 0;
uint32_t copy_ctrl_mask = 0;
uint32_t num_primary_flags = 0;
// This will suppress prints from getopt implementation // This will suppress prints from getopt implementation
// In case of error, it will return the character '?' as // In case of error, it will return the character '?' as
...@@ -92,49 +162,45 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -92,49 +162,45 @@ void RocmBandwidthTest::ParseArguments() {
int opt; int opt;
bool status; bool status;
while ((opt = getopt(usr_argc_, usr_argv_, "hqvctaAb:s:d:r:w:m:")) != -1) { while ((opt = getopt(usr_argc_, usr_argv_, "hqtclvaAb:s:d:r:w:m:")) != -1) {
switch (opt) { switch (opt) {
// Print help screen // Print help screen
case 'h': case 'h':
print_help = true; print_help = true;
num_primary_flags++;
break; break;
// Print version of the test // Print version of the test
case 'q': case 'q':
print_version = true; print_version = true;
break; num_primary_flags++;
// Print Cpu time
case 'c':
print_cpu_time_ = true;
break; break;
// Print system topology // Print system topology
case 't': case 't':
print_topology = true; print_topology = true;
num_primary_flags++;
break; break;
// Set validation mode flag to true // Enable Unidirectional copy among all valid buffers
case 'v': case 'a':
validate_ = true; num_primary_flags++;
req_copy_all_unidir_ = REQ_COPY_ALL_UNIDIR; req_copy_all_unidir_ = REQ_COPY_ALL_UNIDIR;
break; break;
// Collect list of agents involved in bidirectional copy operation // Enable Bidirectional copy among all valid buffers
case 'b': case 'A':
status = ParseOptionValue(optarg, bidir_list_); num_primary_flags++;
if (status) { req_copy_all_bidir_ = REQ_COPY_ALL_BIDIR;
req_copy_bidir_ = REQ_COPY_BIDIR;
break;
}
print_help = true;
break; break;
// Collect list of source buffers involved in unidirectional copy operation // Collect list of source buffers involved in unidirectional copy operation
case 's': case 's':
status = ParseOptionValue(optarg, src_list_); status = ParseOptionValue(optarg, src_list_);
if (status) { if (status) {
num_primary_flags++;
copy_mask |= USR_SRC_FLAG;
req_copy_unidir_ = REQ_COPY_UNIDIR; req_copy_unidir_ = REQ_COPY_UNIDIR;
break; break;
} }
...@@ -145,12 +211,51 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -145,12 +211,51 @@ void RocmBandwidthTest::ParseArguments() {
case 'd': case 'd':
status = ParseOptionValue(optarg, dst_list_); status = ParseOptionValue(optarg, dst_list_);
if (status) { if (status) {
num_primary_flags++;
copy_mask |= USR_DST_FLAG;
req_copy_unidir_ = REQ_COPY_UNIDIR; req_copy_unidir_ = REQ_COPY_UNIDIR;
break; break;
} }
print_help = true; print_help = true;
break; break;
// Collect list of agents involved in bidirectional copy operation
case 'b':
status = ParseOptionValue(optarg, bidir_list_);
if (status) {
req_copy_bidir_ = REQ_COPY_BIDIR;
break;
}
print_help = true;
break;
// Size of buffers to use in copy and read/write operations
case 'm':
status = ParseOptionValue(optarg, size_list_);
if (status == false) {
print_help = true;
}
copy_ctrl_mask |= USR_BUFFER_SIZE;
break;
// Print Cpu time
case 'c':
print_cpu_time_ = true;
copy_ctrl_mask |= USR_VISIBLE_TIME;
break;
// Set Latency mode flag to true
case 'l':
latency_ = true;
copy_ctrl_mask |= DEV_COPY_LATENCY;
break;
// Set validation mode flag to true
case 'v':
validate_ = true;
copy_ctrl_mask |= VALIDATE_COPY_OP;
break;
// Collect request to read a buffer // Collect request to read a buffer
case 'r': case 'r':
req_read_ = REQ_READ; req_read_ = REQ_READ;
...@@ -169,33 +274,13 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -169,33 +274,13 @@ void RocmBandwidthTest::ParseArguments() {
} }
break; break;
// Size of buffers to use in copy and read/write operations
case 'm':
status = ParseOptionValue(optarg, size_list_);
if (status == false) {
print_help = true;
}
break;
// Enable Unidirectional copy among all valid buffers
case 'a':
copy_all_uni = true;
req_copy_all_unidir_ = REQ_COPY_ALL_UNIDIR;
break;
// Enable Bidirectional copy among all valid buffers
case 'A':
copy_all_bi = true;
req_copy_all_bidir_ = REQ_COPY_ALL_BIDIR;
break;
// getopt implementation returns the value of the unknown // getopt implementation returns the value of the unknown
// option or an option with missing operand in the variable // option or an option with missing operand in the variable
// optopt // optopt
case '?': case '?':
std::cout << "Argument is illegal or needs value: " << '?' << std::endl; std::cout << "Argument is illegal or needs value: " << '?' << std::endl;
if ((optopt == 'b' || optopt == 's' || optopt == 'd' || optopt == 'e')) { if ((optopt == 'b' || optopt == 's' || optopt == 'd' || optopt == 'm')) {
std::cout << "Error: Option -b -s -d and -e require argument" << std::endl; std::cout << "Error: Option -b -s -d and -m require argument" << std::endl;
} }
print_help = true; print_help = true;
break; break;
...@@ -204,6 +289,9 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -204,6 +289,9 @@ void RocmBandwidthTest::ParseArguments() {
break; break;
} }
} }
// Determine input of primary flags is valid
ValidateInputFlags(num_primary_flags, copy_mask, copy_ctrl_mask);
// Print help screen if user option has "-h" // Print help screen if user option has "-h"
if (print_help) { if (print_help) {
...@@ -232,27 +320,18 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -232,27 +320,18 @@ void RocmBandwidthTest::ParseArguments() {
exit(0); exit(0);
} }
// Invalidate request if user has requested full // Initialize buffer list if full copying in unidirectional
// copying for both unidirectional and bidirectional // or bidirectional mode is enabled
if ((copy_all_bi) && (copy_all_uni)) { if ((req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) ||
PrintHelpScreen(); (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR)) {
exit(0);
}
// Initialize buffer list if full copying in unidirectional mode is enabled
if ((copy_all_uni) || (validate_)) {
uint32_t size = pool_list_.size(); uint32_t size = pool_list_.size();
for (uint32_t idx = 0; idx < size; idx++) { for (uint32_t idx = 0; idx < size; idx++) {
src_list_.push_back(idx); if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
dst_list_.push_back(idx); bidir_list_.push_back(idx);
} } else {
} src_list_.push_back(idx);
dst_list_.push_back(idx);
// Initialize buffer list if full copying in bidirectional mode is enabled }
if (copy_all_bi) {
uint32_t size = pool_list_.size();
for (uint32_t idx = 0; idx < size; idx++) {
bidir_list_.push_back(idx);
} }
} }
...@@ -261,12 +340,43 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -261,12 +340,43 @@ void RocmBandwidthTest::ParseArguments() {
if (size_list_.size() == 0) { if (size_list_.size() == 0) {
uint32_t size_len = sizeof(SIZE_LIST)/sizeof(uint32_t); uint32_t size_len = sizeof(SIZE_LIST)/sizeof(uint32_t);
for (uint32_t idx = 0; idx < size_len; idx++) { for (uint32_t idx = 0; idx < size_len; idx++) {
if ((copy_all_bi) || (copy_all_uni) || (validate_)) {
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
if (idx == 16) { if (idx == 16) {
size_list_.push_back(SIZE_LIST[idx]); size_list_.push_back(SIZE_LIST[idx]);
} }
} else { }
size_list_.push_back(SIZE_LIST[idx]);
if (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) {
if (idx == 16) {
if (latency_ == false) {
size_list_.push_back(SIZE_LIST[idx]);
} else {
size_list_.push_back(LATENCY_SIZE_LIST[3]); // size of 8 bytes
}
}
}
if (req_copy_unidir_ == REQ_COPY_UNIDIR) {
if (latency_) {
size_list_.push_back(LATENCY_SIZE_LIST[idx]);
} else if (validate_) {
if (idx == 16) {
size_list_.push_back(SIZE_LIST[idx]);
}
} else {
size_list_.push_back(SIZE_LIST[idx]);
}
}
if (req_copy_bidir_ == REQ_COPY_BIDIR) {
if (validate_) {
if (idx == 16) {
size_list_.push_back(SIZE_LIST[idx]);
}
} else {
size_list_.push_back(SIZE_LIST[idx]);
}
} }
} }
} else { } else {
...@@ -275,6 +385,7 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -275,6 +385,7 @@ void RocmBandwidthTest::ParseArguments() {
size_list_[idx] = size_list_[idx] * 1024 * 1024; size_list_[idx] = size_list_[idx] * 1024 * 1024;
} }
} }
std::sort(size_list_.begin(), size_list_.end()); std::sort(size_list_.begin(), size_list_.end());
} }
...@@ -52,6 +52,7 @@ void RocmBandwidthTest::PrintHelpScreen() { ...@@ -52,6 +52,7 @@ void RocmBandwidthTest::PrintHelpScreen() {
std::cout << "\t -h Prints the help screen" << std::endl; std::cout << "\t -h Prints the help screen" << std::endl;
std::cout << "\t -q Query version of the test" << std::endl; std::cout << "\t -q Query version of the test" << std::endl;
std::cout << "\t -v Run the test in validation mode" << std::endl; std::cout << "\t -v Run the test in validation mode" << std::endl;
std::cout << "\t -l Run test to collect Latency data" << std::endl;
std::cout << "\t -c Time the operation using CPU Timers" << std::endl; std::cout << "\t -c Time the operation using CPU Timers" << std::endl;
std::cout << "\t -t Prints system topology and allocatable memory info" << std::endl; std::cout << "\t -t Prints system topology and allocatable memory info" << std::endl;
std::cout << "\t -m List of buffer sizes to use, specified in Megabytes" << std::endl; std::cout << "\t -m List of buffer sizes to use, specified in Megabytes" << std::endl;
...@@ -61,6 +62,17 @@ void RocmBandwidthTest::PrintHelpScreen() { ...@@ -61,6 +62,17 @@ void RocmBandwidthTest::PrintHelpScreen() {
std::cout << "\t -a Perform Unidirectional Copy involving all device combinations" << std::endl; std::cout << "\t -a Perform Unidirectional Copy involving all device combinations" << std::endl;
std::cout << "\t -A Perform Bidirectional Copy involving all device combinations" << std::endl; std::cout << "\t -A Perform Bidirectional Copy involving all device combinations" << std::endl;
std::cout << std::endl; std::cout << std::endl;
std::cout << "\t NOTE: Mixing following options is illegal/unsupported" << std::endl;
std::cout << "\t\t Case 1: rocm_bandwidth_test -a or -A with -m" << std::endl;
std::cout << "\t\t Case 2: rocm_bandwidth_test -b or -A with -l" << std::endl;
std::cout << "\t\t Case 3: rocm_bandwidth_test -a or -s x -d with -l and -c" << std::endl;
std::cout << "\t\t Case 4: rocm_bandwidth_test -a or -s x -d with -l and -m" << std::endl;
std::cout << "\t\t Case 5: rocm_bandwidth_test -a or -s x -d with -l and -v" << std::endl;
std::cout << "\t\t Case 6: rocm_bandwidth_test -a or -A -b or -s x -d y with -v and -c" << std::endl;
std::cout << "\t\t Case 7: rocm_bandwidth_test -a or -A -b or -s x -d y with -v and -m" << std::endl;
std::cout << std::endl;
std::cout << std::endl; std::cout << std::endl;
...@@ -119,11 +131,6 @@ void RocmBandwidthTest::PrintTopology() { ...@@ -119,11 +131,6 @@ void RocmBandwidthTest::PrintTopology() {
std::cout << " Allocatable Memory Size (KB): " std::cout << " Allocatable Memory Size (KB): "
<< node.pool_list.at(jdx).allocable_size_ / 1024 << std::endl; << node.pool_list.at(jdx).allocable_size_ / 1024 << std::endl;
/*
std::cout << " is fine-grained: "
<< node.pool_list.at(jdx).is_fine_grained_ << std::endl;
*/
} }
std::cout << std::endl; std::cout << std::endl;
} }
......
...@@ -48,11 +48,13 @@ ...@@ -48,11 +48,13 @@
#include <algorithm> #include <algorithm>
static void printRecord(uint32_t size, double avg_time, static void printRecord(uint32_t size, double avg_time,
double bandwidth, double min_time, double avg_bandwidth, double min_time,
double peak_bandwidth) { double peak_bandwidth) {
std::stringstream size_str; std::stringstream size_str;
if (size < 1024 * 1024) { if (size < 1024) {
size_str << size << " Bytes";
} else if (size < 1024 * 1024) {
size_str << size / 1024 << " KB"; size_str << size / 1024 << " KB";
} else { } else {
size_str << size / (1024 * 1024) << " MB"; size_str << size / (1024 * 1024) << " MB";
...@@ -66,7 +68,7 @@ static void printRecord(uint32_t size, double avg_time, ...@@ -66,7 +68,7 @@ static void printRecord(uint32_t size, double avg_time,
std::cout.width(format); std::cout.width(format);
std::cout << (avg_time * 1e6); std::cout << (avg_time * 1e6);
std::cout.width(format); std::cout.width(format);
std::cout << bandwidth; std::cout << avg_bandwidth;
std::cout.width(format); std::cout.width(format);
std::cout << (min_time * 1e6); std::cout << (min_time * 1e6);
std::cout.width(format); std::cout.width(format);
...@@ -175,6 +177,11 @@ void RocmBandwidthTest::Display() const { ...@@ -175,6 +177,11 @@ void RocmBandwidthTest::Display() const {
return; return;
} }
if ((req_copy_bidir_ == REQ_COPY_BIDIR) ||
(req_copy_unidir_ == REQ_COPY_UNIDIR)) {
PrintVersion();
}
for (uint32_t idx = 0; idx < trans_size; idx++) { for (uint32_t idx = 0; idx < trans_size; idx++) {
async_trans_t trans = trans_list_[idx]; async_trans_t trans = trans_list_[idx];
if ((trans.req_type_ == REQ_COPY_BIDIR) || if ((trans.req_type_ == REQ_COPY_BIDIR) ||
......
...@@ -206,78 +206,38 @@ void RocmBandwidthTest::PopulateAccessMatrix() { ...@@ -206,78 +206,38 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
uint32_t size = pool_list_.size(); uint32_t size = pool_list_.size();
for (uint32_t src_idx = 0; src_idx < size; src_idx++) { for (uint32_t src_idx = 0; src_idx < size; src_idx++) {
// Determine if the pool belongs to Cpu and is coarse-grained // Get handle of Src agent of the pool
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_; uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
if (src_dev_type == HSA_DEVICE_TYPE_CPU) {
bool src_fine_grained = pool_list_[src_idx].is_fine_grained_;
if (src_fine_grained == false) {
continue;
}
}
*/
hsa_agent_t src_agent = pool_list_[src_idx].owner_agent_; hsa_agent_t src_agent = pool_list_[src_idx].owner_agent_;
hsa_amd_memory_pool_t src_pool = pool_list_[src_idx].pool_; hsa_amd_memory_pool_t src_pool = pool_list_[src_idx].pool_;
hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_;
for (uint32_t dst_idx = 0; dst_idx < size; dst_idx++) { for (uint32_t dst_idx = 0; dst_idx < size; dst_idx++) {
// Determine if the pool belongs to Cpu and is coarse-grained // Get handle of Dst pool
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_; uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
if (dst_dev_type == HSA_DEVICE_TYPE_CPU) {
bool dst_fine_grained = pool_list_[dst_idx].is_fine_grained_;
if (dst_fine_grained == false) {
continue;
}
}
*/
hsa_agent_t dst_agent = pool_list_[dst_idx].owner_agent_; hsa_agent_t dst_agent = pool_list_[dst_idx].owner_agent_;
hsa_amd_memory_pool_t dst_pool = pool_list_[dst_idx].pool_; hsa_amd_memory_pool_t dst_pool = pool_list_[dst_idx].pool_;
hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;
// Determine if accessibility to dst pool for src agent is not denied // Determine if src agent has access to dst pool
hsa_amd_memory_pool_access_t access1; hsa_amd_memory_pool_access_t access;
status = hsa_amd_agent_memory_pool_get_info(src_agent, dst_pool, status = hsa_amd_agent_memory_pool_get_info(src_agent, dst_pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access1); HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
ErrorCheck(status); ErrorCheck(status);
// Determine if accessibility to src pool for dst agent is not denied if ((src_dev_type == HSA_DEVICE_TYPE_CPU) &&
hsa_amd_memory_pool_access_t access2; (dst_dev_type == HSA_DEVICE_TYPE_GPU) &&
status = hsa_amd_agent_memory_pool_get_info(dst_agent, src_pool, (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED)) {
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access2); status = hsa_amd_agent_memory_pool_get_info(dst_agent, src_pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
// Access between the two agents is Non-Existent ErrorCheck(status);
if ((access1 == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) &&
(access2 == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED)) {
access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = 0;
}
// Access between the two agents is Unidirectional
if ((access1 == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) ||
(access2 == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED)) {
if ((src_dev_type == HSA_DEVICE_TYPE_GPU) &&
(dst_dev_type == HSA_DEVICE_TYPE_GPU)) {
access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = 0;
} else {
access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = 1;
}
} }
// Access between the two agents is Bidirectional // Access between the two agents is Non-Existent
if ((access1 != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) && uint32_t path;
(access2 != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED)) { path = (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) ? 0 : 1;
access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = 2; access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = path;
}
} }
} }
} }
......
...@@ -125,48 +125,22 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type, ...@@ -125,48 +125,22 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
uint32_t src_size = src_list.size(); uint32_t src_size = src_list.size();
uint32_t dst_size = dst_list.size(); uint32_t dst_size = dst_list.size();
// hsa_status_t status;
// hsa_amd_memory_pool_access_t access;
for (uint32_t idx = 0; idx < src_size; idx++) { for (uint32_t idx = 0; idx < src_size; idx++) {
// Retrieve Roc runtime handles for Src memory pool and agents // Retrieve Roc runtime handles for Src memory pool and agents
uint32_t src_idx = src_list[idx]; uint32_t src_idx = src_list[idx];
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_; uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
// hsa_agent_t src_agent = pool_list_[src_idx].owner_agent_;
hsa_amd_memory_pool_t src_pool = pool_list_[src_idx].pool_; hsa_amd_memory_pool_t src_pool = pool_list_[src_idx].pool_;
// bool src_fine_grained = pool_list_[src_idx].is_fine_grained_;
hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_; hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
filter_out = FilterCpuPool(req_type, src_dev_type, src_fine_grained);
if (filter_out) {
continue;
}
*/
for (uint32_t jdx = 0; jdx < dst_size; jdx++) { for (uint32_t jdx = 0; jdx < dst_size; jdx++) {
// Retrieve Roc runtime handles for Dst memory pool and agents // Retrieve Roc runtime handles for Dst memory pool and agents
uint32_t dst_idx = dst_list[jdx]; uint32_t dst_idx = dst_list[jdx];
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_; uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
// hsa_agent_t dst_agent = pool_list_[dst_idx].owner_agent_;
hsa_amd_memory_pool_t dst_pool = pool_list_[dst_idx].pool_; hsa_amd_memory_pool_t dst_pool = pool_list_[dst_idx].pool_;
// bool dst_fine_grained = pool_list_[dst_idx].is_fine_grained_;
hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_; hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
filter_out = FilterCpuPool(req_type, dst_dev_type, dst_fine_grained);
if (filter_out) {
continue;
}
*/
// Filter out transactions that involve only Cpu agents/devices // Filter out transactions that involve only Cpu agents/devices
// without regard to type of request, default run, partial or full // without regard to type of request, default run, partial or full
// unidirectional or bidirectional copies // unidirectional or bidirectional copies
...@@ -185,7 +159,7 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type, ...@@ -185,7 +159,7 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
} }
} }
// Determine if accessibility to src pool for dst agent is not denied // Determine if accessibility to dst pool for src agent is not denied
uint32_t path_exists = access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx]; uint32_t path_exists = access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx];
if (path_exists == 0) { if (path_exists == 0) {
if ((req_type == REQ_COPY_ALL_BIDIR) || if ((req_type == REQ_COPY_ALL_BIDIR) ||
...@@ -325,6 +299,7 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) { ...@@ -325,6 +299,7 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
} }
// Copy operation does not involve a Gpu device // Copy operation does not involve a Gpu device
// Divide bandwidth with 10^9 to get size in GigaBytes (10^9)
if (trans.copy.uses_gpu_ != true) { if (trans.copy.uses_gpu_ != true) {
avg_time = trans.cpu_avg_time_[idx]; avg_time = trans.cpu_avg_time_[idx];
min_time = trans.cpu_min_time_[idx]; min_time = trans.cpu_min_time_[idx];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment