Commit b9c55660 authored by Ramesh Errabolu
Browse files

Print number of hops on a link

parent 4208e414
......@@ -197,9 +197,24 @@ double RocmBandwidthTest::GetGpuCopyTime(bool bidir,
hsa_amd_profiling_async_copy_time_t async_time_rev = {0};
err_= hsa_amd_profiling_get_async_copy_time(signal_rev, &async_time_rev);
ErrorCheck(err_);
// Compute time taken to copy
double start = min(async_time_fwd.start, async_time_rev.start);
double end = max(async_time_fwd.end, async_time_rev.end);
return(end - start);
double copy_time = end - start;
// Forward copy completed before Reverse began
if (async_time_fwd.end < async_time_rev.start) {
return (copy_time - (async_time_rev.start - async_time_fwd.end));
}
// Reverse copy completed before Forward began
if (async_time_rev.end < async_time_fwd.start) {
return (copy_time - (async_time_fwd.start - async_time_rev.end));
}
// Forward and Reverse copies overlapped
return copy_time;
}
void RocmBandwidthTest::copy_buffer(void* dst, hsa_agent_t dst_agent,
......@@ -544,7 +559,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
// Initialize version of the test
version_.major_id = 2;
version_.minor_id = 1;
version_.minor_id = 2;
version_.step_id = 0;
version_.reserved = 0;
......
......@@ -207,13 +207,9 @@ class RocmBandwidthTest : public BaseTest {
// @brief: Discover the topology of pools on Rocm Platform
void DiscoverTopology();
// @brief: Populate link type for the set of agents
void DiscoverLinkType();
void BindLinkType(uint32_t idx1, uint32_t idx2);
// @brief: Populate link weight for the set of agents
void DiscoverLinkWeight();
void BindLinkWeight(uint32_t idx1, uint32_t idx2);
// @brief: Populate link properties for the set of agents
void DiscoverLinkProps();
void BindLinkProps(uint32_t idx1, uint32_t idx2);
// @brief: Populates the access matrix
void PopulateAccessMatrix();
......@@ -221,14 +217,10 @@ class RocmBandwidthTest : public BaseTest {
// @brief: Print topology info
void PrintTopology();
// @brief: Print link type matrix
void PrintLinkTypeMatrix() const;
// @brief: Print link weight matrix
void PrintLinkWeightMatrix() const;
// @brief: Print access matrix
void PrintAccessMatrix() const;
// @brief: Print in matrix form various
// properties such as access, link weight,
// link type and number of hops, etc
void PrintLinkPropsMatrix(uint32_t key) const;
// @brief: Print info on agents in system
void PrintAgentsList();
......@@ -343,6 +335,15 @@ class RocmBandwidthTest : public BaseTest {
// Populate the Bus Device Function of Gpu device
friend void PopulateBDF(uint32_t bdf_id, agent_info_t *agent_info);
// Compute the type and weight of a link
friend uint32_t GetLinkType(hsa_device_type_t src_dev_type,
hsa_device_type_t dst_dev_type,
hsa_amd_memory_pool_link_info_t* link_info, uint32_t hops);
friend uint32_t GetLinkWeight(hsa_amd_memory_pool_link_info_t* link_info, uint32_t hops);
// Return value of input key as string
friend std::string GetValueAsString(uint32_t key, uint32_t value);
// Structure of Version used to identify an instance of RocmBandwidthTest
struct RocmBandwidthVersion {
......@@ -433,9 +434,14 @@ class RocmBandwidthTest : public BaseTest {
static const uint32_t LINK_TYPE_SELF = 0x00;
static const uint32_t LINK_TYPE_PCIE = 0x01;
static const uint32_t LINK_TYPE_XGMI = 0x02;
static const uint32_t LINK_TYPE_MULTI_HOPS = 0x03;
static const uint32_t LINK_TYPE_IGNORED = 0x03;
static const uint32_t LINK_TYPE_NO_PATH = 0xFFFFFFFF;
static const uint32_t LINK_PROP_HOPS = 0x00;
static const uint32_t LINK_PROP_TYPE = 0x01;
static const uint32_t LINK_PROP_WEIGHT = 0x02;
static const uint32_t LINK_PROP_ACCESS = 0x03;
// List used to store transactions per user request
vector<async_trans_t> trans_list_;
......@@ -444,8 +450,10 @@ class RocmBandwidthTest : public BaseTest {
// Matrix used to track Access among agents
uint32_t* access_matrix_;
uint32_t* link_hops_matrix_;
uint32_t* link_type_matrix_;
uint32_t* link_weight_matrix_;
uint32_t* direct_access_matrix_;
// Env key to determine if Fine-grained or
// Coarse-grained pool should be filtered out
......
......@@ -317,10 +317,11 @@ void RocmBandwidthTest::ParseArguments() {
// Print system topology if user option has "-t"
if (print_topology) {
PrintVersion();
PrintTopology();
PrintAccessMatrix();
PrintLinkWeightMatrix();
PrintLinkTypeMatrix();
PrintLinkPropsMatrix(LINK_PROP_ACCESS);
PrintLinkPropsMatrix(LINK_PROP_TYPE);
PrintLinkPropsMatrix(LINK_PROP_WEIGHT);
exit(0);
}
......
......@@ -43,6 +43,11 @@
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
#include <assert.h>
#include <iostream>
#include <string>
#include <sstream>
// @Brief: Print Help Menu Screen
void RocmBandwidthTest::PrintHelpScreen() {
......@@ -137,94 +142,37 @@ void RocmBandwidthTest::PrintTopology() {
std::cout << std::endl;
}
// @brief: Print the access matrix as a left-aligned table. Rows are
// indexed by source device, columns by destination device, and every
// cell shows the value stored in access_matrix_ for that pair
void RocmBandwidthTest::PrintAccessMatrix() const {

  // Width, in characters, of every column of the table
  uint32_t col_width = 10;
  std::cout.setf(ios::left);

  // Title line, preceded by one empty column
  std::cout.width(col_width);
  std::cout << "";
  std::cout.width(col_width);
  std::cout << "Device Access";
  std::cout << std::endl << std::endl;

  // Header row: empty column, the "D/D" label, then one index per device
  std::cout.width(col_width);
  std::cout << "";
  std::cout.width(col_width);
  std::cout << "D/D";
  for (uint32_t col = 0; col < agent_index_; col++) {
    std::cout.width(col_width);
    std::cout << col;
  }
  std::cout << std::endl << std::endl;

  // One row per source device
  for (uint32_t row = 0; row < agent_index_; row++) {
    std::cout.width(col_width);
    std::cout << "";
    std::cout.width(col_width);
    std::cout << row;

    // Matrix is stored row-major: entry = (row * number of devices) + col
    for (uint32_t col = 0; col < agent_index_; col++) {
      std::cout.width(col_width);
      std::cout << access_matrix_[(row * agent_index_) + col];
    }
    std::cout << std::endl << std::endl;
  }

  std::cout << std::endl;
}
// @brief: Print the link type matrix as a left-aligned table. Rows are
// indexed by source device and columns by destination device. A cell
// shows "X" for xGMI, "P" for PCIe and "N/A" for self, no-path or
// multi-hop entries; any other stored value prints nothing
void RocmBandwidthTest::PrintLinkTypeMatrix() const {

  uint32_t format = 10;
  std::cout.setf(ios::left);

  // Title line, preceded by one empty column
  std::cout.width(format);
  std::cout << "";
  std::cout.width(format);
  std::cout << "Device Link Types: P = PCIe, X = xGMI, N/A = Not Applicable";
  std::cout << std::endl;
  std::cout << std::endl;

  // Header row: empty column, the "D/D" label, then one index per device
  std::cout.width(format);
  std::cout << "";
  std::cout.width(format);
  std::cout << "D/D";
  for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
    std::cout.width(format);
    std::cout << idx0;
  }
  std::cout << std::endl;
  std::cout << std::endl;

  // One row per source device
  for (uint32_t src_idx = 0; src_idx < agent_index_; src_idx++) {
    std::cout.width(format);
    std::cout << "";
    std::cout.width(format);
    std::cout << src_idx;
    for (uint32_t dst_idx = 0; dst_idx < agent_index_; dst_idx++) {
      uint32_t link_type = link_type_matrix_[(src_idx * agent_index_) + dst_idx];
      std::cout.width(format);
      if (link_type == LINK_TYPE_XGMI) {
        std::cout << "X";
      } else if (link_type == LINK_TYPE_PCIE) {
        std::cout << "P";
      } else if ((link_type == LINK_TYPE_SELF) ||
                 (link_type == LINK_TYPE_NO_PATH) ||
                 (link_type == LINK_TYPE_MULTI_HOPS)) {
        std::cout << "N/A";
      }
    }
    std::cout << std::endl;
    std::cout << std::endl;
  }
  std::cout << std::endl;
}

// @brief: Return the printable form of one link property value
//
// @param key One of the RocmBandwidthTest::LINK_PROP_* selectors; it
// determines how value is interpreted
//
// @param value Raw matrix entry to be rendered
//
// @return
//   LINK_PROP_ACCESS: value in decimal
//   LINK_PROP_HOPS / LINK_PROP_WEIGHT: "N/A" if value is 0xFFFFFFFF,
//     otherwise value in decimal
//   LINK_PROP_TYPE: "X" for xGMI, "P" for PCIe, "N/A" for self,
//     no-path and ignored links
//
// An unknown key, or an unknown link type under LINK_PROP_TYPE, prints
// a diagnostic and asserts. When asserts are compiled out the function
// returns "N/A", so control never runs off the end of a value-returning
// function
std::string GetValueAsString(uint32_t key, uint32_t value) {

  std::stringstream ss;
  switch (key) {
    case RocmBandwidthTest::LINK_PROP_ACCESS:
      ss << value;
      return ss.str();

    case RocmBandwidthTest::LINK_PROP_HOPS:
    case RocmBandwidthTest::LINK_PROP_WEIGHT:
      // 0xFFFFFFFF is the sentinel rendered as "N/A"
      if (value == 0xFFFFFFFF) {
        return std::string("N/A");
      }
      ss << value;
      return ss.str();

    case RocmBandwidthTest::LINK_PROP_TYPE:
      if ((value == RocmBandwidthTest::LINK_TYPE_SELF) ||
          (value == RocmBandwidthTest::LINK_TYPE_NO_PATH) ||
          (value == RocmBandwidthTest::LINK_TYPE_IGNORED)) {
        return std::string("N/A");
      }
      if (value == RocmBandwidthTest::LINK_TYPE_XGMI) {
        return std::string("X");
      }
      if (value == RocmBandwidthTest::LINK_TYPE_PCIE) {
        return std::string("P");
      }
      // Unknown link type, report it below
      break;

    default:
      break;
  }

  std::cout << "An illegal key to get value for" << std::endl;
  assert(false);

  // Reached only when asserts are disabled
  return std::string("N/A");
}
void RocmBandwidthTest::PrintLinkWeightMatrix() const {
void RocmBandwidthTest::PrintLinkPropsMatrix(uint32_t key) const {
uint32_t format = 10;
std::cout.setf(ios::left);
......@@ -232,7 +180,24 @@ void RocmBandwidthTest::PrintLinkWeightMatrix() const {
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << "Device Numa Distance";
switch(key) {
case LINK_PROP_ACCESS:
std::cout << "Inter-Device Access";
break;
case LINK_PROP_TYPE:
std::cout << "Inter-Device Link Type: P = PCIe, X = xGMI, N/A = Not Applicable";
break;
case LINK_PROP_HOPS:
std::cout << "Inter-Device Link Hops";
break;
case LINK_PROP_WEIGHT:
std::cout << "Inter-Device Numa Distance";
break;
default:
std::cout << "An illegal key to print matrix" << std::endl;
assert(false);
}
std::cout << std::endl;
std::cout << std::endl;
......@@ -253,13 +218,23 @@ void RocmBandwidthTest::PrintLinkWeightMatrix() const {
std::cout.width(format);
std::cout << src_idx;
for (uint32_t dst_idx = 0; dst_idx < agent_index_; dst_idx++) {
uint32_t link_weight = link_weight_matrix_[(src_idx * agent_index_) + dst_idx];
std::cout.width(format);
if (link_weight == 0xFFFFFFFF) {
std::cout << "N/A";
} else {
std::cout << link_weight;
uint32_t value = 0x00;
switch(key) {
case LINK_PROP_ACCESS:
value = direct_access_matrix_[(src_idx * agent_index_) + dst_idx];
break;
case LINK_PROP_TYPE:
value = link_type_matrix_[(src_idx * agent_index_) + dst_idx];
break;
case LINK_PROP_HOPS:
value = link_hops_matrix_[(src_idx * agent_index_) + dst_idx];
break;
case LINK_PROP_WEIGHT:
value = link_weight_matrix_[(src_idx * agent_index_) + dst_idx];
break;
}
std::cout.width(format);
std::cout << GetValueAsString(key, value);
}
std::cout << std::endl;
std::cout << std::endl;
......
......@@ -152,7 +152,7 @@ void RocmBandwidthTest::Display() const {
if (validate_) {
PrintVersion();
DisplayDevInfo();
PrintAccessMatrix();
PrintLinkPropsMatrix(LINK_PROP_ACCESS);
DisplayValidationMatrix();
return;
}
......@@ -160,9 +160,9 @@ void RocmBandwidthTest::Display() const {
if (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) {
PrintVersion();
DisplayDevInfo();
PrintAccessMatrix();
PrintLinkWeightMatrix();
PrintLinkTypeMatrix();
PrintLinkPropsMatrix(LINK_PROP_ACCESS);
PrintLinkPropsMatrix(LINK_PROP_WEIGHT);
PrintLinkPropsMatrix(LINK_PROP_TYPE);
DisplayCopyTimeMatrix(true);
return;
}
......@@ -171,9 +171,9 @@ void RocmBandwidthTest::Display() const {
if (bw_default_run_ == NULL) {
PrintVersion();
DisplayDevInfo();
PrintAccessMatrix();
PrintLinkWeightMatrix();
PrintLinkTypeMatrix();
PrintLinkPropsMatrix(LINK_PROP_ACCESS);
PrintLinkPropsMatrix(LINK_PROP_WEIGHT);
PrintLinkPropsMatrix(LINK_PROP_TYPE);
}
DisplayCopyTimeMatrix(true);
return;
......
......@@ -201,6 +201,7 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
// Allocate memory to hold access lists
access_matrix_ = new uint32_t[agent_index_ * agent_index_]();
direct_access_matrix_ = new uint32_t[agent_index_ * agent_index_]();
hsa_status_t status;
uint32_t size = pool_list_.size();
......@@ -225,6 +226,11 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
status = hsa_amd_agent_memory_pool_get_info(src_agent, dst_pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
ErrorCheck(status);
// Record if Src device can access or not
uint32_t path;
path = (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) ? 0 : 1;
direct_access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = path;
if ((src_dev_type == HSA_DEVICE_TYPE_CPU) &&
(dst_dev_type == HSA_DEVICE_TYPE_GPU) &&
......@@ -235,7 +241,6 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
}
// Access between the two agents is Non-Existent
uint32_t path;
path = (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) ? 0 : 1;
access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = path;
}
......@@ -250,88 +255,54 @@ void RocmBandwidthTest::DiscoverTopology() {
// Populate the access, link type and weight matrices
// Access matrix must be populated first
PopulateAccessMatrix();
DiscoverLinkType();
DiscoverLinkWeight();
DiscoverLinkProps();
}
void RocmBandwidthTest::BindLinkType(uint32_t idx1, uint32_t idx2) {
uint32_t GetLinkType(hsa_device_type_t src_dev_type,
hsa_device_type_t dst_dev_type,
hsa_amd_memory_pool_link_info_t* link_info, uint32_t hops) {
// Agent has no pools so no need to look for link type distance
if (agent_pool_list_[idx2].pool_list.size() == 0) {
link_type_matrix_[(idx1 * agent_index_) + idx2] = LINK_TYPE_NO_PATH;
return;
// Link type is ignored, linkinfo is illegal
// Currently Thunk collapses multi-hop paths into one
// while accumulating their numa weight
// @note: Thunk retains the original link type
if (hops != 1) {
return RocmBandwidthTest::LINK_TYPE_IGNORED;
}
uint32_t hops = 0;
hsa_agent_t agent1 = agent_list_[idx1].agent_;
hsa_amd_memory_pool_t& pool = agent_pool_list_[idx2].pool_list[0].pool_;
err_ = hsa_amd_agent_memory_pool_get_info(agent1, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS, &hops);
if (hops < 1) {
link_type_matrix_[(idx1 * agent_index_) + idx2] = LINK_TYPE_NO_PATH;
return;
// Return link type only if it specified as XGMI
if ((link_info[0]).link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) {
return RocmBandwidthTest::LINK_TYPE_XGMI;
}
hsa_amd_memory_pool_link_info_t* link_info;
uint32_t link_info_sz = hops * sizeof(hsa_amd_memory_pool_link_info_t);
link_info = (hsa_amd_memory_pool_link_info_t *)malloc(link_info_sz);
memset(link_info, 0, (hops * sizeof(hsa_amd_memory_pool_link_info_t)));
err_ = hsa_amd_agent_memory_pool_get_info(agent1, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info);
// Initialize link type based on Src and Dst devices plus link
// type reported by ROCr library
hsa_device_type_t src_dev_type = agent_list_[idx1].device_type_;
hsa_device_type_t dst_dev_type = agent_list_[idx2].device_type_;
link_type_matrix_[(idx1 * agent_index_) + idx2] = LINK_TYPE_NO_PATH;
// Update link matrix if there is one hop. Currently Thunk
// accumulates numa weight of the multiple hops into one link
if (hops == 1) {
if ((link_info[0]).link_type == HSA_AMD_LINK_INFO_TYPE_XGMI) {
link_type_matrix_[(idx1 * agent_index_) + idx2] = LINK_TYPE_XGMI;
free(link_info);
return;
}
// Update link type to be PCIE if one or both devices are GPU's
if ((src_dev_type == HSA_DEVICE_TYPE_GPU) ||
(dst_dev_type == HSA_DEVICE_TYPE_GPU)) {
link_type_matrix_[(idx1 * agent_index_) + idx2] = LINK_TYPE_PCIE;
free(link_info);
return;
}
// In this case all we know is there is a path involving
// one or more links. Since it binding either two GPU's or
// one Gpu and one Cpu, we infer it to be of type PCIe
if ((src_dev_type == HSA_DEVICE_TYPE_GPU) ||
(dst_dev_type == HSA_DEVICE_TYPE_GPU)) {
return RocmBandwidthTest::LINK_TYPE_PCIE;
}
// This should not be happening
link_type_matrix_[(idx1 * agent_index_) + idx2] = LINK_TYPE_MULTI_HOPS;
free(link_info);
// This occurs when both devices are CPU's
return RocmBandwidthTest::LINK_TYPE_IGNORED;
}
void RocmBandwidthTest::DiscoverLinkType() {
uint32_t GetLinkWeight(hsa_amd_memory_pool_link_info_t* link_info, uint32_t hops) {
// Allocate space if it is first time
if (link_type_matrix_ == NULL) {
link_type_matrix_ = new uint32_t[agent_index_ * agent_index_]();
}
agent_info_t agent_info;
for (uint32_t idx1 = 0; idx1 < agent_index_; idx1++) {
for (uint32_t idx2 = 0; idx2 < agent_index_; idx2++) {
if (idx1 == idx2) {
link_type_matrix_[(idx1 * agent_index_) + idx2] = LINK_TYPE_SELF;
continue;
}
BindLinkType(idx1, idx2);
}
uint32_t weight = 0;
for(uint32_t hopIdx = 0; hopIdx < hops; hopIdx++) {
weight += (link_info[hopIdx]).numa_distance;
}
return weight;
}
void RocmBandwidthTest::BindLinkWeight(uint32_t idx1, uint32_t idx2) {
void RocmBandwidthTest::BindLinkProps(uint32_t idx1, uint32_t idx2) {
// Agent has no pools so no need to look for numa distance
if (agent_pool_list_[idx2].pool_list.size() == 0) {
link_hops_matrix_[(idx1 * agent_index_) + idx2] = 0xFFFFFFFF;
link_weight_matrix_[(idx1 * agent_index_) + idx2] = 0xFFFFFFFF;
link_type_matrix_[(idx1 * agent_index_) + idx2] = LINK_TYPE_NO_PATH;
return;
}
......@@ -341,7 +312,9 @@ void RocmBandwidthTest::BindLinkWeight(uint32_t idx1, uint32_t idx2) {
err_ = hsa_amd_agent_memory_pool_get_info(agent1, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS, &hops);
if (hops < 1) {
link_hops_matrix_[(idx1 * agent_index_) + idx2] = 0xFFFFFFFF;
link_weight_matrix_[(idx1 * agent_index_) + idx2] = 0xFFFFFFFF;
link_type_matrix_[(idx1 * agent_index_) + idx2] = LINK_TYPE_NO_PATH;
return;
}
......@@ -351,17 +324,27 @@ void RocmBandwidthTest::BindLinkWeight(uint32_t idx1, uint32_t idx2) {
memset(link_info, 0, (hops * sizeof(hsa_amd_memory_pool_link_info_t)));
err_ = hsa_amd_agent_memory_pool_get_info(agent1, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info);
link_weight_matrix_[(idx1 * agent_index_) + idx2] = 0;
for(uint32_t hopIdx = 0; hopIdx < hops; hopIdx++) {
link_weight_matrix_[(idx1 * agent_index_) + idx2] += (link_info[hopIdx]).numa_distance;
}
link_hops_matrix_[(idx1 * agent_index_) + idx2] = hops;
link_weight_matrix_[(idx1 * agent_index_) + idx2] = GetLinkWeight(link_info, hops);
// Initialize link type based on Src and Dst devices plus link
// type reported by ROCr library
hsa_device_type_t src_dev_type = agent_list_[idx1].device_type_;
hsa_device_type_t dst_dev_type = agent_list_[idx2].device_type_;
link_type_matrix_[(idx1 * agent_index_) + idx2] = GetLinkType(src_dev_type,
dst_dev_type, link_info, hops);
// Free the allocated link block
free(link_info);
}
void RocmBandwidthTest::DiscoverLinkWeight() {
void RocmBandwidthTest::DiscoverLinkProps() {
// Allocate space if it is first time
if (link_weight_matrix_ == NULL) {
link_type_matrix_ = new uint32_t[agent_index_ * agent_index_]();
link_hops_matrix_ = new uint32_t[agent_index_ * agent_index_]();
link_weight_matrix_ = new uint32_t[agent_index_ * agent_index_]();
}
......@@ -369,10 +352,12 @@ void RocmBandwidthTest::DiscoverLinkWeight() {
for (uint32_t idx1 = 0; idx1 < agent_index_; idx1++) {
for (uint32_t idx2 = 0; idx2 < agent_index_; idx2++) {
if (idx1 == idx2) {
link_hops_matrix_[(idx1 * agent_index_) + idx2] = 0;
link_weight_matrix_[(idx1 *agent_index_) + idx2] = 0;
link_type_matrix_[(idx1 * agent_index_) + idx2] = LINK_TYPE_SELF;
continue;
}
BindLinkWeight(idx1, idx2);
BindLinkProps(idx1, idx2);
}
}
}
......
......@@ -172,11 +172,9 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
continue;
}
if (src_dev_idx > dst_dev_idx) {
bool mirror = FindMirrorRequest(src_idx, dst_idx);
if (mirror) {
continue;
}
bool mirror = FindMirrorRequest(src_idx, dst_idx);
if (mirror) {
continue;
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment