Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
rocm_bandwidth_test
Commits
fca6eaa8
Unverified
Commit
fca6eaa8
authored
May 07, 2019
by
Ramesh Errabolu
Committed by
GitHub
May 07, 2019
Browse files
Merge pull request #35 from RadeonOpenCompute/printLinkHops
Print number of hops on a link
parents
4208e414
b9c55660
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
177 additions
and
195 deletions
+177
-195
rocm_bandwidth_test.cpp
rocm_bandwidth_test.cpp
+17
-2
rocm_bandwidth_test.hpp
rocm_bandwidth_test.hpp
+24
-16
rocm_bandwidth_test_parse.cpp
rocm_bandwidth_test_parse.cpp
+4
-3
rocm_bandwidth_test_print.cpp
rocm_bandwidth_test_print.cpp
+66
-91
rocm_bandwidth_test_report.cpp
rocm_bandwidth_test_report.cpp
+7
-7
rocm_bandwidth_test_topology.cpp
rocm_bandwidth_test_topology.cpp
+56
-71
rocm_bandwidth_test_trans.cpp
rocm_bandwidth_test_trans.cpp
+3
-5
No files found.
rocm_bandwidth_test.cpp
View file @
fca6eaa8
...
@@ -197,9 +197,24 @@ double RocmBandwidthTest::GetGpuCopyTime(bool bidir,
...
@@ -197,9 +197,24 @@ double RocmBandwidthTest::GetGpuCopyTime(bool bidir,
hsa_amd_profiling_async_copy_time_t
async_time_rev
=
{
0
};
hsa_amd_profiling_async_copy_time_t
async_time_rev
=
{
0
};
err_
=
hsa_amd_profiling_get_async_copy_time
(
signal_rev
,
&
async_time_rev
);
err_
=
hsa_amd_profiling_get_async_copy_time
(
signal_rev
,
&
async_time_rev
);
ErrorCheck
(
err_
);
ErrorCheck
(
err_
);
// Compute time taken to copy
double
start
=
min
(
async_time_fwd
.
start
,
async_time_rev
.
start
);
double
start
=
min
(
async_time_fwd
.
start
,
async_time_rev
.
start
);
double
end
=
max
(
async_time_fwd
.
end
,
async_time_rev
.
end
);
double
end
=
max
(
async_time_fwd
.
end
,
async_time_rev
.
end
);
return
(
end
-
start
);
double
copy_time
=
end
-
start
;
// Forward copy completed before Reverse began
if
(
async_time_fwd
.
end
<
async_time_rev
.
start
)
{
return
(
copy_time
-
(
async_time_rev
.
start
-
async_time_fwd
.
end
));
}
// Reverse copy completed before Forward began
if
(
async_time_rev
.
end
<
async_time_fwd
.
start
)
{
return
(
copy_time
-
(
async_time_fwd
.
start
-
async_time_rev
.
end
));
}
// Forward and Reverse copies overlapped
return
copy_time
;
}
}
void
RocmBandwidthTest
::
copy_buffer
(
void
*
dst
,
hsa_agent_t
dst_agent
,
void
RocmBandwidthTest
::
copy_buffer
(
void
*
dst
,
hsa_agent_t
dst_agent
,
...
@@ -544,7 +559,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
...
@@ -544,7 +559,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
// Initialize version of the test
// Initialize version of the test
version_
.
major_id
=
2
;
version_
.
major_id
=
2
;
version_
.
minor_id
=
1
;
version_
.
minor_id
=
2
;
version_
.
step_id
=
0
;
version_
.
step_id
=
0
;
version_
.
reserved
=
0
;
version_
.
reserved
=
0
;
...
...
rocm_bandwidth_test.hpp
View file @
fca6eaa8
...
@@ -207,13 +207,9 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -207,13 +207,9 @@ class RocmBandwidthTest : public BaseTest {
// @brief: Discover the topology of pools on Rocm Platform
// @brief: Discover the topology of pools on Rocm Platform
void
DiscoverTopology
();
void
DiscoverTopology
();
// @brief: Populate link type for the set of agents
// @brief: Populate link properties for the set of agents
void
DiscoverLinkType
();
void
DiscoverLinkProps
();
void
BindLinkType
(
uint32_t
idx1
,
uint32_t
idx2
);
void
BindLinkProps
(
uint32_t
idx1
,
uint32_t
idx2
);
// @brief: Populate link weight for the set of agents
void
DiscoverLinkWeight
();
void
BindLinkWeight
(
uint32_t
idx1
,
uint32_t
idx2
);
// @brief: Populates the access matrix
// @brief: Populates the access matrix
void
PopulateAccessMatrix
();
void
PopulateAccessMatrix
();
...
@@ -221,14 +217,10 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -221,14 +217,10 @@ class RocmBandwidthTest : public BaseTest {
// @brief: Print topology info
// @brief: Print topology info
void
PrintTopology
();
void
PrintTopology
();
// @brief: Print link type matrix
// @brief: Print in matrix form various
void
PrintLinkTypeMatrix
()
const
;
// properties such as access, link weight,
// link type and number of hops, etc
// @brief: Print link weight matrix
void
PrintLinkPropsMatrix
(
uint32_t
key
)
const
;
void
PrintLinkWeightMatrix
()
const
;
// @brief: Print access matrix
void
PrintAccessMatrix
()
const
;
// @brief: Print info on agents in system
// @brief: Print info on agents in system
void
PrintAgentsList
();
void
PrintAgentsList
();
...
@@ -343,6 +335,15 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -343,6 +335,15 @@ class RocmBandwidthTest : public BaseTest {
// Populate the Bus Device Function of Gpu device
// Populate the Bus Device Function of Gpu device
friend
void
PopulateBDF
(
uint32_t
bdf_id
,
agent_info_t
*
agent_info
);
friend
void
PopulateBDF
(
uint32_t
bdf_id
,
agent_info_t
*
agent_info
);
// Compute the type and weight of a link
friend
uint32_t
GetLinkType
(
hsa_device_type_t
src_dev_type
,
hsa_device_type_t
dst_dev_type
,
hsa_amd_memory_pool_link_info_t
*
link_info
,
uint32_t
hops
);
friend
uint32_t
GetLinkWeight
(
hsa_amd_memory_pool_link_info_t
*
link_info
,
uint32_t
hops
);
// Return value of input key as string
friend
std
::
string
GetValueAsString
(
uint32_t
key
,
uint32_t
value
);
// Structure of Version used to identify an instance of RocmBandwidthTest
// Structure of Version used to identify an instance of RocmBandwidthTest
struct
RocmBandwidthVersion
{
struct
RocmBandwidthVersion
{
...
@@ -433,9 +434,14 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -433,9 +434,14 @@ class RocmBandwidthTest : public BaseTest {
static
const
uint32_t
LINK_TYPE_SELF
=
0x00
;
static
const
uint32_t
LINK_TYPE_SELF
=
0x00
;
static
const
uint32_t
LINK_TYPE_PCIE
=
0x01
;
static
const
uint32_t
LINK_TYPE_PCIE
=
0x01
;
static
const
uint32_t
LINK_TYPE_XGMI
=
0x02
;
static
const
uint32_t
LINK_TYPE_XGMI
=
0x02
;
static
const
uint32_t
LINK_TYPE_
MULTI_HOPS
=
0x03
;
static
const
uint32_t
LINK_TYPE_
IGNORED
=
0x03
;
static
const
uint32_t
LINK_TYPE_NO_PATH
=
0xFFFFFFFF
;
static
const
uint32_t
LINK_TYPE_NO_PATH
=
0xFFFFFFFF
;
static
const
uint32_t
LINK_PROP_HOPS
=
0x00
;
static
const
uint32_t
LINK_PROP_TYPE
=
0x01
;
static
const
uint32_t
LINK_PROP_WEIGHT
=
0x02
;
static
const
uint32_t
LINK_PROP_ACCESS
=
0x03
;
// List used to store transactions per user request
// List used to store transactions per user request
vector
<
async_trans_t
>
trans_list_
;
vector
<
async_trans_t
>
trans_list_
;
...
@@ -444,8 +450,10 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -444,8 +450,10 @@ class RocmBandwidthTest : public BaseTest {
// Matrix used to track Access among agents
// Matrix used to track Access among agents
uint32_t
*
access_matrix_
;
uint32_t
*
access_matrix_
;
uint32_t
*
link_hops_matrix_
;
uint32_t
*
link_type_matrix_
;
uint32_t
*
link_type_matrix_
;
uint32_t
*
link_weight_matrix_
;
uint32_t
*
link_weight_matrix_
;
uint32_t
*
direct_access_matrix_
;
// Env key to determine if Fine-grained or
// Env key to determine if Fine-grained or
// Coarse-grained pool should be filtered out
// Coarse-grained pool should be filtered out
...
...
rocm_bandwidth_test_parse.cpp
View file @
fca6eaa8
...
@@ -317,10 +317,11 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -317,10 +317,11 @@ void RocmBandwidthTest::ParseArguments() {
// Print system topology if user option has "-t"
// Print system topology if user option has "-t"
if
(
print_topology
)
{
if
(
print_topology
)
{
PrintVersion
();
PrintTopology
();
PrintTopology
();
Print
AccessMatrix
(
);
Print
LinkPropsMatrix
(
LINK_PROP_ACCESS
);
PrintLink
Weight
Matrix
();
PrintLink
Props
Matrix
(
LINK_PROP_TYPE
);
PrintLink
Type
Matrix
();
PrintLink
Props
Matrix
(
LINK_PROP_WEIGHT
);
exit
(
0
);
exit
(
0
);
}
}
...
...
rocm_bandwidth_test_print.cpp
View file @
fca6eaa8
...
@@ -43,6 +43,11 @@
...
@@ -43,6 +43,11 @@
#include "common.hpp"
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
#include "rocm_bandwidth_test.hpp"
#include <assert.h>
#include <iostream>
#include <string>
#include <sstream>
// @Brief: Print Help Menu Screen
// @Brief: Print Help Menu Screen
void
RocmBandwidthTest
::
PrintHelpScreen
()
{
void
RocmBandwidthTest
::
PrintHelpScreen
()
{
...
@@ -137,94 +142,37 @@ void RocmBandwidthTest::PrintTopology() {
...
@@ -137,94 +142,37 @@ void RocmBandwidthTest::PrintTopology() {
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
}
}
void
RocmBandwidthTest
::
PrintAccessMatrix
()
const
{
std
::
string
GetValueAsString
(
uint32_t
key
,
uint32_t
value
)
{
uint32_t
format
=
10
;
std
::
stringstream
ss
;
std
::
cout
.
setf
(
ios
::
left
);
switch
(
key
)
{
std
::
cout
.
width
(
format
);
case
RocmBandwidthTest
::
LINK_PROP_ACCESS
:
std
::
cout
<<
""
;
ss
<<
value
;
std
::
cout
.
width
(
format
);
return
ss
.
str
();
break
;
std
::
cout
<<
"Device Access"
;
case
RocmBandwidthTest
::
LINK_PROP_HOPS
:
std
::
cout
<<
std
::
endl
;
case
RocmBandwidthTest
::
LINK_PROP_WEIGHT
:
std
::
cout
<<
std
::
endl
;
ss
<<
value
;
return
(
value
==
0xFFFFFFFF
)
?
std
::
string
(
"N/A"
)
:
ss
.
str
();
std
::
cout
.
width
(
format
);
break
;
std
::
cout
<<
""
;
case
RocmBandwidthTest
::
LINK_PROP_TYPE
:
std
::
cout
.
width
(
format
);
if
((
value
==
RocmBandwidthTest
::
LINK_TYPE_SELF
)
||
std
::
cout
<<
"D/D"
;
(
value
==
RocmBandwidthTest
::
LINK_TYPE_NO_PATH
)
||
for
(
uint32_t
idx0
=
0
;
idx0
<
agent_index_
;
idx0
++
)
{
(
value
==
RocmBandwidthTest
::
LINK_TYPE_IGNORED
))
{
std
::
cout
.
width
(
format
);
return
std
::
string
(
"N/A"
);
std
::
cout
<<
idx0
;
}
else
if
(
value
==
RocmBandwidthTest
::
LINK_TYPE_XGMI
)
{
}
return
std
::
string
(
"X"
);
std
::
cout
<<
std
::
endl
;
}
else
if
(
value
==
RocmBandwidthTest
::
LINK_TYPE_PCIE
)
{
std
::
cout
<<
std
::
endl
;
return
std
::
string
(
"P"
);
for
(
uint32_t
src_idx
=
0
;
src_idx
<
agent_index_
;
src_idx
++
)
{
std
::
cout
.
width
(
format
);
std
::
cout
<<
""
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
src_idx
;
for
(
uint32_t
dst_idx
=
0
;
dst_idx
<
agent_index_
;
dst_idx
++
)
{
uint32_t
path_exists
=
access_matrix_
[(
src_idx
*
agent_index_
)
+
dst_idx
];
std
::
cout
.
width
(
format
);
std
::
cout
<<
path_exists
;
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
}
std
::
cout
<<
std
::
endl
;
}
void
RocmBandwidthTest
::
PrintLinkTypeMatrix
()
const
{
uint32_t
format
=
10
;
std
::
cout
.
setf
(
ios
::
left
);
std
::
cout
.
width
(
format
);
std
::
cout
<<
""
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
"Device Link Types: P = PCIe, X = xGMI, N/A = Not Applicable"
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
""
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
"D/D"
;
for
(
uint32_t
idx0
=
0
;
idx0
<
agent_index_
;
idx0
++
)
{
std
::
cout
.
width
(
format
);
std
::
cout
<<
idx0
;
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
for
(
uint32_t
src_idx
=
0
;
src_idx
<
agent_index_
;
src_idx
++
)
{
std
::
cout
.
width
(
format
);
std
::
cout
<<
""
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
src_idx
;
for
(
uint32_t
dst_idx
=
0
;
dst_idx
<
agent_index_
;
dst_idx
++
)
{
uint32_t
link_type
=
link_type_matrix_
[(
src_idx
*
agent_index_
)
+
dst_idx
];
std
::
cout
.
width
(
format
);
if
(
link_type
==
LINK_TYPE_XGMI
)
{
std
::
cout
<<
"X"
;
}
else
if
(
link_type
==
LINK_TYPE_PCIE
)
{
std
::
cout
<<
"P"
;
}
else
if
((
link_type
==
LINK_TYPE_SELF
)
||
(
link_type
==
LINK_TYPE_NO_PATH
)
||
(
link_type
==
LINK_TYPE_MULTI_HOPS
))
{
std
::
cout
<<
"N/A"
;
}
}
}
break
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
}
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
"An illegal key to get value for"
<<
std
::
endl
;
assert
(
false
);
}
}
void
RocmBandwidthTest
::
PrintLink
Weight
Matrix
()
const
{
void
RocmBandwidthTest
::
PrintLink
Props
Matrix
(
uint32_t
key
)
const
{
uint32_t
format
=
10
;
uint32_t
format
=
10
;
std
::
cout
.
setf
(
ios
::
left
);
std
::
cout
.
setf
(
ios
::
left
);
...
@@ -232,7 +180,24 @@ void RocmBandwidthTest::PrintLinkWeightMatrix() const {
...
@@ -232,7 +180,24 @@ void RocmBandwidthTest::PrintLinkWeightMatrix() const {
std
::
cout
.
width
(
format
);
std
::
cout
.
width
(
format
);
std
::
cout
<<
""
;
std
::
cout
<<
""
;
std
::
cout
.
width
(
format
);
std
::
cout
.
width
(
format
);
std
::
cout
<<
"Device Numa Distance"
;
switch
(
key
)
{
case
LINK_PROP_ACCESS
:
std
::
cout
<<
"Inter-Device Access"
;
break
;
case
LINK_PROP_TYPE
:
std
::
cout
<<
"Inter-Device Link Type: P = PCIe, X = xGMI, N/A = Not Applicable"
;
break
;
case
LINK_PROP_HOPS
:
std
::
cout
<<
"Inter-Device Link Hops"
;
break
;
case
LINK_PROP_WEIGHT
:
std
::
cout
<<
"Inter-Device Numa Distance"
;
break
;
default:
std
::
cout
<<
"An illegal key to print matrix"
<<
std
::
endl
;
assert
(
false
);
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
...
@@ -253,13 +218,23 @@ void RocmBandwidthTest::PrintLinkWeightMatrix() const {
...
@@ -253,13 +218,23 @@ void RocmBandwidthTest::PrintLinkWeightMatrix() const {
std
::
cout
.
width
(
format
);
std
::
cout
.
width
(
format
);
std
::
cout
<<
src_idx
;
std
::
cout
<<
src_idx
;
for
(
uint32_t
dst_idx
=
0
;
dst_idx
<
agent_index_
;
dst_idx
++
)
{
for
(
uint32_t
dst_idx
=
0
;
dst_idx
<
agent_index_
;
dst_idx
++
)
{
uint32_t
link_weight
=
link_weight_matrix_
[(
src_idx
*
agent_index_
)
+
dst_idx
];
uint32_t
value
=
0x00
;
std
::
cout
.
width
(
format
);
switch
(
key
)
{
if
(
link_weight
==
0xFFFFFFFF
)
{
case
LINK_PROP_ACCESS
:
std
::
cout
<<
"N/A"
;
value
=
direct_access_matrix_
[(
src_idx
*
agent_index_
)
+
dst_idx
];
}
else
{
break
;
std
::
cout
<<
link_weight
;
case
LINK_PROP_TYPE
:
value
=
link_type_matrix_
[(
src_idx
*
agent_index_
)
+
dst_idx
];
break
;
case
LINK_PROP_HOPS
:
value
=
link_hops_matrix_
[(
src_idx
*
agent_index_
)
+
dst_idx
];
break
;
case
LINK_PROP_WEIGHT
:
value
=
link_weight_matrix_
[(
src_idx
*
agent_index_
)
+
dst_idx
];
break
;
}
}
std
::
cout
.
width
(
format
);
std
::
cout
<<
GetValueAsString
(
key
,
value
);
}
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
...
...
rocm_bandwidth_test_report.cpp
View file @
fca6eaa8
...
@@ -152,7 +152,7 @@ void RocmBandwidthTest::Display() const {
...
@@ -152,7 +152,7 @@ void RocmBandwidthTest::Display() const {
if
(
validate_
)
{
if
(
validate_
)
{
PrintVersion
();
PrintVersion
();
DisplayDevInfo
();
DisplayDevInfo
();
Print
AccessMatrix
(
);
Print
LinkPropsMatrix
(
LINK_PROP_ACCESS
);
DisplayValidationMatrix
();
DisplayValidationMatrix
();
return
;
return
;
}
}
...
@@ -160,9 +160,9 @@ void RocmBandwidthTest::Display() const {
...
@@ -160,9 +160,9 @@ void RocmBandwidthTest::Display() const {
if
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
)
{
if
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
)
{
PrintVersion
();
PrintVersion
();
DisplayDevInfo
();
DisplayDevInfo
();
Print
AccessMatrix
(
);
Print
LinkPropsMatrix
(
LINK_PROP_ACCESS
);
PrintLink
Weight
Matrix
();
PrintLink
Props
Matrix
(
LINK_PROP_WEIGHT
);
PrintLink
Type
Matrix
();
PrintLink
Props
Matrix
(
LINK_PROP_TYPE
);
DisplayCopyTimeMatrix
(
true
);
DisplayCopyTimeMatrix
(
true
);
return
;
return
;
}
}
...
@@ -171,9 +171,9 @@ void RocmBandwidthTest::Display() const {
...
@@ -171,9 +171,9 @@ void RocmBandwidthTest::Display() const {
if
(
bw_default_run_
==
NULL
)
{
if
(
bw_default_run_
==
NULL
)
{
PrintVersion
();
PrintVersion
();
DisplayDevInfo
();
DisplayDevInfo
();
Print
AccessMatrix
(
);
Print
LinkPropsMatrix
(
LINK_PROP_ACCESS
);
PrintLink
Weight
Matrix
();
PrintLink
Props
Matrix
(
LINK_PROP_WEIGHT
);
PrintLink
Type
Matrix
();
PrintLink
Props
Matrix
(
LINK_PROP_TYPE
);
}
}
DisplayCopyTimeMatrix
(
true
);
DisplayCopyTimeMatrix
(
true
);
return
;
return
;
...
...
rocm_bandwidth_test_topology.cpp
View file @
fca6eaa8
...
@@ -201,6 +201,7 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
...
@@ -201,6 +201,7 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
// Allocate memory to hold access lists
// Allocate memory to hold access lists
access_matrix_
=
new
uint32_t
[
agent_index_
*
agent_index_
]();
access_matrix_
=
new
uint32_t
[
agent_index_
*
agent_index_
]();
direct_access_matrix_
=
new
uint32_t
[
agent_index_
*
agent_index_
]();
hsa_status_t
status
;
hsa_status_t
status
;
uint32_t
size
=
pool_list_
.
size
();
uint32_t
size
=
pool_list_
.
size
();
...
@@ -225,6 +226,11 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
...
@@ -225,6 +226,11 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
status
=
hsa_amd_agent_memory_pool_get_info
(
src_agent
,
dst_pool
,
status
=
hsa_amd_agent_memory_pool_get_info
(
src_agent
,
dst_pool
,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS
,
&
access
);
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS
,
&
access
);
ErrorCheck
(
status
);
ErrorCheck
(
status
);
// Record if Src device can access or not
uint32_t
path
;
path
=
(
access
==
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
)
?
0
:
1
;
direct_access_matrix_
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
]
=
path
;
if
((
src_dev_type
==
HSA_DEVICE_TYPE_CPU
)
&&
if
((
src_dev_type
==
HSA_DEVICE_TYPE_CPU
)
&&
(
dst_dev_type
==
HSA_DEVICE_TYPE_GPU
)
&&
(
dst_dev_type
==
HSA_DEVICE_TYPE_GPU
)
&&
...
@@ -235,7 +241,6 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
...
@@ -235,7 +241,6 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
}
}
// Access between the two agents is Non-Existent
// Access between the two agents is Non-Existent
uint32_t
path
;
path
=
(
access
==
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
)
?
0
:
1
;
path
=
(
access
==
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
)
?
0
:
1
;
access_matrix_
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
]
=
path
;
access_matrix_
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
]
=
path
;
}
}
...
@@ -250,88 +255,54 @@ void RocmBandwidthTest::DiscoverTopology() {
...
@@ -250,88 +255,54 @@ void RocmBandwidthTest::DiscoverTopology() {
// Populate the access, link type and weight matrices
// Populate the access, link type and weight matrices
// Access matrix must be populated first
// Access matrix must be populated first
PopulateAccessMatrix
();
PopulateAccessMatrix
();
DiscoverLinkType
();
DiscoverLinkProps
();
DiscoverLinkWeight
();
}
}
void
RocmBandwidthTest
::
BindLinkType
(
uint32_t
idx1
,
uint32_t
idx2
)
{
uint32_t
GetLinkType
(
hsa_device_type_t
src_dev_type
,
hsa_device_type_t
dst_dev_type
,
hsa_amd_memory_pool_link_info_t
*
link_info
,
uint32_t
hops
)
{
// Agent has no pools so no need to look for link type distance
// Link type is ignored, linkinfo is illegal
if
(
agent_pool_list_
[
idx2
].
pool_list
.
size
()
==
0
)
{
// Currently Thunk collapses multi-hop paths into one
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
LINK_TYPE_NO_PATH
;
// while accumulating their numa weight
return
;
// @note: Thunk retains the original link type
if
(
hops
!=
1
)
{
return
RocmBandwidthTest
::
LINK_TYPE_IGNORED
;
}
}
uint32_t
hops
=
0
;
// Return link type only if it specified as XGMI
hsa_agent_t
agent1
=
agent_list_
[
idx1
].
agent_
;
if
((
link_info
[
0
]).
link_type
==
HSA_AMD_LINK_INFO_TYPE_XGMI
)
{
hsa_amd_memory_pool_t
&
pool
=
agent_pool_list_
[
idx2
].
pool_list
[
0
].
pool_
;
return
RocmBandwidthTest
::
LINK_TYPE_XGMI
;
err_
=
hsa_amd_agent_memory_pool_get_info
(
agent1
,
pool
,
HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS
,
&
hops
);
if
(
hops
<
1
)
{
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
LINK_TYPE_NO_PATH
;
return
;
}
}
hsa_amd_memory_pool_link_info_t
*
link_info
;
// In this case all we know is there is a path involving
uint32_t
link_info_sz
=
hops
*
sizeof
(
hsa_amd_memory_pool_link_info_t
);
// one or more links. Since it binding either two GPU's or
link_info
=
(
hsa_amd_memory_pool_link_info_t
*
)
malloc
(
link_info_sz
);
// one Gpu and one Cpu, we infer it to be of type PCIe
memset
(
link_info
,
0
,
(
hops
*
sizeof
(
hsa_amd_memory_pool_link_info_t
)));
if
((
src_dev_type
==
HSA_DEVICE_TYPE_GPU
)
||
err_
=
hsa_amd_agent_memory_pool_get_info
(
agent1
,
pool
,
(
dst_dev_type
==
HSA_DEVICE_TYPE_GPU
))
{
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO
,
link_info
);
return
RocmBandwidthTest
::
LINK_TYPE_PCIE
;
// Initialize link type based on Src and Dst devices plus link
// type reported by ROCr library
hsa_device_type_t
src_dev_type
=
agent_list_
[
idx1
].
device_type_
;
hsa_device_type_t
dst_dev_type
=
agent_list_
[
idx2
].
device_type_
;
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
LINK_TYPE_NO_PATH
;
// Update link matrix if there is one hop. Currently Thunk
// accumulates numa weight of the multiple hops into one link
if
(
hops
==
1
)
{
if
((
link_info
[
0
]).
link_type
==
HSA_AMD_LINK_INFO_TYPE_XGMI
)
{
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
LINK_TYPE_XGMI
;
free
(
link_info
);
return
;
}
// Update link type to be PCIE if one or both devices are GPU's
if
((
src_dev_type
==
HSA_DEVICE_TYPE_GPU
)
||
(
dst_dev_type
==
HSA_DEVICE_TYPE_GPU
))
{
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
LINK_TYPE_PCIE
;
free
(
link_info
);
return
;
}
}
}
// This should not be happening
// This occurs when both devices are CPU's
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
LINK_TYPE_MULTI_HOPS
;
return
RocmBandwidthTest
::
LINK_TYPE_IGNORED
;
free
(
link_info
);
}
}
void
RocmBandwidthTest
::
DiscoverLinkType
(
)
{
uint32_t
GetLinkWeight
(
hsa_amd_memory_pool_link_info_t
*
link_info
,
uint32_t
hops
)
{
// Allocate space if it is first time
uint32_t
weight
=
0
;
if
(
link_type_matrix_
==
NULL
)
{
for
(
uint32_t
hopIdx
=
0
;
hopIdx
<
hops
;
hopIdx
++
)
{
link_type_matrix_
=
new
uint32_t
[
agent_index_
*
agent_index_
]();
weight
+=
(
link_info
[
hopIdx
]).
numa_distance
;
}
agent_info_t
agent_info
;
for
(
uint32_t
idx1
=
0
;
idx1
<
agent_index_
;
idx1
++
)
{
for
(
uint32_t
idx2
=
0
;
idx2
<
agent_index_
;
idx2
++
)
{
if
(
idx1
==
idx2
)
{
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
LINK_TYPE_SELF
;
continue
;
}
BindLinkType
(
idx1
,
idx2
);
}
}
}
return
weight
;
}
}
void
RocmBandwidthTest
::
BindLink
Weight
(
uint32_t
idx1
,
uint32_t
idx2
)
{
void
RocmBandwidthTest
::
BindLink
Props
(
uint32_t
idx1
,
uint32_t
idx2
)
{
// Agent has no pools so no need to look for numa distance
// Agent has no pools so no need to look for numa distance
if
(
agent_pool_list_
[
idx2
].
pool_list
.
size
()
==
0
)
{
if
(
agent_pool_list_
[
idx2
].
pool_list
.
size
()
==
0
)
{
link_hops_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
0xFFFFFFFF
;
link_weight_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
0xFFFFFFFF
;
link_weight_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
0xFFFFFFFF
;
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
LINK_TYPE_NO_PATH
;
return
;
return
;
}
}
...
@@ -341,7 +312,9 @@ void RocmBandwidthTest::BindLinkWeight(uint32_t idx1, uint32_t idx2) {
...
@@ -341,7 +312,9 @@ void RocmBandwidthTest::BindLinkWeight(uint32_t idx1, uint32_t idx2) {
err_
=
hsa_amd_agent_memory_pool_get_info
(
agent1
,
pool
,
err_
=
hsa_amd_agent_memory_pool_get_info
(
agent1
,
pool
,
HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS
,
&
hops
);
HSA_AMD_AGENT_MEMORY_POOL_INFO_NUM_LINK_HOPS
,
&
hops
);
if
(
hops
<
1
)
{
if
(
hops
<
1
)
{
link_hops_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
0xFFFFFFFF
;
link_weight_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
0xFFFFFFFF
;
link_weight_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
0xFFFFFFFF
;
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
LINK_TYPE_NO_PATH
;
return
;
return
;
}
}
...
@@ -351,17 +324,27 @@ void RocmBandwidthTest::BindLinkWeight(uint32_t idx1, uint32_t idx2) {
...
@@ -351,17 +324,27 @@ void RocmBandwidthTest::BindLinkWeight(uint32_t idx1, uint32_t idx2) {
memset
(
link_info
,
0
,
(
hops
*
sizeof
(
hsa_amd_memory_pool_link_info_t
)));
memset
(
link_info
,
0
,
(
hops
*
sizeof
(
hsa_amd_memory_pool_link_info_t
)));
err_
=
hsa_amd_agent_memory_pool_get_info
(
agent1
,
pool
,
err_
=
hsa_amd_agent_memory_pool_get_info
(
agent1
,
pool
,
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO
,
link_info
);
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO
,
link_info
);
link_weight_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
0
;
for
(
uint32_t
hopIdx
=
0
;
hopIdx
<
hops
;
hopIdx
++
)
{
link_weight_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
+=
(
link_info
[
hopIdx
]).
numa_distance
;
link_hops_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
hops
;
}
link_weight_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
GetLinkWeight
(
link_info
,
hops
);
// Initialize link type based on Src and Dst devices plus link
// type reported by ROCr library
hsa_device_type_t
src_dev_type
=
agent_list_
[
idx1
].
device_type_
;
hsa_device_type_t
dst_dev_type
=
agent_list_
[
idx2
].
device_type_
;
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
GetLinkType
(
src_dev_type
,
dst_dev_type
,
link_info
,
hops
);
// Free the allocated link block
free
(
link_info
);
free
(
link_info
);
}
}
void
RocmBandwidthTest
::
DiscoverLink
Weight
()
{
void
RocmBandwidthTest
::
DiscoverLink
Props
()
{
// Allocate space if it is first time
// Allocate space if it is first time
if
(
link_weight_matrix_
==
NULL
)
{
if
(
link_weight_matrix_
==
NULL
)
{
link_type_matrix_
=
new
uint32_t
[
agent_index_
*
agent_index_
]();
link_hops_matrix_
=
new
uint32_t
[
agent_index_
*
agent_index_
]();
link_weight_matrix_
=
new
uint32_t
[
agent_index_
*
agent_index_
]();
link_weight_matrix_
=
new
uint32_t
[
agent_index_
*
agent_index_
]();
}
}
...
@@ -369,10 +352,12 @@ void RocmBandwidthTest::DiscoverLinkWeight() {
...
@@ -369,10 +352,12 @@ void RocmBandwidthTest::DiscoverLinkWeight() {
for
(
uint32_t
idx1
=
0
;
idx1
<
agent_index_
;
idx1
++
)
{
for
(
uint32_t
idx1
=
0
;
idx1
<
agent_index_
;
idx1
++
)
{
for
(
uint32_t
idx2
=
0
;
idx2
<
agent_index_
;
idx2
++
)
{
for
(
uint32_t
idx2
=
0
;
idx2
<
agent_index_
;
idx2
++
)
{
if
(
idx1
==
idx2
)
{
if
(
idx1
==
idx2
)
{
link_hops_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
0
;
link_weight_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
0
;
link_weight_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
0
;
link_type_matrix_
[(
idx1
*
agent_index_
)
+
idx2
]
=
LINK_TYPE_SELF
;
continue
;
continue
;
}
}
BindLink
Weight
(
idx1
,
idx2
);
BindLink
Props
(
idx1
,
idx2
);
}
}
}
}
}
}
...
...
rocm_bandwidth_test_trans.cpp
View file @
fca6eaa8
...
@@ -172,11 +172,9 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
...
@@ -172,11 +172,9 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
continue
;
continue
;
}
}
if
(
src_dev_idx
>
dst_dev_idx
)
{
bool
mirror
=
FindMirrorRequest
(
src_idx
,
dst_idx
);
bool
mirror
=
FindMirrorRequest
(
src_idx
,
dst_idx
);
if
(
mirror
)
{
if
(
mirror
)
{
continue
;
continue
;
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment