Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
rocm_bandwidth_test
Commits
4f4dcfbe
Unverified
Commit
4f4dcfbe
authored
Jan 18, 2019
by
Ramesh Errabolu
Committed by
GitHub
Jan 18, 2019
Browse files
Merge pull request #26 from RadeonOpenCompute/rbtLatency
Enable copy overhead measurement
parents
e809e43f
c6f6ed57
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
249 additions
and
172 deletions
+249
-172
ROCmBandwithTest_UserGuide.pdf
ROCmBandwithTest_UserGuide.pdf
+0
-0
rocm_bandwidth_test.cpp
rocm_bandwidth_test.cpp
+18
-16
rocm_bandwidth_test.hpp
rocm_bandwidth_test.hpp
+16
-1
rocm_bandwidth_test_parse.cpp
rocm_bandwidth_test_parse.cpp
+174
-63
rocm_bandwidth_test_print.cpp
rocm_bandwidth_test_print.cpp
+12
-5
rocm_bandwidth_test_report.cpp
rocm_bandwidth_test_report.cpp
+10
-3
rocm_bandwidth_test_topology.cpp
rocm_bandwidth_test_topology.cpp
+17
-57
rocm_bandwidth_test_trans.cpp
rocm_bandwidth_test_trans.cpp
+2
-27
No files found.
ROCmBandwithTest_UserGuide.pdf
View file @
4f4dcfbe
No preview for this file type
rocm_bandwidth_test.cpp
View file @
4f4dcfbe
...
...
@@ -60,7 +60,17 @@ const uint32_t RocmBandwidthTest::SIZE_LIST[] = { 1 * 1024,
4
*
1024
*
1024
,
8
*
1024
*
1024
,
16
*
1024
*
1024
,
32
*
1024
*
1024
,
64
*
1024
*
1024
,
128
*
1024
*
1024
,
256
*
1024
*
1024
,
512
*
1024
*
1024
};
256
*
1024
*
1024
,
512
*
1024
*
1024
};
// Buffer sizes, in bytes, that ParseArguments() pushes onto size_list_ when
// latency mode is enabled: every power of two from 1 byte (1u << 0) up to
// 512 KB (1u << 19), twenty entries in ascending order.
const uint32_t RocmBandwidthTest::LATENCY_SIZE_LIST[] = {
    1u << 0,  1u << 1,  1u << 2,  1u << 3,  1u << 4,
    1u << 5,  1u << 6,  1u << 7,  1u << 8,  1u << 9,
    1u << 10, 1u << 11, 1u << 12, 1u << 13, 1u << 14,
    1u << 15, 1u << 16, 1u << 17, 1u << 18, 1u << 19 };
uint32_t
RocmBandwidthTest
::
GetIterationNum
()
{
return
(
validate_
)
?
1
:
(
num_iteration_
*
1.2
+
1
);
...
...
@@ -76,24 +86,15 @@ void RocmBandwidthTest::AcquirePoolAcceses(uint32_t src_dev_idx,
uint32_t
dst_dev_idx
,
hsa_agent_t
dst_agent
,
void
*
dst
)
{
if
(
access_matrix_
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
]
==
2
)
{
AcquireAccess
(
src_agent
,
dst
);
AcquireAccess
(
dst_agent
,
src
);
return
;
}
// determine which one is a cpu and call acquire on the other agent
hsa_device_type_t
src_dev_type
=
agent_list_
[
src_dev_idx
].
device_type_
;
hsa_device_type_t
dst_dev_type
=
agent_list_
[
dst_dev_idx
].
device_type_
;
if
(
src_dev_type
==
HSA_DEVICE_TYPE_CPU
)
{
AcquireAccess
(
dst_agent
,
src
);
return
;
}
if
(
dst_dev_type
==
HSA_DEVICE_TYPE_CPU
)
{
if
(
src_dev_type
==
HSA_DEVICE_TYPE_GPU
)
{
AcquireAccess
(
src_agent
,
dst
);
return
;
}
else
{
AcquireAccess
(
dst_agent
,
src
);
}
assert
(
false
&&
"Inconsistent state"
);
return
;
}
void
RocmBandwidthTest
::
AllocateHostBuffers
(
uint32_t
size
,
...
...
@@ -541,11 +542,12 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
access_matrix_
=
NULL
;
active_agents_list_
=
NULL
;
latency_
=
false
;
validate_
=
false
;
print_cpu_time_
=
false
;
// Initialize version of the test
version_
.
major_id
=
1
;
version_
.
major_id
=
2
;
version_
.
minor_id
=
0
;
version_
.
step_id
=
0
;
version_
.
reserved
=
0
;
...
...
rocm_bandwidth_test.hpp
View file @
4f4dcfbe
...
...
@@ -233,6 +233,10 @@ class RocmBandwidthTest : public BaseTest {
// build list of transactions
void
ParseArguments
();
// @brief Validate user input of primary operations
void
ValidateInputFlags
(
uint32_t
pf_cnt
,
uint32_t
copy_mask
,
uint32_t
copy_ctrl_mask
);
// @brief: Print the list of transactions
void
PrintTransList
();
...
...
@@ -406,6 +410,14 @@ class RocmBandwidthTest : public BaseTest {
uint32_t
req_copy_unidir_
;
uint32_t
req_copy_all_bidir_
;
uint32_t
req_copy_all_unidir_
;
// Bits recorded in the "copy_mask" built by ParseArguments(): which side(s)
// of a unidirectional copy the user listed explicitly.
static const uint32_t USR_SRC_FLAG = 1u << 0;      // set when a source list is parsed
static const uint32_t USR_DST_FLAG = 1u << 1;      // set when a destination list is parsed

// Bits recorded in the separate "copy_ctrl_mask" built by ParseArguments():
// secondary options that modify a copy request. These share numeric values
// with the bits above on purpose -- the two masks are never combined.
static const uint32_t USR_BUFFER_SIZE = 1u << 0;   // user supplied buffer sizes
static const uint32_t USR_VISIBLE_TIME = 1u << 1;  // user asked for Cpu timing
static const uint32_t DEV_COPY_LATENCY = 1u << 2;  // user asked for latency mode
static const uint32_t VALIDATE_COPY_OP = 1u << 3;  // user asked for validation
// List used to store transactions per user request
vector
<
async_trans_t
>
trans_list_
;
...
...
@@ -444,6 +456,9 @@ class RocmBandwidthTest : public BaseTest {
// Determines if user has requested validation
bool
validate_
;
// Determines the latency overhead of copy operations
bool
latency_
;
// CPU agent used for validation
int32_t
cpu_index_
;
hsa_agent_t
cpu_agent_
;
...
...
@@ -451,8 +466,8 @@ class RocmBandwidthTest : public BaseTest {
// System region
hsa_amd_memory_pool_t
sys_pool_
;
// static const uint32_t SIZE_LIST[4];
static
const
uint32_t
SIZE_LIST
[
20
];
static
const
uint32_t
LATENCY_SIZE_LIST
[
20
];
// Exit value to return in case of error
int32_t
exit_value_
;
...
...
rocm_bandwidth_test_parse.cpp
View file @
4f4dcfbe
...
...
@@ -77,13 +77,83 @@ static bool ParseOptionValue(char* value, vector<uint32_t>&value_list) {
return
true
;
}
// @brief Check that the combination of command line options is legal.
// On any illegal combination the help screen is printed and the process
// terminates via exit(0); otherwise the function simply returns.
//
// @param pf_cnt          Number of primary flags counted while parsing
// @param copy_mask       Combination of USR_SRC_FLAG / USR_DST_FLAG
// @param copy_ctrl_mask  Combination of USR_BUFFER_SIZE, USR_VISIBLE_TIME,
//                        DEV_COPY_LATENCY and VALIDATE_COPY_OP
//
// Side effect: when no primary flag was given but validation was requested,
// req_copy_all_unidir_ is set to REQ_COPY_ALL_UNIDIR before the secondary
// flags are examined.
void RocmBandwidthTest::ValidateInputFlags(uint32_t pf_cnt,
                                           uint32_t copy_mask,
                                           uint32_t copy_ctrl_mask) {

  // Decode the secondary (control) flags once
  const bool has_size = (copy_ctrl_mask & USR_BUFFER_SIZE) != 0;
  const bool has_cpu_time = (copy_ctrl_mask & USR_VISIBLE_TIME) != 0;
  const bool has_latency = (copy_ctrl_mask & DEV_COPY_LATENCY) != 0;
  const bool has_validate = (copy_ctrl_mask & VALIDATE_COPY_OP) != 0;

  // Primary flags: at most two are allowed, and exactly two is legal
  // only when they are the source and destination lists of a
  // unidirectional copy
  const bool too_many_primary = (pf_cnt > 2);
  const bool bad_primary_pair =
      (pf_cnt == 2) && (copy_mask != (USR_SRC_FLAG | USR_DST_FLAG));
  if (too_many_primary || bad_primary_pair) {
    PrintHelpScreen();
    exit(0);
  }

  // Validation requested without any primary flag is rewritten
  // into an all-devices unidirectional copy request
  if ((pf_cnt == 0) && has_validate) {
    req_copy_all_unidir_ = REQ_COPY_ALL_UNIDIR;
  }

  // Snapshot the request state, including the rewrite above
  const bool all_bidir = (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR);
  const bool all_unidir = (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR);
  const bool some_bidir = (req_copy_bidir_ == REQ_COPY_BIDIR);

  // Case 1: a copy size may not accompany copy operations
  // that involve all devices
  const bool size_with_all = (all_bidir || all_unidir) && has_size;

  // Case 2: latency may not accompany bidirectional copies
  // or the all-devices unidirectional copy
  const bool latency_with_multi =
      (some_bidir || all_bidir || all_unidir) && has_latency;

  // Case 3: latency may not accompany any other secondary flag
  const bool latency_with_other =
      has_latency && (has_size || has_cpu_time || has_validate);

  // Case 4: validation may not accompany a copy size or Cpu time
  const bool validate_with_other =
      has_validate && (has_size || has_cpu_time);

  if (size_with_all || latency_with_multi ||
      latency_with_other || validate_with_other) {
    PrintHelpScreen();
    exit(0);
  }
}
void
RocmBandwidthTest
::
ParseArguments
()
{
bool
print_help
=
false
;
bool
copy_all_bi
=
false
;
bool
copy_all_uni
=
false
;
bool
print_version
=
false
;
bool
print_topology
=
false
;
uint32_t
copy_mask
=
0
;
uint32_t
copy_ctrl_mask
=
0
;
uint32_t
num_primary_flags
=
0
;
// This will suppress prints from getopt implementation
// In case of error, it will return the character '?' as
...
...
@@ -92,49 +162,45 @@ void RocmBandwidthTest::ParseArguments() {
int
opt
;
bool
status
;
while
((
opt
=
getopt
(
usr_argc_
,
usr_argv_
,
"hq
vct
aAb:s:d:r:w:m:"
))
!=
-
1
)
{
while
((
opt
=
getopt
(
usr_argc_
,
usr_argv_
,
"hq
tclv
aAb:s:d:r:w:m:"
))
!=
-
1
)
{
switch
(
opt
)
{
// Print help screen
case
'h'
:
print_help
=
true
;
num_primary_flags
++
;
break
;
// Print version of the test
case
'q'
:
print_version
=
true
;
break
;
// Print Cpu time
case
'c'
:
print_cpu_time_
=
true
;
num_primary_flags
++
;
break
;
// Print system topology
case
't'
:
print_topology
=
true
;
num_primary_flags
++
;
break
;
//
Set validation mode flag to true
case
'
v
'
:
validate_
=
true
;
//
Enable Unidirectional copy among all valid buffers
case
'
a
'
:
num_primary_flags
++
;
req_copy_all_unidir_
=
REQ_COPY_ALL_UNIDIR
;
break
;
// Collect list of agents involved in bidirectional copy operation
case
'b'
:
status
=
ParseOptionValue
(
optarg
,
bidir_list_
);
if
(
status
)
{
req_copy_bidir_
=
REQ_COPY_BIDIR
;
break
;
}
print_help
=
true
;
// Enable Bidirectional copy among all valid buffers
case
'A'
:
num_primary_flags
++
;
req_copy_all_bidir_
=
REQ_COPY_ALL_BIDIR
;
break
;
// Collect list of source buffers involved in unidirectional copy operation
case
's'
:
status
=
ParseOptionValue
(
optarg
,
src_list_
);
if
(
status
)
{
num_primary_flags
++
;
copy_mask
|=
USR_SRC_FLAG
;
req_copy_unidir_
=
REQ_COPY_UNIDIR
;
break
;
}
...
...
@@ -145,12 +211,51 @@ void RocmBandwidthTest::ParseArguments() {
case
'd'
:
status
=
ParseOptionValue
(
optarg
,
dst_list_
);
if
(
status
)
{
num_primary_flags
++
;
copy_mask
|=
USR_DST_FLAG
;
req_copy_unidir_
=
REQ_COPY_UNIDIR
;
break
;
}
print_help
=
true
;
break
;
// Collect list of agents involved in bidirectional copy operation
case
'b'
:
status
=
ParseOptionValue
(
optarg
,
bidir_list_
);
if
(
status
)
{
req_copy_bidir_
=
REQ_COPY_BIDIR
;
break
;
}
print_help
=
true
;
break
;
// Size of buffers to use in copy and read/write operations
case
'm'
:
status
=
ParseOptionValue
(
optarg
,
size_list_
);
if
(
status
==
false
)
{
print_help
=
true
;
}
copy_ctrl_mask
|=
USR_BUFFER_SIZE
;
break
;
// Print Cpu time
case
'c'
:
print_cpu_time_
=
true
;
copy_ctrl_mask
|=
USR_VISIBLE_TIME
;
break
;
// Set Latency mode flag to true
case
'l'
:
latency_
=
true
;
copy_ctrl_mask
|=
DEV_COPY_LATENCY
;
break
;
// Set validation mode flag to true
case
'v'
:
validate_
=
true
;
copy_ctrl_mask
|=
VALIDATE_COPY_OP
;
break
;
// Collect request to read a buffer
case
'r'
:
req_read_
=
REQ_READ
;
...
...
@@ -169,33 +274,13 @@ void RocmBandwidthTest::ParseArguments() {
}
break
;
// Size of buffers to use in copy and read/write operations
case
'm'
:
status
=
ParseOptionValue
(
optarg
,
size_list_
);
if
(
status
==
false
)
{
print_help
=
true
;
}
break
;
// Enable Unidirectional copy among all valid buffers
case
'a'
:
copy_all_uni
=
true
;
req_copy_all_unidir_
=
REQ_COPY_ALL_UNIDIR
;
break
;
// Enable Bidirectional copy among all valid buffers
case
'A'
:
copy_all_bi
=
true
;
req_copy_all_bidir_
=
REQ_COPY_ALL_BIDIR
;
break
;
// getopt implementation returns the value of the unknown
// option or an option with missing operand in the variable
// optopt
case
'?'
:
std
::
cout
<<
"Argument is illegal or needs value: "
<<
'?'
<<
std
::
endl
;
if
((
optopt
==
'b'
||
optopt
==
's'
||
optopt
==
'd'
||
optopt
==
'
e
'
))
{
std
::
cout
<<
"Error: Option -b -s -d and -
e
require argument"
<<
std
::
endl
;
if
((
optopt
==
'b'
||
optopt
==
's'
||
optopt
==
'd'
||
optopt
==
'
m
'
))
{
std
::
cout
<<
"Error: Option -b -s -d and -
m
require argument"
<<
std
::
endl
;
}
print_help
=
true
;
break
;
...
...
@@ -204,6 +289,9 @@ void RocmBandwidthTest::ParseArguments() {
break
;
}
}
// Determine input of primary flags is valid
ValidateInputFlags
(
num_primary_flags
,
copy_mask
,
copy_ctrl_mask
);
// Print help screen if user option has "-h"
if
(
print_help
)
{
...
...
@@ -232,27 +320,18 @@ void RocmBandwidthTest::ParseArguments() {
exit
(
0
);
}
// Invalidate request if user has requested full
// copying for both unidirectional and bidirectional
if
((
copy_all_bi
)
&&
(
copy_all_uni
))
{
PrintHelpScreen
();
exit
(
0
);
}
// Initialize buffer list if full copying in unidirectional mode is enabled
if
((
copy_all_uni
)
||
(
validate_
))
{
// Initialize buffer list if full copying in unidirectional
// or bidirectional mode is enabled
if
((
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
)
||
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
))
{
uint32_t
size
=
pool_list_
.
size
();
for
(
uint32_t
idx
=
0
;
idx
<
size
;
idx
++
)
{
src_list_
.
push_back
(
idx
);
dst_list_
.
push_back
(
idx
);
}
}
// Initialize buffer list if full copying in bidirectional mode is enabled
if
(
copy_all_bi
)
{
uint32_t
size
=
pool_list_
.
size
();
for
(
uint32_t
idx
=
0
;
idx
<
size
;
idx
++
)
{
bidir_list_
.
push_back
(
idx
);
if
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
{
bidir_list_
.
push_back
(
idx
);
}
else
{
src_list_
.
push_back
(
idx
);
dst_list_
.
push_back
(
idx
);
}
}
}
...
...
@@ -261,12 +340,43 @@ void RocmBandwidthTest::ParseArguments() {
if
(
size_list_
.
size
()
==
0
)
{
uint32_t
size_len
=
sizeof
(
SIZE_LIST
)
/
sizeof
(
uint32_t
);
for
(
uint32_t
idx
=
0
;
idx
<
size_len
;
idx
++
)
{
if
((
copy_all_bi
)
||
(
copy_all_uni
)
||
(
validate_
))
{
if
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
{
if
(
idx
==
16
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
else
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
if
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
)
{
if
(
idx
==
16
)
{
if
(
latency_
==
false
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
else
{
size_list_
.
push_back
(
LATENCY_SIZE_LIST
[
3
]);
// size of 8 bytes
}
}
}
if
(
req_copy_unidir_
==
REQ_COPY_UNIDIR
)
{
if
(
latency_
)
{
size_list_
.
push_back
(
LATENCY_SIZE_LIST
[
idx
]);
}
else
if
(
validate_
)
{
if
(
idx
==
16
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
else
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
if
(
req_copy_bidir_
==
REQ_COPY_BIDIR
)
{
if
(
validate_
)
{
if
(
idx
==
16
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
else
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
}
}
else
{
...
...
@@ -275,6 +385,7 @@ void RocmBandwidthTest::ParseArguments() {
size_list_
[
idx
]
=
size_list_
[
idx
]
*
1024
*
1024
;
}
}
std
::
sort
(
size_list_
.
begin
(),
size_list_
.
end
());
}
rocm_bandwidth_test_print.cpp
View file @
4f4dcfbe
...
...
@@ -52,6 +52,7 @@ void RocmBandwidthTest::PrintHelpScreen() {
std
::
cout
<<
"
\t
-h Prints the help screen"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-q Query version of the test"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-v Run the test in validation mode"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-l Run test to collect Latency data"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-c Time the operation using CPU Timers"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-t Prints system topology and allocatable memory info"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-m List of buffer sizes to use, specified in Megabytes"
<<
std
::
endl
;
...
...
@@ -61,6 +62,17 @@ void RocmBandwidthTest::PrintHelpScreen() {
std
::
cout
<<
"
\t
-a Perform Unidirectional Copy involving all device combinations"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-A Perform Bidirectional Copy involving all device combinations"
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
"
\t
NOTE: Mixing following options is illegal/unsupported"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 1: rocm_bandwidth_test -a or -A with -m"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 2: rocm_bandwidth_test -b or -A with -l"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 3: rocm_bandwidth_test -a or -s x -d with -l and -c"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 4: rocm_bandwidth_test -a or -s x -d with -l and -m"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 5: rocm_bandwidth_test -a or -s x -d with -l and -v"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 6: rocm_bandwidth_test -a or -A -b or -s x -d y with -v and -c"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 7: rocm_bandwidth_test -a or -A -b or -s x -d y with -v and -m"
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
...
...
@@ -119,11 +131,6 @@ void RocmBandwidthTest::PrintTopology() {
std
::
cout
<<
" Allocatable Memory Size (KB): "
<<
node
.
pool_list
.
at
(
jdx
).
allocable_size_
/
1024
<<
std
::
endl
;
/*
std::cout << " is fine-grained: "
<< node.pool_list.at(jdx).is_fine_grained_ << std::endl;
*/
}
std
::
cout
<<
std
::
endl
;
}
...
...
rocm_bandwidth_test_report.cpp
View file @
4f4dcfbe
...
...
@@ -48,11 +48,13 @@
#include <algorithm>
static
void
printRecord
(
uint32_t
size
,
double
avg_time
,
double
bandwidth
,
double
min_time
,
double
avg_
bandwidth
,
double
min_time
,
double
peak_bandwidth
)
{
std
::
stringstream
size_str
;
if
(
size
<
1024
*
1024
)
{
if
(
size
<
1024
)
{
size_str
<<
size
<<
" Bytes"
;
}
else
if
(
size
<
1024
*
1024
)
{
size_str
<<
size
/
1024
<<
" KB"
;
}
else
{
size_str
<<
size
/
(
1024
*
1024
)
<<
" MB"
;
...
...
@@ -66,7 +68,7 @@ static void printRecord(uint32_t size, double avg_time,
std
::
cout
.
width
(
format
);
std
::
cout
<<
(
avg_time
*
1e6
);
std
::
cout
.
width
(
format
);
std
::
cout
<<
bandwidth
;
std
::
cout
<<
avg_
bandwidth
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
(
min_time
*
1e6
);
std
::
cout
.
width
(
format
);
...
...
@@ -175,6 +177,11 @@ void RocmBandwidthTest::Display() const {
return
;
}
if
((
req_copy_bidir_
==
REQ_COPY_BIDIR
)
||
(
req_copy_unidir_
==
REQ_COPY_UNIDIR
))
{
PrintVersion
();
}
for
(
uint32_t
idx
=
0
;
idx
<
trans_size
;
idx
++
)
{
async_trans_t
trans
=
trans_list_
[
idx
];
if
((
trans
.
req_type_
==
REQ_COPY_BIDIR
)
||
...
...
rocm_bandwidth_test_topology.cpp
View file @
4f4dcfbe
...
...
@@ -206,78 +206,38 @@ void RocmBandwidthTest::PopulateAccessMatrix() {
uint32_t
size
=
pool_list_
.
size
();
for
(
uint32_t
src_idx
=
0
;
src_idx
<
size
;
src_idx
++
)
{
//
D
et
ermine if the pool belongs to Cpu and is coarse-grained
//
G
et
handle of Src agent of the pool
uint32_t
src_dev_idx
=
pool_list_
[
src_idx
].
agent_index_
;
hsa_device_type_t
src_dev_type
=
agent_list_
[
src_dev_idx
].
device_type_
;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
if (src_dev_type == HSA_DEVICE_TYPE_CPU) {
bool src_fine_grained = pool_list_[src_idx].is_fine_grained_;
if (src_fine_grained == false) {
continue;
}
}
*/
hsa_agent_t
src_agent
=
pool_list_
[
src_idx
].
owner_agent_
;
hsa_amd_memory_pool_t
src_pool
=
pool_list_
[
src_idx
].
pool_
;
hsa_device_type_t
src_dev_type
=
agent_list_
[
src_dev_idx
].
device_type_
;
for
(
uint32_t
dst_idx
=
0
;
dst_idx
<
size
;
dst_idx
++
)
{
//
D
et
ermine if the pool belongs to Cpu and is coarse-grained
//
G
et
handle of Dst pool
uint32_t
dst_dev_idx
=
pool_list_
[
dst_idx
].
agent_index_
;
hsa_device_type_t
dst_dev_type
=
agent_list_
[
dst_dev_idx
].
device_type_
;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
if (dst_dev_type == HSA_DEVICE_TYPE_CPU) {
bool dst_fine_grained = pool_list_[dst_idx].is_fine_grained_;
if (dst_fine_grained == false) {
continue;
}
}
*/
hsa_agent_t
dst_agent
=
pool_list_
[
dst_idx
].
owner_agent_
;
hsa_amd_memory_pool_t
dst_pool
=
pool_list_
[
dst_idx
].
pool_
;
hsa_device_type_t
dst_dev_type
=
agent_list_
[
dst_dev_idx
].
device_type_
;
// Determine if
accessibility to dst pool for src agent is not denied
hsa_amd_memory_pool_access_t
access
1
;
// Determine if
src agent has access to dst pool
hsa_amd_memory_pool_access_t
access
;
status
=
hsa_amd_agent_memory_pool_get_info
(
src_agent
,
dst_pool
,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS
,
&
access
1
);
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS
,
&
access
);
ErrorCheck
(
status
);
// Determine if accessibility to src pool for dst agent is not denied
hsa_amd_memory_pool_access_t
access2
;
status
=
hsa_amd_agent_memory_pool_get_info
(
dst_agent
,
src_pool
,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS
,
&
access2
);
// Access between the two agents is Non-Existent
if
((
access1
==
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
)
&&
(
access2
==
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
))
{
access_matrix_
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
]
=
0
;
}
// Access between the two agents is Unidirectional
if
((
access1
==
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
)
||
(
access2
==
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
))
{
if
((
src_dev_type
==
HSA_DEVICE_TYPE_GPU
)
&&
(
dst_dev_type
==
HSA_DEVICE_TYPE_GPU
))
{
access_matrix_
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
]
=
0
;
}
else
{
access_matrix_
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
]
=
1
;
}
if
((
src_dev_type
==
HSA_DEVICE_TYPE_CPU
)
&&
(
dst_dev_type
==
HSA_DEVICE_TYPE_GPU
)
&&
(
access
==
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
))
{
status
=
hsa_amd_agent_memory_pool_get_info
(
dst_agent
,
src_pool
,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS
,
&
access
);
ErrorCheck
(
status
);
}
// Access between the two agents is Bidirectional
if
((
access1
!=
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
)
&&
(
access2
!=
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
))
{
access_matrix_
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
]
=
2
;
}
// Access between the two agents is Non-Existent
uint32_t
path
;
path
=
(
access
==
HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED
)
?
0
:
1
;
access_matrix_
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
]
=
path
;
}
}
}
...
...
rocm_bandwidth_test_trans.cpp
View file @
4f4dcfbe
...
...
@@ -125,48 +125,22 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
uint32_t
src_size
=
src_list
.
size
();
uint32_t
dst_size
=
dst_list
.
size
();
// hsa_status_t status;
// hsa_amd_memory_pool_access_t access;
for
(
uint32_t
idx
=
0
;
idx
<
src_size
;
idx
++
)
{
// Retrieve Roc runtime handles for Src memory pool and agents
uint32_t
src_idx
=
src_list
[
idx
];
uint32_t
src_dev_idx
=
pool_list_
[
src_idx
].
agent_index_
;
// hsa_agent_t src_agent = pool_list_[src_idx].owner_agent_;
hsa_amd_memory_pool_t
src_pool
=
pool_list_
[
src_idx
].
pool_
;
// bool src_fine_grained = pool_list_[src_idx].is_fine_grained_;
hsa_device_type_t
src_dev_type
=
agent_list_
[
src_dev_idx
].
device_type_
;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
filter_out = FilterCpuPool(req_type, src_dev_type, src_fine_grained);
if (filter_out) {
continue;
}
*/
for
(
uint32_t
jdx
=
0
;
jdx
<
dst_size
;
jdx
++
)
{
// Retrieve Roc runtime handles for Dst memory pool and agents
uint32_t
dst_idx
=
dst_list
[
jdx
];
uint32_t
dst_dev_idx
=
pool_list_
[
dst_idx
].
agent_index_
;
// hsa_agent_t dst_agent = pool_list_[dst_idx].owner_agent_;
hsa_amd_memory_pool_t
dst_pool
=
pool_list_
[
dst_idx
].
pool_
;
// bool dst_fine_grained = pool_list_[dst_idx].is_fine_grained_;
hsa_device_type_t
dst_dev_type
=
agent_list_
[
dst_dev_idx
].
device_type_
;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
filter_out = FilterCpuPool(req_type, dst_dev_type, dst_fine_grained);
if (filter_out) {
continue;
}
*/
// Filter out transactions that involve only Cpu agents/devices
// without regard to type of request, default run, partial or full
// unidirectional or bidirectional copies
...
...
@@ -185,7 +159,7 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
}
}
// Determine if accessibility to
src
pool for
dst
agent is not denied
// Determine if accessibility to
dst
pool for
src
agent is not denied
uint32_t
path_exists
=
access_matrix_
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
];
if
(
path_exists
==
0
)
{
if
((
req_type
==
REQ_COPY_ALL_BIDIR
)
||
...
...
@@ -325,6 +299,7 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
}
// Copy operation does not involve a Gpu device
// Divide bandwidth with 10^9 to get size in GigaBytes (10^9)
if
(
trans
.
copy
.
uses_gpu_
!=
true
)
{
avg_time
=
trans
.
cpu_avg_time_
[
idx
];
min_time
=
trans
.
cpu_min_time_
[
idx
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment