Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
rocm_bandwidth_test
Commits
b389fac3
Commit
b389fac3
authored
May 15, 2019
by
Ramesh Errabolu
Browse files
Changes to support Concurrent copies
parent
f5ee8791
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
436 additions
and
71 deletions
+436
-71
rocm_bandwidth_test.cpp
rocm_bandwidth_test.cpp
+216
-2
rocm_bandwidth_test.hpp
rocm_bandwidth_test.hpp
+26
-9
rocm_bandwidth_test_parse.cpp
rocm_bandwidth_test_parse.cpp
+39
-2
rocm_bandwidth_test_print.cpp
rocm_bandwidth_test_print.cpp
+5
-0
rocm_bandwidth_test_report.cpp
rocm_bandwidth_test_report.cpp
+6
-2
rocm_bandwidth_test_trans.cpp
rocm_bandwidth_test_trans.cpp
+126
-36
rocm_bandwidth_test_validate.cpp
rocm_bandwidth_test_validate.cpp
+18
-20
No files found.
rocm_bandwidth_test.cpp
View file @
b389fac3
...
@@ -159,6 +159,88 @@ bool RocmBandwidthTest::ValidateDstBuffer(size_t max_size, size_t curr_size, voi
...
@@ -159,6 +159,88 @@ bool RocmBandwidthTest::ValidateDstBuffer(size_t max_size, size_t curr_size, voi
return
(
err_
==
HSA_STATUS_SUCCESS
);
return
(
err_
==
HSA_STATUS_SUCCESS
);
}
}
void
RocmBandwidthTest
::
AllocateConcurrentCopyResources
(
bool
bidir
,
vector
<
async_trans_t
>&
trans_list
,
vector
<
void
*>&
buf_list
,
vector
<
hsa_agent_t
>&
dev_list
,
vector
<
uint32_t
>&
dev_idx_list
,
vector
<
hsa_signal_t
>&
sig_list
,
vector
<
hsa_amd_memory_pool_t
>&
pool_list
)
{
// Number of Unidirectional or Bidirectional
// Concurrent Copy transactions in user request
uint32_t
trans_cnt
=
trans_list
.
size
();
size_t
max_size
=
size_list_
.
back
();
// Common variables used in different loops
void
*
buf_src
;
void
*
buf_dst
;
uint32_t
src_idx
;
uint32_t
dst_idx
;
hsa_signal_t
signal
;
hsa_agent_t
src_dev
;
hsa_agent_t
dst_dev
;
uint32_t
src_dev_idx
;
uint32_t
dst_dev_idx
;
hsa_amd_memory_pool_t
src_pool
;
hsa_amd_memory_pool_t
dst_pool
;
// Allocate buffers for the various transactions
for
(
uint32_t
idx
=
0
;
idx
<
trans_cnt
;
idx
++
)
{
async_trans_t
&
trans
=
trans_list
[
idx
];
src_idx
=
trans
.
copy
.
src_idx_
;
dst_idx
=
trans
.
copy
.
dst_idx_
;
src_pool
=
trans
.
copy
.
src_pool_
;
dst_pool
=
trans
.
copy
.
dst_pool_
;
src_dev
=
pool_list_
[
src_idx
].
owner_agent_
;
dst_dev
=
pool_list_
[
dst_idx
].
owner_agent_
;
src_dev_idx
=
pool_list_
[
src_idx
].
agent_index_
;
dst_dev_idx
=
pool_list_
[
dst_idx
].
agent_index_
;
// Allocate buffers and signal for forward copy operation
AllocateCopyBuffers
(
max_size
,
buf_src
,
src_pool
,
buf_dst
,
dst_pool
);
err_
=
hsa_signal_create
(
1
,
0
,
NULL
,
&
signal
);
ErrorCheck
(
err_
);
// Acquire access to destination buffers
AcquirePoolAcceses
(
src_dev_idx
,
src_dev
,
buf_src
,
dst_dev_idx
,
dst_dev
,
buf_dst
);
sig_list
.
push_back
(
signal
);
buf_list
.
push_back
(
buf_src
);
buf_list
.
push_back
(
buf_dst
);
dev_list
.
push_back
(
src_dev
);
dev_list
.
push_back
(
dst_dev
);
dev_idx_list
.
push_back
(
src_dev_idx
);
dev_idx_list
.
push_back
(
dst_dev_idx
);
// For bidirectional copies allocate buffers
// and signal for reverse direction as well
if
(
bidir
)
{
AllocateCopyBuffers
(
max_size
,
buf_src
,
dst_pool
,
buf_dst
,
src_pool
);
err_
=
hsa_signal_create
(
1
,
0
,
NULL
,
&
signal
);
ErrorCheck
(
err_
);
// Acquire access to destination buffers
AcquirePoolAcceses
(
dst_dev_idx
,
dst_dev
,
buf_src
,
src_dev_idx
,
src_dev
,
buf_dst
);
sig_list
.
push_back
(
signal
);
buf_list
.
push_back
(
buf_src
);
buf_list
.
push_back
(
buf_dst
);
dev_list
.
push_back
(
dst_dev
);
dev_list
.
push_back
(
src_dev
);
dev_idx_list
.
push_back
(
dst_dev_idx
);
dev_idx_list
.
push_back
(
src_dev_idx
);
}
}
}
void
RocmBandwidthTest
::
AllocateCopyBuffers
(
size_t
size
,
void
RocmBandwidthTest
::
AllocateCopyBuffers
(
size_t
size
,
void
*&
src
,
hsa_amd_memory_pool_t
src_pool
,
void
*&
src
,
hsa_amd_memory_pool_t
src_pool
,
void
*&
dst
,
hsa_amd_memory_pool_t
dst_pool
)
{
void
*&
dst
,
hsa_amd_memory_pool_t
dst_pool
)
{
...
@@ -231,7 +313,6 @@ void RocmBandwidthTest::WaitForCopyCompletion(vector<hsa_signal_t>& signal_list)
...
@@ -231,7 +313,6 @@ void RocmBandwidthTest::WaitForCopyCompletion(vector<hsa_signal_t>& signal_list)
uint32_t
size
=
signal_list
.
size
();
uint32_t
size
=
signal_list
.
size
();
for
(
uint32_t
idx
=
0
;
idx
<
size
;
idx
++
)
{
for
(
uint32_t
idx
=
0
;
idx
<
size
;
idx
++
)
{
hsa_signal_t
signal
=
signal_list
[
idx
];
hsa_signal_t
signal
=
signal_list
[
idx
];
// Wait for copy operation to complete
while
(
hsa_signal_wait_acquire
(
signal
,
HSA_SIGNAL_CONDITION_LT
,
while
(
hsa_signal_wait_acquire
(
signal
,
HSA_SIGNAL_CONDITION_LT
,
1
,
uint64_t
(
-
1
),
policy
));
1
,
uint64_t
(
-
1
),
policy
));
}
}
...
@@ -252,6 +333,128 @@ void RocmBandwidthTest::copy_buffer(void* dst, hsa_agent_t dst_agent,
...
@@ -252,6 +333,128 @@ void RocmBandwidthTest::copy_buffer(void* dst, hsa_agent_t dst_agent,
uint64_t
(
-
1
),
HSA_WAIT_STATE_ACTIVE
));
uint64_t
(
-
1
),
HSA_WAIT_STATE_ACTIVE
));
}
}
void
RocmBandwidthTest
::
RunConcurrentCopyBenchmark
(
bool
bidir
,
vector
<
async_trans_t
>&
trans_list
)
{
// Number of Unidirectional or Bidirectional
// Concurrent Copy transactions in user request
uint32_t
trans_cnt
=
trans_list
.
size
();
size_t
max_size
=
size_list_
.
back
();
uint32_t
size_len
=
size_list_
.
size
();
// Lists of buffers, pools, agents and signals
// used to run copy requests
vector
<
void
*>
buf_list
;
vector
<
hsa_agent_t
>
dev_list
;
vector
<
uint32_t
>
dev_idx_list
;
vector
<
hsa_signal_t
>
sig_list
;
vector
<
hsa_amd_memory_pool_t
>
pool_list
;
// Allocate resources for the various transactions
AllocateConcurrentCopyResources
(
bidir
,
trans_list
,
buf_list
,
dev_list
,
dev_idx_list
,
sig_list
,
pool_list
);
// Common variables used in different loops
void
*
buf_src
;
void
*
buf_dst
;
hsa_agent_t
src_dev
;
hsa_agent_t
dst_dev
;
hsa_signal_t
signal
;
// Signa to trigger all copy requests to wait
// until allowed to begin
hsa_signal_t
sig_grp_start
;
err_
=
hsa_signal_create
(
1
,
0
,
NULL
,
&
sig_grp_start
);
ErrorCheck
(
err_
);
// Bind the number of iterations
uint32_t
iterations
=
GetIterationNum
();
// Iterate through the differnt buffer sizes to
// compute the bandwidth as determined by copy
for
(
uint32_t
idx
=
0
;
idx
<
size_len
;
idx
++
)
{
// This should not be happening
size_t
curr_size
=
size_list_
[
idx
];
if
(
curr_size
>
max_size
)
{
break
;
}
std
::
vector
<
std
::
vector
<
double
>
>
gpu_time_list
(
trans_cnt
,
std
::
vector
<
double
>
());
for
(
uint32_t
it
=
0
;
it
<
iterations
;
it
++
)
{
if
(
it
%
2
)
{
printf
(
"."
);
fflush
(
stdout
);
}
// Set group trigger signal
hsa_signal_store_relaxed
(
sig_grp_start
,
1
);
// Update signal value to one before submitting copy requests
uint32_t
sig_idx
=
0
;
uint32_t
sig_cnt
=
sig_list
.
size
();
for
(
sig_idx
=
0
;
sig_idx
<
sig_cnt
;
sig_idx
++
)
{
signal
=
sig_list
[
sig_idx
];
hsa_signal_store_relaxed
(
signal
,
1
);
}
// Submit copy operations in batch mode
uint32_t
rsrc_idx
=
0
;
uint32_t
cpy_cnt
=
(
bidir
)
?
(
trans_cnt
*
2
)
:
trans_cnt
;
for
(
uint32_t
cpy_idx
=
0
;
cpy_idx
<
cpy_cnt
;
cpy_idx
++
)
{
sig_idx
=
cpy_idx
;
rsrc_idx
=
cpy_idx
*
2
;
signal
=
sig_list
[
sig_idx
+
0
];
buf_src
=
buf_list
[
rsrc_idx
+
0
];
buf_dst
=
buf_list
[
rsrc_idx
+
1
];
src_dev
=
dev_list
[
rsrc_idx
+
0
];
dst_dev
=
dev_list
[
rsrc_idx
+
1
];
err_
=
hsa_amd_memory_async_copy
(
buf_dst
,
dst_dev
,
buf_src
,
src_dev
,
curr_size
,
1
,
&
sig_grp_start
,
signal
);
ErrorCheck
(
err_
);
}
// Set group trigger signal
hsa_signal_store_relaxed
(
sig_grp_start
,
0
);
// Wait for the copy operations to complete
WaitForCopyCompletion
(
sig_list
);
// Retrieve times for each copy operation
hsa_signal_t
signal_rev
;
for
(
uint32_t
tidx
=
0
;
tidx
<
trans_cnt
;
tidx
++
)
{
sig_idx
=
(
bidir
)
?
(
tidx
*
2
)
:
(
tidx
);
signal
=
sig_list
[
sig_idx
+
0
];
signal_rev
=
(
bidir
)
?
(
sig_list
[
sig_idx
+
1
])
:
signal
;
double
temp
=
GetGpuCopyTime
(
bidir
,
signal
,
signal_rev
);
std
::
vector
<
double
>&
gpu_time
=
gpu_time_list
[
tidx
];
gpu_time
.
push_back
(
temp
);
}
}
// Update time taken to copy a particular size
// Get Gpu min and mean copy times
for
(
uint32_t
tidx
=
0
;
tidx
<
trans_cnt
;
tidx
++
)
{
async_trans_t
&
trans
=
trans_list
[
tidx
];
std
::
vector
<
double
>&
gpu_time
=
gpu_time_list
[
tidx
];
double
min_time
=
GetMinTime
(
gpu_time
);
double
mean_time
=
GetMeanTime
(
gpu_time
);
trans
.
gpu_min_time_
.
push_back
(
min_time
);
trans
.
gpu_avg_time_
.
push_back
(
mean_time
);
gpu_time
.
clear
();
}
}
// Free up buffers and signal objects used in copy operation
sig_list
.
push_back
(
sig_grp_start
);
ReleaseSignals
(
sig_list
);
ReleaseBuffers
(
buf_list
);
}
void
RocmBandwidthTest
::
RunCopyBenchmark
(
async_trans_t
&
trans
)
{
void
RocmBandwidthTest
::
RunCopyBenchmark
(
async_trans_t
&
trans
)
{
// Bind if this transaction is bidirectional
// Bind if this transaction is bidirectional
...
@@ -458,6 +661,16 @@ void RocmBandwidthTest::Run() {
...
@@ -458,6 +661,16 @@ void RocmBandwidthTest::Run() {
ErrorCheck
(
err_
);
ErrorCheck
(
err_
);
}
}
if
((
req_concurrent_copy_bidir_
==
REQ_CONCURRENT_COPY_BIDIR
)
||
(
req_concurrent_copy_unidir_
==
REQ_CONCURRENT_COPY_UNIDIR
))
{
bool
bidir
=
(
req_concurrent_copy_bidir_
==
REQ_CONCURRENT_COPY_BIDIR
);
RunConcurrentCopyBenchmark
(
bidir
,
trans_list_
);
ComputeCopyTime
(
trans_list_
);
err_
=
hsa_amd_profiling_async_copy_enable
(
false
);
ErrorCheck
(
err_
);
return
;
}
// Iterate through the list of transactions and execute them
// Iterate through the list of transactions and execute them
uint32_t
trans_size
=
trans_list_
.
size
();
uint32_t
trans_size
=
trans_list_
.
size
();
for
(
uint32_t
idx
=
0
;
idx
<
trans_size
;
idx
++
)
{
for
(
uint32_t
idx
=
0
;
idx
<
trans_size
;
idx
++
)
{
...
@@ -474,7 +687,6 @@ void RocmBandwidthTest::Run() {
...
@@ -474,7 +687,6 @@ void RocmBandwidthTest::Run() {
RunIOBenchmark
(
trans
);
RunIOBenchmark
(
trans
);
}
}
}
}
std
::
cout
<<
std
::
endl
;
// Disable profiling of Async Copy Activity
// Disable profiling of Async Copy Activity
if
(
print_cpu_time_
==
false
)
{
if
(
print_cpu_time_
==
false
)
{
...
@@ -535,6 +747,8 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
...
@@ -535,6 +747,8 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
req_copy_unidir_
=
REQ_INVALID
;
req_copy_unidir_
=
REQ_INVALID
;
req_copy_all_bidir_
=
REQ_INVALID
;
req_copy_all_bidir_
=
REQ_INVALID
;
req_copy_all_unidir_
=
REQ_INVALID
;
req_copy_all_unidir_
=
REQ_INVALID
;
req_concurrent_copy_bidir_
=
REQ_INVALID
;
req_concurrent_copy_unidir_
=
REQ_INVALID
;
access_matrix_
=
NULL
;
access_matrix_
=
NULL
;
link_type_matrix_
=
NULL
;
link_type_matrix_
=
NULL
;
...
...
rocm_bandwidth_test.hpp
View file @
b389fac3
...
@@ -172,7 +172,9 @@ typedef enum Request_Type {
...
@@ -172,7 +172,9 @@ typedef enum Request_Type {
REQ_COPY_UNIDIR
=
6
,
REQ_COPY_UNIDIR
=
6
,
REQ_COPY_ALL_BIDIR
=
7
,
REQ_COPY_ALL_BIDIR
=
7
,
REQ_COPY_ALL_UNIDIR
=
8
,
REQ_COPY_ALL_UNIDIR
=
8
,
REQ_INVALID
=
9
,
REQ_CONCURRENT_COPY_BIDIR
=
9
,
REQ_CONCURRENT_COPY_UNIDIR
=
10
,
REQ_INVALID
=
11
,
}
Request_Type
;
}
Request_Type
;
...
@@ -247,14 +249,18 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -247,14 +249,18 @@ class RocmBandwidthTest : public BaseTest {
// @brief: Run copy requests of users
// @brief: Run copy requests of users
void
RunCopyBenchmark
(
async_trans_t
&
trans
);
void
RunCopyBenchmark
(
async_trans_t
&
trans
);
// @brief: Run concurrent copy requests of users
void
RunConcurrentCopyBenchmark
(
bool
bidir
,
vector
<
async_trans_t
>&
trans_list
);
// @brief: Get iteration number
// @brief: Get iteration number
uint32_t
GetIterationNum
();
uint32_t
GetIterationNum
();
// @brief: Get the mean copy time
// @brief: Get the mean copy time
double
GetMeanTime
(
std
::
vector
<
double
>&
vec
);
double
GetMeanTime
(
vector
<
double
>&
vec
);
// @brief: Get the min copy time
// @brief: Get the min copy time
double
GetMinTime
(
std
::
vector
<
double
>&
vec
);
double
GetMinTime
(
vector
<
double
>&
vec
);
// @brief: Display Benchmark result
// @brief: Display Benchmark result
void
PopulatePerfMatrix
(
bool
peak
,
double
*
perf_matrix
)
const
;
void
PopulatePerfMatrix
(
bool
peak
,
double
*
perf_matrix
)
const
;
...
@@ -280,6 +286,7 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -280,6 +286,7 @@ class RocmBandwidthTest : public BaseTest {
bool
ValidateBidirCopyReq
();
bool
ValidateBidirCopyReq
();
bool
ValidateUnidirCopyReq
();
bool
ValidateUnidirCopyReq
();
bool
ValidateConcurrentCopyReq
();
bool
ValidateCopyReq
(
vector
<
size_t
>&
in_list
);
bool
ValidateCopyReq
(
vector
<
size_t
>&
in_list
);
void
PrintIOAccessError
(
uint32_t
agent_idx
,
uint32_t
pool_idx
);
void
PrintIOAccessError
(
uint32_t
agent_idx
,
uint32_t
pool_idx
);
void
PrintCopyAccessError
(
uint32_t
src_pool_idx
,
uint32_t
dst_pool_idx
);
void
PrintCopyAccessError
(
uint32_t
src_pool_idx
,
uint32_t
dst_pool_idx
);
...
@@ -289,6 +296,7 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -289,6 +296,7 @@ class RocmBandwidthTest : public BaseTest {
// @brief: Builds a list of transaction per user request
// @brief: Builds a list of transaction per user request
void
ComputeCopyTime
(
async_trans_t
&
trans
);
void
ComputeCopyTime
(
async_trans_t
&
trans
);
void
ComputeCopyTime
(
vector
<
async_trans_t
>&
trans_list
);
void
BuildDeviceList
();
void
BuildDeviceList
();
void
BuildBufferList
();
void
BuildBufferList
();
bool
BuildTransList
();
bool
BuildTransList
();
...
@@ -303,6 +311,8 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -303,6 +311,8 @@ class RocmBandwidthTest : public BaseTest {
bool
BuildCopyTrans
(
uint32_t
req_type
,
bool
BuildCopyTrans
(
uint32_t
req_type
,
vector
<
size_t
>&
src_list
,
vector
<
size_t
>&
src_list
,
vector
<
size_t
>&
dst_list
);
vector
<
size_t
>&
dst_list
);
bool
BuildConcurrentCopyTrans
(
uint32_t
req_type
,
vector
<
size_t
>&
dev_list
);
void
WaitForCopyCompletion
(
vector
<
hsa_signal_t
>&
signal_list
);
void
WaitForCopyCompletion
(
vector
<
hsa_signal_t
>&
signal_list
);
...
@@ -310,8 +320,16 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -310,8 +320,16 @@ class RocmBandwidthTest : public BaseTest {
void
*&
src
,
hsa_amd_memory_pool_t
src_pool
,
void
*&
src
,
hsa_amd_memory_pool_t
src_pool
,
void
*&
dst
,
hsa_amd_memory_pool_t
dst_pool
);
void
*&
dst
,
hsa_amd_memory_pool_t
dst_pool
);
void
ReleaseBuffers
(
std
::
vector
<
void
*>&
buffer_list
);
void
AllocateConcurrentCopyResources
(
bool
bidir
,
void
ReleaseSignals
(
std
::
vector
<
hsa_signal_t
>&
signal_list
);
vector
<
async_trans_t
>&
trans_list
,
vector
<
void
*>&
buffer_list
,
vector
<
hsa_agent_t
>&
dev_list
,
vector
<
uint32_t
>&
dev_idx_list
,
vector
<
hsa_signal_t
>&
sig_list
,
vector
<
hsa_amd_memory_pool_t
>&
pool_list
);
void
ReleaseBuffers
(
vector
<
void
*>&
buffer_list
);
void
ReleaseSignals
(
vector
<
hsa_signal_t
>&
signal_list
);
double
GetGpuCopyTime
(
bool
bidir
,
hsa_signal_t
signal_fwd
,
hsa_signal_t
signal_rev
);
double
GetGpuCopyTime
(
bool
bidir
,
hsa_signal_t
signal_fwd
,
hsa_signal_t
signal_rev
);
...
@@ -329,7 +347,7 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -329,7 +347,7 @@ class RocmBandwidthTest : public BaseTest {
bool
fine_grained
);
bool
fine_grained
);
// Find the mirror transaction if present
// Find the mirror transaction if present
bool
FindMirrorRequest
(
uint32_t
src_idx
,
uint32_t
dst_idx
);
bool
FindMirrorRequest
(
bool
reverse
,
uint32_t
src_idx
,
uint32_t
dst_idx
);
// @brief: Check if agent and access memory pool, if so, set
// @brief: Check if agent and access memory pool, if so, set
// access to the agent, if not, exit
// access to the agent, if not, exit
...
@@ -378,9 +396,6 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -378,9 +396,6 @@ class RocmBandwidthTest : public BaseTest {
void
PrintVersion
()
const
;
void
PrintVersion
()
const
;
std
::
string
GetVersion
()
const
;
std
::
string
GetVersion
()
const
;
// More variables declared for testing
// vector<transaction> tran_;
// Used to help count agent_info
// Used to help count agent_info
uint32_t
agent_index_
;
uint32_t
agent_index_
;
...
@@ -434,6 +449,8 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -434,6 +449,8 @@ class RocmBandwidthTest : public BaseTest {
uint32_t
req_copy_unidir_
;
uint32_t
req_copy_unidir_
;
uint32_t
req_copy_all_bidir_
;
uint32_t
req_copy_all_bidir_
;
uint32_t
req_copy_all_unidir_
;
uint32_t
req_copy_all_unidir_
;
uint32_t
req_concurrent_copy_bidir_
;
uint32_t
req_concurrent_copy_unidir_
;
static
const
uint32_t
USR_SRC_FLAG
=
0x01
;
static
const
uint32_t
USR_SRC_FLAG
=
0x01
;
static
const
uint32_t
USR_DST_FLAG
=
0x02
;
static
const
uint32_t
USR_DST_FLAG
=
0x02
;
...
...
rocm_bandwidth_test_parse.cpp
View file @
b389fac3
...
@@ -216,6 +216,22 @@ void RocmBandwidthTest::ValidateInputFlags(uint32_t pf_cnt,
...
@@ -216,6 +216,22 @@ void RocmBandwidthTest::ValidateInputFlags(uint32_t pf_cnt,
return
ValidateCopyAllUnidirFlags
(
copy_ctrl_mask
);
return
ValidateCopyAllUnidirFlags
(
copy_ctrl_mask
);
}
}
// Input is requesting to run concurrent copies
// rocm_bandwidth_test -k or -K
// It is illegal to specify secondary flags
if
((
req_concurrent_copy_bidir_
==
REQ_CONCURRENT_COPY_BIDIR
)
||
(
req_concurrent_copy_unidir_
==
REQ_CONCURRENT_COPY_UNIDIR
))
{
if
((
copy_ctrl_mask
&
DEV_COPY_LATENCY
)
||
(
copy_ctrl_mask
&
USR_BUFFER_INIT
)
||
(
copy_ctrl_mask
&
USR_BUFFER_SIZE
)
||
(
copy_ctrl_mask
&
CPU_VISIBLE_TIME
)
||
(
copy_ctrl_mask
&
VALIDATE_COPY_OP
))
{
PrintHelpScreen
();
exit
(
0
);
}
return
;
}
std
::
cout
<<
"ValidateInputFlags: This should not be happening"
<<
std
::
endl
;
std
::
cout
<<
"ValidateInputFlags: This should not be happening"
<<
std
::
endl
;
assert
(
false
);
assert
(
false
);
return
;
return
;
...
@@ -274,6 +290,11 @@ void RocmBandwidthTest::BuildBufferList() {
...
@@ -274,6 +290,11 @@ void RocmBandwidthTest::BuildBufferList() {
if
(
req_copy_bidir_
==
REQ_COPY_BIDIR
)
{
if
(
req_copy_bidir_
==
REQ_COPY_BIDIR
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
if
((
req_concurrent_copy_bidir_
==
REQ_CONCURRENT_COPY_BIDIR
)
||
(
req_concurrent_copy_unidir_
==
REQ_CONCURRENT_COPY_UNIDIR
))
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
}
}
}
...
@@ -291,7 +312,7 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -291,7 +312,7 @@ void RocmBandwidthTest::ParseArguments() {
int
opt
;
int
opt
;
bool
status
;
bool
status
;
while
((
opt
=
getopt
(
usr_argc_
,
usr_argv_
,
"hqtclvaAb:i:s:d:r:w:m:"
))
!=
-
1
)
{
while
((
opt
=
getopt
(
usr_argc_
,
usr_argv_
,
"hqtclvaAb:i:s:d:r:w:m:
k:K:
"
))
!=
-
1
)
{
switch
(
opt
)
{
switch
(
opt
)
{
// Print help screen
// Print help screen
...
@@ -358,6 +379,22 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -358,6 +379,22 @@ void RocmBandwidthTest::ParseArguments() {
print_help
=
true
;
print_help
=
true
;
break
;
break
;
// Collect list of agents involved in concurrent copy operation
case
'k'
:
case
'K'
:
status
=
ParseOptionValue
(
optarg
,
bidir_list_
);
if
((
status
)
&&
((
bidir_list_
.
size
()
%
2
)
==
0
))
{
num_primary_flags
++
;
if
(
opt
==
'K'
)
{
req_concurrent_copy_bidir_
=
REQ_CONCURRENT_COPY_BIDIR
;
}
else
{
req_concurrent_copy_unidir_
=
REQ_CONCURRENT_COPY_UNIDIR
;
}
break
;
}
print_help
=
true
;
break
;
// Size of buffers to use in copy and read/write operations
// Size of buffers to use in copy and read/write operations
case
'm'
:
case
'm'
:
status
=
ParseOptionValue
(
optarg
,
size_list_
);
status
=
ParseOptionValue
(
optarg
,
size_list_
);
...
@@ -420,7 +457,7 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -420,7 +457,7 @@ void RocmBandwidthTest::ParseArguments() {
std
::
cout
<<
"Argument is illegal or needs value: "
<<
'?'
<<
std
::
endl
;
std
::
cout
<<
"Argument is illegal or needs value: "
<<
'?'
<<
std
::
endl
;
if
((
optopt
==
'b'
)
||
(
optopt
==
's'
)
||
if
((
optopt
==
'b'
)
||
(
optopt
==
's'
)
||
(
optopt
==
'd'
)
||
(
optopt
==
'm'
)
||
(
optopt
==
'i'
))
{
(
optopt
==
'd'
)
||
(
optopt
==
'm'
)
||
(
optopt
==
'i'
))
{
std
::
cout
<<
"Error: Options -b -s -d
and
-m -i require argument"
<<
std
::
endl
;
std
::
cout
<<
"Error: Options -b -s -d -m -i
-k and -K
require argument"
<<
std
::
endl
;
}
}
print_help
=
true
;
print_help
=
true
;
break
;
break
;
...
...
rocm_bandwidth_test_print.cpp
View file @
b389fac3
...
@@ -309,6 +309,11 @@ void RocmBandwidthTest::PrintTransList() {
...
@@ -309,6 +309,11 @@ void RocmBandwidthTest::PrintTransList() {
std
::
cout
<<
" Src Memory Pool used in Copy: "
<<
trans
.
copy
.
src_idx_
<<
std
::
endl
;
std
::
cout
<<
" Src Memory Pool used in Copy: "
<<
trans
.
copy
.
src_idx_
<<
std
::
endl
;
std
::
cout
<<
" Dst Memory Pool used in Copy: "
<<
trans
.
copy
.
dst_idx_
<<
std
::
endl
;
std
::
cout
<<
" Dst Memory Pool used in Copy: "
<<
trans
.
copy
.
dst_idx_
<<
std
::
endl
;
}
}
if
((
trans
.
req_type_
==
REQ_CONCURRENT_COPY_BIDIR
)
||
(
trans
.
req_type_
==
REQ_CONCURRENT_COPY_UNIDIR
))
{
std
::
cout
<<
" Src Memory Pool used in Copy: "
<<
trans
.
copy
.
src_idx_
<<
std
::
endl
;
std
::
cout
<<
" Dst Memory Pool used in Copy: "
<<
trans
.
copy
.
dst_idx_
<<
std
::
endl
;
}
}
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
...
...
rocm_bandwidth_test_report.cpp
View file @
b389fac3
...
@@ -178,14 +178,18 @@ void RocmBandwidthTest::Display() const {
...
@@ -178,14 +178,18 @@ void RocmBandwidthTest::Display() const {
}
}
if
((
req_copy_bidir_
==
REQ_COPY_BIDIR
)
||
if
((
req_copy_bidir_
==
REQ_COPY_BIDIR
)
||
(
req_copy_unidir_
==
REQ_COPY_UNIDIR
))
{
(
req_copy_unidir_
==
REQ_COPY_UNIDIR
)
||
(
req_concurrent_copy_bidir_
==
REQ_CONCURRENT_COPY_BIDIR
)
||
(
req_concurrent_copy_unidir_
==
REQ_CONCURRENT_COPY_UNIDIR
))
{
PrintVersion
();
PrintVersion
();
}
}
for
(
uint32_t
idx
=
0
;
idx
<
trans_size
;
idx
++
)
{
for
(
uint32_t
idx
=
0
;
idx
<
trans_size
;
idx
++
)
{
async_trans_t
trans
=
trans_list_
[
idx
];
async_trans_t
trans
=
trans_list_
[
idx
];
if
((
trans
.
req_type_
==
REQ_COPY_BIDIR
)
||
if
((
trans
.
req_type_
==
REQ_COPY_BIDIR
)
||
(
trans
.
req_type_
==
REQ_COPY_UNIDIR
))
{
(
trans
.
req_type_
==
REQ_COPY_UNIDIR
)
||
(
trans
.
req_type_
==
REQ_CONCURRENT_COPY_BIDIR
)
||
(
trans
.
req_type_
==
REQ_CONCURRENT_COPY_UNIDIR
))
{
DisplayCopyTime
(
trans
);
DisplayCopyTime
(
trans
);
}
}
if
((
trans
.
req_type_
==
REQ_READ
)
||
if
((
trans
.
req_type_
==
REQ_READ
)
||
...
...
rocm_bandwidth_test_trans.cpp
View file @
b389fac3
...
@@ -43,15 +43,23 @@
...
@@ -43,15 +43,23 @@
#include "common.hpp"
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
#include "rocm_bandwidth_test.hpp"
bool
RocmBandwidthTest
::
FindMirrorRequest
(
uint32_t
src_idx
,
uint32_t
dst_idx
)
{
bool
RocmBandwidthTest
::
FindMirrorRequest
(
bool
reverse
,
uint32_t
src_idx
,
uint32_t
dst_idx
)
{
uint32_t
size
=
trans_list_
.
size
();
uint32_t
size
=
trans_list_
.
size
();
for
(
uint32_t
idx
=
0
;
idx
<
size
;
idx
++
)
{
for
(
uint32_t
idx
=
0
;
idx
<
size
;
idx
++
)
{
async_trans_t
&
mirror
=
trans_list_
[
idx
];
async_trans_t
&
mirror
=
trans_list_
[
idx
];
if
(
reverse
)
{
if
((
src_idx
==
mirror
.
copy
.
dst_idx_
)
&&
if
((
src_idx
==
mirror
.
copy
.
dst_idx_
)
&&
(
dst_idx
==
mirror
.
copy
.
src_idx_
))
{
(
dst_idx
==
mirror
.
copy
.
src_idx_
))
{
return
true
;
return
true
;
}
}
}
else
{
if
((
src_idx
==
mirror
.
copy
.
src_idx_
)
&&
(
dst_idx
==
mirror
.
copy
.
dst_idx_
))
{
return
true
;
}
}
}
}
return
false
;
return
false
;
...
@@ -135,7 +143,6 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
...
@@ -135,7 +143,6 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
vector
<
size_t
>&
src_list
,
vector
<
size_t
>&
src_list
,
vector
<
size_t
>&
dst_list
)
{
vector
<
size_t
>&
dst_list
)
{
// bool filter_out;
uint32_t
src_size
=
src_list
.
size
();
uint32_t
src_size
=
src_list
.
size
();
uint32_t
dst_size
=
dst_list
.
size
();
uint32_t
dst_size
=
dst_list
.
size
();
...
@@ -172,7 +179,7 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
...
@@ -172,7 +179,7 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
continue
;
continue
;
}
}
bool
mirror
=
FindMirrorRequest
(
src_idx
,
dst_idx
);
bool
mirror
=
FindMirrorRequest
(
true
,
src_idx
,
dst_idx
);
if
(
mirror
)
{
if
(
mirror
)
{
continue
;
continue
;
}
}
...
@@ -226,6 +233,94 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
...
@@ -226,6 +233,94 @@ bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
return
true
;
return
true
;
}
}
// @brief: Build the transactions of a concurrent copy request and append
// them to trans_list_.
//
// dev_list is read as consecutive (source pool, destination pool) index
// pairs. A pair is silently skipped when:
//   - both pools belong to Cpu agents
//   - a transaction with the same source and destination already exists
//   - the request is bidirectional and both pools belong to the same agent,
//     or a transaction with the opposite direction already exists
//
// @param req_type REQ_CONCURRENT_COPY_BIDIR or REQ_CONCURRENT_COPY_UNIDIR
// @param dev_list pool indices, two per requested copy
//
// @return false if dev_list does not hold complete pairs or if the access
// matrix reports no path for a requested copy (an access error is printed
// in that case), true otherwise
bool RocmBandwidthTest::BuildConcurrentCopyTrans(uint32_t req_type,
                                        vector<size_t>& dev_list) {

  // The list is consumed two entries at a time, an unpaired trailing
  // entry would make the loop below read past the end of the list
  uint32_t size = dev_list.size();
  if ((size % 2) != 0) {
    return false;
  }

  const bool bidir = (req_type == REQ_CONCURRENT_COPY_BIDIR);

  for (uint32_t idx = 0; idx < size; idx += 2) {

    // Retrieve Roc runtime handles for Src memory pool and agents
    uint32_t src_idx = dev_list[idx];
    uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
    hsa_amd_memory_pool_t src_pool = pool_list_[src_idx].pool_;
    hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_;

    // Retrieve Roc runtime handles for Dst memory pool and agents
    uint32_t dst_idx = dev_list[idx + 1];
    uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
    hsa_amd_memory_pool_t dst_pool = pool_list_[dst_idx].pool_;
    hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;

    // Filter out transactions that involve only Cpu agents/devices
    if ((src_dev_type == HSA_DEVICE_TYPE_CPU) &&
        (dst_dev_type == HSA_DEVICE_TYPE_CPU)) {
      continue;
    }

    // Filter out a duplicate of a transaction that is already in the list
    if (FindMirrorRequest(false, src_idx, dst_idx)) {
      continue;
    }

    // For bidirectional requests filter out copies within the same agent
    // and copies whose reverse direction is already in the list, as a
    // bidirectional transaction covers both directions
    if (bidir) {
      if (src_dev_idx == dst_dev_idx) {
        continue;
      }
      if (FindMirrorRequest(true, src_idx, dst_idx)) {
        continue;
      }
    }

    // Determine if accessibility to dst pool for src agent is not denied
    uint32_t path_exists = access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx];
    if (path_exists == 0) {
      PrintCopyAccessError(src_idx, dst_idx);
      return false;
    }

    // For bidirectional copies the reverse access path must be valid too
    if (bidir) {
      path_exists = access_matrix_[(dst_dev_idx * agent_index_) + src_dev_idx];
      if (path_exists == 0) {
        PrintCopyAccessError(dst_idx, src_idx);
        return false;
      }
    }

    // Update the list of agents active in any copy operation, the list
    // is created zero-initialized on first use
    if (active_agents_list_ == NULL) {
      active_agents_list_ = new uint32_t[agent_index_]();
    }
    active_agents_list_[src_dev_idx] = 1;
    active_agents_list_[dst_dev_idx] = 1;

    // Agents have access, build an instance of transaction
    // and add it to the list of transactions
    async_trans_t trans(req_type);
    trans.copy.src_idx_ = src_idx;
    trans.copy.dst_idx_ = dst_idx;
    trans.copy.src_pool_ = src_pool;
    trans.copy.dst_pool_ = dst_pool;
    trans.copy.bidir_ = bidir;
    trans.copy.uses_gpu_ = ((src_dev_type == HSA_DEVICE_TYPE_GPU) ||
                            (dst_dev_type == HSA_DEVICE_TYPE_GPU));
    trans_list_.push_back(trans);
  }

  return true;
}
bool
RocmBandwidthTest
::
BuildBidirCopyTrans
()
{
bool
RocmBandwidthTest
::
BuildBidirCopyTrans
()
{
return
BuildCopyTrans
(
REQ_COPY_BIDIR
,
bidir_list_
,
bidir_list_
);
return
BuildCopyTrans
(
REQ_COPY_BIDIR
,
bidir_list_
,
bidir_list_
);
}
}
...
@@ -246,63 +341,58 @@ bool RocmBandwidthTest::BuildAllPoolsUnidirCopyTrans() {
...
@@ -246,63 +341,58 @@ bool RocmBandwidthTest::BuildAllPoolsUnidirCopyTrans() {
bool
RocmBandwidthTest
::
BuildTransList
()
{
bool
RocmBandwidthTest
::
BuildTransList
()
{
// Build list of Read transactions per user request
// Build list of Read transactions per user request
bool
status
=
false
;
if
(
req_read_
==
REQ_READ
)
{
if
(
req_read_
==
REQ_READ
)
{
status
=
BuildReadTrans
();
return
BuildReadTrans
();
if
(
status
==
false
)
{
return
status
;
}
}
}
// Build list of Write transactions per user request
// Build list of Write transactions per user request
status
=
false
;
if
(
req_write_
==
REQ_WRITE
)
{
if
(
req_write_
==
REQ_WRITE
)
{
status
=
BuildWriteTrans
();
return
BuildWriteTrans
();
if
(
status
==
false
)
{
return
status
;
}
}
}
// Build list of Bidirectional Copy transactions per user request
// Build list of Bidirectional Copy transactions per user request
status
=
false
;
if
(
req_copy_bidir_
==
REQ_COPY_BIDIR
)
{
if
(
req_copy_bidir_
==
REQ_COPY_BIDIR
)
{
status
=
BuildBidirCopyTrans
();
return
BuildBidirCopyTrans
();
if
(
status
==
false
)
{
return
status
;
}
}
}
// Build list of Unidirectional Copy transactions per user request
// Build list of Unidirectional Copy transactions per user request
status
=
false
;
if
(
req_copy_unidir_
==
REQ_COPY_UNIDIR
)
{
if
(
req_copy_unidir_
==
REQ_COPY_UNIDIR
)
{
status
=
BuildUnidirCopyTrans
();
return
BuildUnidirCopyTrans
();
if
(
status
==
false
)
{
return
status
;
}
}
}
// Build list of All Bidir Copy transactions per user request
// Build list of All Bidir Copy transactions per user request
status
=
false
;
if
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
{
if
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
{
status
=
BuildAllPoolsBidirCopyTrans
();
return
BuildAllPoolsBidirCopyTrans
();
if
(
status
==
false
)
{
return
status
;
}
}
}
// Build list of All Unidir Copy transactions per user request
// Build list of All Unidir Copy transactions per user request
status
=
false
;
if
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
)
{
if
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
)
{
status
=
BuildAllPoolsUnidirCopyTrans
();
return
BuildAllPoolsUnidirCopyTrans
();
if
(
status
==
false
)
{
return
status
;
}
}
// Build list of Bidir Concurrent Copy transactions per user request
if
(
req_concurrent_copy_bidir_
==
REQ_CONCURRENT_COPY_BIDIR
)
{
return
BuildConcurrentCopyTrans
(
req_concurrent_copy_bidir_
,
bidir_list_
);
}
// Build list of Unidir Concurrent Copy transactions per user request
if
(
req_concurrent_copy_unidir_
==
REQ_CONCURRENT_COPY_UNIDIR
)
{
return
BuildConcurrentCopyTrans
(
req_concurrent_copy_unidir_
,
bidir_list_
);
}
}
// All of the transactions are built up
// All of the transactions are built up
return
true
;
return
true
;
}
}
// @brief: Compute the copy time of every transaction in the list by
// applying the single-transaction ComputeCopyTime to each element in order.
//
// @param trans_list transactions to process
void RocmBandwidthTest::ComputeCopyTime(std::vector<async_trans_t>& trans_list) {

  // The number of transactions is read once, before any of them is processed
  const uint32_t total = trans_list.size();
  uint32_t pos = 0;
  while (pos < total) {
    ComputeCopyTime(trans_list[pos]);
    ++pos;
  }
}
void
RocmBandwidthTest
::
ComputeCopyTime
(
async_trans_t
&
trans
)
{
void
RocmBandwidthTest
::
ComputeCopyTime
(
async_trans_t
&
trans
)
{
// Get the frequency of Gpu Timestamping
// Get the frequency of Gpu Timestamping
...
...
rocm_bandwidth_test_validate.cpp
View file @
b389fac3
...
@@ -147,52 +147,50 @@ bool RocmBandwidthTest::ValidateUnidirCopyReq() {
...
@@ -147,52 +147,50 @@ bool RocmBandwidthTest::ValidateUnidirCopyReq() {
return
((
ValidateCopyReq
(
src_list_
))
&&
(
ValidateCopyReq
(
dst_list_
)));
return
((
ValidateCopyReq
(
src_list_
))
&&
(
ValidateCopyReq
(
dst_list_
)));
}
}
// @brief: Validate a concurrent copy request.
//
// @return the result of PoolIsPresent for the list of pools given by
// the user (bidir_list_)
bool RocmBandwidthTest::ValidateConcurrentCopyReq() {

  // The request is valid when every pool it names is present in the system
  const bool pools_found = PoolIsPresent(bidir_list_);
  return pools_found;
}
bool
RocmBandwidthTest
::
ValidateArguments
()
{
bool
RocmBandwidthTest
::
ValidateArguments
()
{
// Determine if user has requested a READ
// Determine if user has requested a READ
// operation and gave valid inputs
// operation and gave valid inputs
bool
status
=
false
;
if
(
req_read_
==
REQ_READ
)
{
if
(
req_read_
==
REQ_READ
)
{
status
=
ValidateReadReq
();
return
ValidateReadReq
();
if
(
status
==
false
)
{
return
status
;
}
}
}
// Determine if user has requested a WRITE
// Determine if user has requested a WRITE
// operation and gave valid inputs
// operation and gave valid inputs
status
=
false
;
if
(
req_write_
==
REQ_WRITE
)
{
if
(
req_write_
==
REQ_WRITE
)
{
status
=
ValidateWriteReq
();
return
ValidateWriteReq
();
if
(
status
==
false
)
{
return
status
;
}
}
}
// Determine if user has requested a Copy
// Determine if user has requested a Copy
// operation that is bidirectional and gave
// operation that is bidirectional and gave
// valid inputs. Same validation is applied
// valid inputs. Same validation is applied
// for all-to-all unidirectional copy operation
// for all-to-all unidirectional copy operation
status
=
false
;
if
((
req_copy_bidir_
==
REQ_COPY_BIDIR
)
||
if
((
req_copy_bidir_
==
REQ_COPY_BIDIR
)
||
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
))
{
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
))
{
status
=
ValidateBidirCopyReq
();
return
ValidateBidirCopyReq
();
if
(
status
==
false
)
{
return
status
;
}
}
}
// Determine if user has requested a Copy
// Determine if user has requested a Copy
// operation that is unidirectional and gave
// operation that is unidirectional and gave
// valid inputs. Same validation is applied
// valid inputs. Same validation is applied
// for all-to-all bidirectional copy operation
// for all-to-all bidirectional copy operation
status
=
false
;
if
((
req_copy_unidir_
==
REQ_COPY_UNIDIR
)
||
if
((
req_copy_unidir_
==
REQ_COPY_UNIDIR
)
||
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
))
{
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
))
{
status
=
ValidateUnidirCopyReq
();
return
ValidateUnidirCopyReq
();
if
(
status
==
false
)
{
return
status
;
}
}
// Determine if user has requested a Concurrent
// Copy operation that is unidirectional or bidirectional
// and gave valid inputs.
if
((
req_concurrent_copy_bidir_
==
REQ_CONCURRENT_COPY_BIDIR
)
||
(
req_concurrent_copy_unidir_
==
REQ_CONCURRENT_COPY_UNIDIR
))
{
return
ValidateConcurrentCopyReq
();
}
}
// All of the requests are well formed
// All of the requests are well formed
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment