Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
rocm_bandwidth_test
Commits
1e5e2e06
Commit
1e5e2e06
authored
May 08, 2019
by
Ramesh Errabolu
Browse files
Initialize buffers used to copy
parent
fca6eaa8
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
433 additions
and
378 deletions
+433
-378
rocm_bandwidth_test.cpp
rocm_bandwidth_test.cpp
+140
-138
rocm_bandwidth_test.hpp
rocm_bandwidth_test.hpp
+45
-23
rocm_bandwidth_test_parse.cpp
rocm_bandwidth_test_parse.cpp
+194
-126
rocm_bandwidth_test_print.cpp
rocm_bandwidth_test_print.cpp
+8
-7
rocm_bandwidth_test_report.cpp
rocm_bandwidth_test_report.cpp
+46
-84
No files found.
rocm_bandwidth_test.cpp
View file @
1e5e2e06
...
@@ -97,87 +97,93 @@ void RocmBandwidthTest::AcquirePoolAcceses(uint32_t src_dev_idx,
...
@@ -97,87 +97,93 @@ void RocmBandwidthTest::AcquirePoolAcceses(uint32_t src_dev_idx,
return
;
return
;
}
}
void
RocmBandwidthTest
::
AllocateHostBuffers
(
size_t
size
,
void
RocmBandwidthTest
::
InitializeSrcBuffer
(
size_t
size
,
void
*
buf_cpy
,
uint32_t
src_dev_idx
,
uint32_t
cpy_dev_idx
,
hsa_agent_t
cpy_agent
)
{
uint32_t
dst_dev_idx
,
void
*&
src
,
void
*&
dst
,
void
*
buf_src
,
void
*
buf_dst
,
hsa_agent_t
src_agent
,
hsa_agent_t
dst_agent
,
hsa_signal_t
&
signal
)
{
// Allocate host buffers and setup accessibility for copy operation
// Allocate host buffers and setup accessibility for copy operation
err_
=
hsa_amd_memory_pool_allocate
(
sys_pool_
,
size
,
0
,
(
void
**
)
&
src
);
if
(
init_src_
==
NULL
)
{
ErrorCheck
(
err_
);
err_
=
hsa_amd_memory_pool_allocate
(
sys_pool_
,
size
,
0
,
(
void
**
)
&
init_src_
);
ErrorCheck
(
err_
);
memset
(
init_src_
,
init_val_
,
size
);
err_
=
hsa_signal_create
(
0
,
0
,
NULL
,
&
init_signal_
);
ErrorCheck
(
err_
);
}
// Gain access to the pools
// If Copy device is a Gpu setup buffer access
AcquirePoolAcceses
(
cpu_index_
,
cpu_agent_
,
src
,
hsa_device_type_t
cpy_dev_type
=
agent_list_
[
cpy_dev_idx
].
device_type_
;
src_dev_idx
,
src_agent
,
buf_src
);
if
(
cpy_dev_type
==
HSA_DEVICE_TYPE_GPU
)
{
AcquireAccess
(
cpy_agent
,
init_src_
);
hsa_signal_store_relaxed
(
init_signal_
,
1
);
copy_buffer
(
buf_cpy
,
cpy_agent
,
init_src_
,
cpu_agent_
,
size
,
init_signal_
);
return
;
}
err_
=
hsa_amd_memory_pool_allocate
(
sys_pool_
,
size
,
0
,
(
void
**
)
&
dst
);
// Copy initialization buffer into copy buffer
ErrorCheck
(
err_
);
memcpy
(
buf_cpy
,
init_src_
,
size
);
return
;
}
bool
RocmBandwidthTest
::
ValidateDstBuffer
(
size_t
max_size
,
size_t
curr_size
,
void
*
buf_cpy
,
uint32_t
cpy_dev_idx
,
hsa_agent_t
cpy_agent
)
{
// Gain access to the pools
// Allocate host buffers and setup accessibility for copy operation
AcquirePoolAcceses
(
dst_dev_idx
,
dst_agent
,
buf_dst
,
if
(
validate_dst_
==
NULL
)
{
cpu_index_
,
cpu_agent_
,
dst
);
err_
=
hsa_amd_memory_pool_allocate
(
sys_pool_
,
max_size
,
0
,
(
void
**
)
&
validate_dst_
);
ErrorCheck
(
err_
);
}
// Initialize host buffers to a determinate value
// If Copy device is a Gpu setup buffer access
memset
(
src
,
0x23
,
size
);
memset
(
validate_dst_
,
~
init_val_
,
curr_size
);
memset
(
dst
,
0x00
,
size
);
hsa_device_type_t
cpy_dev_type
=
agent_list_
[
cpy_dev_idx
].
device_type_
;
if
(
cpy_dev_type
==
HSA_DEVICE_TYPE_GPU
)
{
// Create a signal to wait on copy operation
AcquireAccess
(
cpy_agent
,
validate_dst_
);
// @TODO: replace it with a signal pool call
hsa_signal_store_relaxed
(
init_signal_
,
1
);
err_
=
hsa_signal_create
(
1
,
0
,
NULL
,
&
signal
);
copy_buffer
(
validate_dst_
,
cpu_agent_
,
ErrorCheck
(
err_
);
buf_cpy
,
cpy_agent
,
curr_size
,
init_signal_
);
}
else
{
// Copying device is a CPU, copy dst buffer
// into validation buffer
memcpy
(
validate_dst_
,
buf_cpy
,
curr_size
);
}
return
;
// Copy initialization buffer into copy buffer
err_
=
(
hsa_status_t
)
memcmp
(
init_src_
,
validate_dst_
,
curr_size
);
if
(
err_
!=
HSA_STATUS_SUCCESS
)
{
exit_value_
=
err_
;
}
return
(
err_
==
HSA_STATUS_SUCCESS
);
}
}
void
RocmBandwidthTest
::
AllocateCopyBuffers
(
size_t
size
,
void
RocmBandwidthTest
::
AllocateCopyBuffers
(
size_t
size
,
uint32_t
src_dev_idx
,
uint32_t
dst_dev_idx
,
void
*&
src
,
hsa_amd_memory_pool_t
src_pool
,
void
*&
src
,
hsa_amd_memory_pool_t
src_pool
,
void
*&
dst
,
hsa_amd_memory_pool_t
dst_pool
,
void
*&
dst
,
hsa_amd_memory_pool_t
dst_pool
)
{
hsa_agent_t
src_agent
,
hsa_agent_t
dst_agent
,
hsa_signal_t
&
signal
)
{
// Allocate buffers in src and dst pools for forward copy
// Allocate buffers in src and dst pools for forward copy
err_
=
hsa_amd_memory_pool_allocate
(
src_pool
,
size
,
0
,
&
src
);
err_
=
hsa_amd_memory_pool_allocate
(
src_pool
,
size
,
0
,
&
src
);
ErrorCheck
(
err_
);
ErrorCheck
(
err_
);
err_
=
hsa_amd_memory_pool_allocate
(
dst_pool
,
size
,
0
,
&
dst
);
err_
=
hsa_amd_memory_pool_allocate
(
dst_pool
,
size
,
0
,
&
dst
);
ErrorCheck
(
err_
);
ErrorCheck
(
err_
);
}
// Create a signal to wait on copy operation
void
RocmBandwidthTest
::
ReleaseBuffers
(
std
::
vector
<
void
*>&
buffer_list
)
{
// @TODO: replace it with a signal pool call
err_
=
hsa_signal_create
(
1
,
0
,
NULL
,
&
signal
);
ErrorCheck
(
err_
);
return
AcquirePoolAcceses
(
src_dev_idx
,
src_agent
,
src
,
for
(
uint32_t
idx
=
0
;
idx
<
buffer_list
.
size
();
idx
++
)
{
dst_dev_idx
,
dst_agent
,
dst
);
void
*
buffer
=
buffer_list
[
idx
];
err_
=
hsa_amd_memory_pool_free
(
buffer
);
ErrorCheck
(
err_
);
}
}
}
void
RocmBandwidthTest
::
ReleaseBuffers
(
bool
bidir
,
void
RocmBandwidthTest
::
ReleaseSignals
(
std
::
vector
<
hsa_signal_t
>&
signal_list
)
{
void
*
src_fwd
,
void
*
src_rev
,
void
*
dst_fwd
,
void
*
dst_rev
,
hsa_signal_t
signal_fwd
,
hsa_signal_t
signal_rev
)
{
// Free the src and dst buffers used in forward copy
for
(
uint32_t
idx
=
0
;
idx
<
signal_list
.
size
();
idx
++
)
{
// including the signal used to wait
hsa_signal_t
signal
=
signal_list
[
idx
];
err_
=
hsa_amd_memory_pool_free
(
src_fwd
);
err_
=
hsa_signal_destroy
(
signal
);
ErrorCheck
(
err_
);
err_
=
hsa_amd_memory_pool_free
(
dst_fwd
);
ErrorCheck
(
err_
);
err_
=
hsa_signal_destroy
(
signal_fwd
);
ErrorCheck
(
err_
);
// Free the src and dst buffers used in reverse copy
// including the signal used to wait
if
(
bidir
)
{
err_
=
hsa_amd_memory_pool_free
(
src_rev
);
ErrorCheck
(
err_
);
err_
=
hsa_amd_memory_pool_free
(
dst_rev
);
ErrorCheck
(
err_
);
err_
=
hsa_signal_destroy
(
signal_rev
);
ErrorCheck
(
err_
);
ErrorCheck
(
err_
);
}
}
}
}
...
@@ -217,6 +223,20 @@ double RocmBandwidthTest::GetGpuCopyTime(bool bidir,
...
@@ -217,6 +223,20 @@ double RocmBandwidthTest::GetGpuCopyTime(bool bidir,
return
copy_time
;
return
copy_time
;
}
}
// @brief: Waits, one signal at a time and in list order, until
// every signal in the list satisfies the condition "value < 1"
//
// @param signal_list signals to wait on; the list is not modified
//
// The wait state handed to the runtime is HSA_WAIT_STATE_ACTIVE when
// bw_blocking_run_ is NULL and HSA_WAIT_STATE_BLOCKED otherwise
void RocmBandwidthTest::WaitForCopyCompletion(vector<hsa_signal_t>& signal_list) {

  // Select the wait state once, it is the same for all signals
  hsa_wait_state_t wait_mode = HSA_WAIT_STATE_BLOCKED;
  if (bw_blocking_run_ == NULL) {
    wait_mode = HSA_WAIT_STATE_ACTIVE;
  }

  const uint32_t signal_cnt = signal_list.size();
  for (uint32_t pos = 0; pos < signal_cnt; ++pos) {
    hsa_signal_t curr_signal = signal_list[pos];

    // Re-issue the wait for as long as the call hands back
    // a non-zero value
    while (hsa_signal_wait_acquire(curr_signal, HSA_SIGNAL_CONDITION_LT,
                                   1, uint64_t(-1), wait_mode) != 0) {
    }
  }
}
void
RocmBandwidthTest
::
copy_buffer
(
void
*
dst
,
hsa_agent_t
dst_agent
,
void
RocmBandwidthTest
::
copy_buffer
(
void
*
dst
,
hsa_agent_t
dst_agent
,
void
*
src
,
hsa_agent_t
src_agent
,
void
*
src
,
hsa_agent_t
src_agent
,
size_t
size
,
hsa_signal_t
signal
)
{
size_t
size
,
hsa_signal_t
signal
)
{
...
@@ -247,11 +267,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
...
@@ -247,11 +267,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
void
*
buf_dst_fwd
;
void
*
buf_dst_fwd
;
void
*
buf_src_rev
;
void
*
buf_src_rev
;
void
*
buf_dst_rev
;
void
*
buf_dst_rev
;
void
*
validation_dst
;
void
*
validation_src
;
hsa_signal_t
signal_fwd
;
hsa_signal_t
signal_fwd
;
hsa_signal_t
signal_rev
;
hsa_signal_t
signal_rev
;
hsa_signal_t
validation_signal
;
hsa_signal_t
signal_start_bidir
;
hsa_signal_t
signal_start_bidir
;
uint32_t
src_idx
=
trans
.
copy
.
src_idx_
;
uint32_t
src_idx
=
trans
.
copy
.
src_idx_
;
uint32_t
dst_idx
=
trans
.
copy
.
dst_idx_
;
uint32_t
dst_idx
=
trans
.
copy
.
dst_idx_
;
...
@@ -267,46 +284,64 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
...
@@ -267,46 +284,64 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
hsa_agent_t
dst_agent_fwd
=
pool_list_
[
dst_idx
].
owner_agent_
;
hsa_agent_t
dst_agent_fwd
=
pool_list_
[
dst_idx
].
owner_agent_
;
hsa_agent_t
src_agent_rev
=
dst_agent_fwd
;
hsa_agent_t
src_agent_rev
=
dst_agent_fwd
;
hsa_agent_t
dst_agent_rev
=
src_agent_fwd
;
hsa_agent_t
dst_agent_rev
=
src_agent_fwd
;
std
::
vector
<
void
*>
buffer_list
;
std
::
vector
<
hsa_signal_t
>
signal_list
;
// Allocate buffers and signal objects
// Allocate buffers for forward path of unidirectional
// or bidirectional copy
AllocateCopyBuffers
(
max_size
,
AllocateCopyBuffers
(
max_size
,
src_dev_idx_fwd
,
dst_dev_idx_fwd
,
buf_src_fwd
,
src_pool_fwd
,
buf_src_fwd
,
src_pool_fwd
,
buf_dst_fwd
,
dst_pool_fwd
,
buf_dst_fwd
,
dst_pool_fwd
);
src_agent_fwd
,
dst_agent_fwd
,
signal_fwd
);
// Create a signal to wait on copy operation
// @TODO: replace it with a signal pool call
err_
=
hsa_signal_create
(
1
,
0
,
NULL
,
&
signal_fwd
);
ErrorCheck
(
err_
);
// Collect resources to be released later
signal_list
.
push_back
(
signal_fwd
);
buffer_list
.
push_back
(
buf_src_fwd
);
buffer_list
.
push_back
(
buf_dst_fwd
);
// Allocate buffers for reverse path of bidirectional copy
if
(
bidir
)
{
if
(
bidir
)
{
AllocateCopyBuffers
(
max_size
,
AllocateCopyBuffers
(
max_size
,
src_dev_idx_rev
,
dst_dev_idx_rev
,
buf_src_rev
,
src_pool_rev
,
buf_src_rev
,
src_pool_rev
,
buf_dst_rev
,
dst_pool_rev
,
buf_dst_rev
,
dst_pool_rev
);
src_agent_rev
,
dst_agent_rev
,
signal_rev
);
// Create a signal to begin bidir copy operations
// Create a signal to begin bidir copy operations
// @TODO: replace it with a signal pool call
// @TODO: replace it with a signal pool call
err_
=
hsa_signal_create
(
1
,
0
,
NULL
,
&
signal_rev
);
ErrorCheck
(
err_
);
err_
=
hsa_signal_create
(
1
,
0
,
NULL
,
&
signal_start_bidir
);
err_
=
hsa_signal_create
(
1
,
0
,
NULL
,
&
signal_start_bidir
);
ErrorCheck
(
err_
);
ErrorCheck
(
err_
);
signal_list
.
push_back
(
signal_rev
);
signal_list
.
push_back
(
signal_start_bidir
);
buffer_list
.
push_back
(
buf_src_rev
);
buffer_list
.
push_back
(
buf_dst_rev
);
}
}
if
(
validate_
)
{
// Initialize source buffers with data that could be verified
AllocateHostBuffers
(
max_size
,
if
((
init_
)
||
(
validate_
))
{
src_dev_idx_fwd
,
InitializeSrcBuffer
(
max_size
,
buf_src_fwd
,
dst_dev_idx_fwd
,
src_dev_idx_fwd
,
src_agent_fwd
);
validation_src
,
validation_dst
,
buf_src_fwd
,
buf_dst_fwd
,
if
(
bidir
)
{
src_agent_fwd
,
dst_agent_fwd
,
InitializeSrcBuffer
(
max_size
,
buf_src_rev
,
validation_signal
);
src_dev_idx_rev
,
src_agent_rev
);
}
// Initialize source buffer with values from verification buffer
copy_buffer
(
buf_src_fwd
,
src_agent_fwd
,
validation_src
,
cpu_agent_
,
max_size
,
validation_signal
);
}
}
// Setup access to destination buffers for
// both unidirectional and bidirectional copies
AcquirePoolAcceses
(
src_dev_idx_fwd
,
src_agent_fwd
,
buf_src_fwd
,
dst_dev_idx_fwd
,
dst_agent_fwd
,
buf_dst_fwd
);
if
(
bidir
)
{
AcquirePoolAcceses
(
src_dev_idx_rev
,
src_agent_rev
,
buf_src_rev
,
dst_dev_idx_rev
,
dst_agent_rev
,
buf_dst_rev
);
}
// Bind the number of iterations
// Bind the number of iterations
uint32_t
iterations
=
GetIterationNum
();
uint32_t
iterations
=
GetIterationNum
();
...
@@ -367,31 +402,7 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
...
@@ -367,31 +402,7 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
hsa_signal_store_relaxed
(
signal_start_bidir
,
0
);
hsa_signal_store_relaxed
(
signal_start_bidir
,
0
);
}
}
if
(
bw_blocking_run_
==
NULL
)
{
WaitForCopyCompletion
(
signal_list
);
// Wait for the forward copy operation to complete
while
(
hsa_signal_wait_acquire
(
signal_fwd
,
HSA_SIGNAL_CONDITION_LT
,
1
,
uint64_t
(
-
1
),
HSA_WAIT_STATE_ACTIVE
));
// Wait for the reverse copy operation to complete
if
(
bidir
)
{
while
(
hsa_signal_wait_acquire
(
signal_rev
,
HSA_SIGNAL_CONDITION_LT
,
1
,
uint64_t
(
-
1
),
HSA_WAIT_STATE_ACTIVE
));
}
}
else
{
// Wait for the forward copy operation to complete
hsa_signal_wait_acquire
(
signal_fwd
,
HSA_SIGNAL_CONDITION_LT
,
1
,
uint64_t
(
-
1
),
HSA_WAIT_STATE_BLOCKED
);
// Wait for the reverse copy operation to complete
if
(
bidir
)
{
hsa_signal_wait_acquire
(
signal_rev
,
HSA_SIGNAL_CONDITION_LT
,
1
,
uint64_t
(
-
1
),
HSA_WAIT_STATE_BLOCKED
);
}
}
// Stop the timer object
// Stop the timer object
timer
.
StopTimer
(
index
);
timer
.
StopTimer
(
index
);
...
@@ -408,19 +419,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
...
@@ -408,19 +419,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
}
}
if
(
validate_
)
{
if
(
validate_
)
{
verify
=
ValidateDstBuffer
(
max_size
,
curr_size
,
buf_dst_fwd
,
// Init dst buffer with values from outbuffer of copy operation
dst_dev_idx_fwd
,
dst_agent_fwd
);
hsa_signal_store_relaxed
(
validation_signal
,
1
);
copy_buffer
(
validation_dst
,
cpu_agent_
,
buf_dst_fwd
,
dst_agent_fwd
,
curr_size
,
validation_signal
);
// Compare output equals input
err_
=
(
hsa_status_t
)
memcmp
(
validation_src
,
validation_dst
,
curr_size
);
if
(
err_
!=
HSA_STATUS_SUCCESS
)
{
verify
=
false
;
exit_value_
=
err_
;
}
}
}
}
}
...
@@ -446,20 +446,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
...
@@ -446,20 +446,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
}
}
// Free up buffers and signal objects used in copy operation
// Free up buffers and signal objects used in copy operation
ReleaseBuffers
(
bidir
,
buf_src_fwd
,
buf_src_rev
,
ReleaseSignals
(
signal_list
);
buf_dst_fwd
,
buf_dst_rev
,
signal_fwd
,
signal_rev
);
ReleaseBuffers
(
buffer_list
);
if
(
validate_
)
{
hsa_signal_t
fake_signal
=
{
0
};
ReleaseBuffers
(
false
,
validation_src
,
NULL
,
validation_dst
,
NULL
,
validation_signal
,
fake_signal
);
}
// Free signal used to sync bidirectional copies
if
(
bidir
)
{
err_
=
hsa_signal_destroy
(
signal_start_bidir
);
ErrorCheck
(
err_
);
}
}
}
void
RocmBandwidthTest
::
Run
()
{
void
RocmBandwidthTest
::
Run
()
{
...
@@ -553,9 +541,16 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
...
@@ -553,9 +541,16 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
active_agents_list_
=
NULL
;
active_agents_list_
=
NULL
;
link_weight_matrix_
=
NULL
;
link_weight_matrix_
=
NULL
;
init_
=
false
;
latency_
=
false
;
latency_
=
false
;
validate_
=
false
;
validate_
=
false
;
print_cpu_time_
=
false
;
print_cpu_time_
=
false
;
// Set initial value to 0x23 in case
// user does not have a preference
init_val_
=
0x23
;
init_src_
=
NULL
;
validate_dst_
=
NULL
;
// Initialize version of the test
// Initialize version of the test
version_
.
major_id
=
2
;
version_
.
major_id
=
2
;
...
@@ -585,6 +580,13 @@ RocmBandwidthTest::~RocmBandwidthTest() {
...
@@ -585,6 +580,13 @@ RocmBandwidthTest::~RocmBandwidthTest() {
delete
link_type_matrix_
;
delete
link_type_matrix_
;
delete
link_weight_matrix_
;
delete
link_weight_matrix_
;
delete
active_agents_list_
;
delete
active_agents_list_
;
if
(
init_
)
{
hsa_signal_destroy
(
init_signal_
);
hsa_amd_memory_pool_free
(
init_src_
);
if
(
validate_
)
{
hsa_amd_memory_pool_free
(
validate_dst_
);
}
}
}
}
std
::
string
RocmBandwidthTest
::
GetVersion
()
const
{
std
::
string
RocmBandwidthTest
::
GetVersion
()
const
{
...
...
rocm_bandwidth_test.hpp
View file @
1e5e2e06
...
@@ -166,11 +166,13 @@ typedef enum Request_Type {
...
@@ -166,11 +166,13 @@ typedef enum Request_Type {
REQ_READ
=
1
,
REQ_READ
=
1
,
REQ_WRITE
=
2
,
REQ_WRITE
=
2
,
REQ_COPY_BIDIR
=
3
,
REQ_VERSION
=
3
,
REQ_COPY_UNIDIR
=
4
,
REQ_TOPOLOGY
=
4
,
REQ_COPY_ALL_BIDIR
=
5
,
REQ_COPY_BIDIR
=
5
,
REQ_COPY_ALL_UNIDIR
=
6
,
REQ_COPY_UNIDIR
=
6
,
REQ_INVALID
=
7
,
REQ_COPY_ALL_BIDIR
=
7
,
REQ_COPY_ALL_UNIDIR
=
8
,
REQ_INVALID
=
9
,
}
Request_Type
;
}
Request_Type
;
...
@@ -255,6 +257,8 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -255,6 +257,8 @@ class RocmBandwidthTest : public BaseTest {
double
GetMinTime
(
std
::
vector
<
double
>&
vec
);
double
GetMinTime
(
std
::
vector
<
double
>&
vec
);
// @brief: Display Benchmark result
// @brief: Display Benchmark result
void
PopulatePerfMatrix
(
bool
peak
,
double
*
perf_matrix
)
const
;
void
PrintPerfMatrix
(
bool
validate
,
bool
peak
,
double
*
perf_matrix
)
const
;
void
DisplayDevInfo
()
const
;
void
DisplayDevInfo
()
const
;
void
DisplayIOTime
(
async_trans_t
&
trans
)
const
;
void
DisplayIOTime
(
async_trans_t
&
trans
)
const
;
void
DisplayCopyTime
(
async_trans_t
&
trans
)
const
;
void
DisplayCopyTime
(
async_trans_t
&
trans
)
const
;
...
@@ -268,6 +272,11 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -268,6 +272,11 @@ class RocmBandwidthTest : public BaseTest {
bool
ValidateReadReq
();
bool
ValidateReadReq
();
bool
ValidateWriteReq
();
bool
ValidateWriteReq
();
bool
ValidateReadOrWriteReq
(
vector
<
size_t
>&
in_list
);
bool
ValidateReadOrWriteReq
(
vector
<
size_t
>&
in_list
);
void
ValidateCopyBidirFlags
(
uint32_t
copy_ctrl_mask
);
void
ValidateCopyAllBidirFlags
(
uint32_t
copy_ctrl_mask
);
void
ValidateCopyAllUnidirFlags
(
uint32_t
copy_ctrl_mask
);
void
ValidateCopyUnidirFlags
(
uint32_t
copy_mask
,
uint32_t
copy_ctrl_mask
);
bool
ValidateBidirCopyReq
();
bool
ValidateBidirCopyReq
();
bool
ValidateUnidirCopyReq
();
bool
ValidateUnidirCopyReq
();
...
@@ -280,6 +289,8 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -280,6 +289,8 @@ class RocmBandwidthTest : public BaseTest {
// @brief: Builds a list of transaction per user request
// @brief: Builds a list of transaction per user request
void
ComputeCopyTime
(
async_trans_t
&
trans
);
void
ComputeCopyTime
(
async_trans_t
&
trans
);
void
BuildDeviceList
();
void
BuildBufferList
();
bool
BuildTransList
();
bool
BuildTransList
();
bool
BuildReadTrans
();
bool
BuildReadTrans
();
bool
BuildWriteTrans
();
bool
BuildWriteTrans
();
...
@@ -293,24 +304,23 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -293,24 +304,23 @@ class RocmBandwidthTest : public BaseTest {
vector
<
size_t
>&
src_list
,
vector
<
size_t
>&
src_list
,
vector
<
size_t
>&
dst_list
);
vector
<
size_t
>&
dst_list
);
void
WaitForCopyCompletion
(
vector
<
hsa_signal_t
>&
signal_list
);
void
AllocateCopyBuffers
(
size_t
size
,
void
AllocateCopyBuffers
(
size_t
size
,
uint32_t
src_dev_idx
,
uint32_t
dst_dev_idx
,
void
*&
src
,
hsa_amd_memory_pool_t
src_pool
,
void
*&
src
,
hsa_amd_memory_pool_t
src_pool
,
void
*&
dst
,
hsa_amd_memory_pool_t
dst_pool
,
void
*&
dst
,
hsa_amd_memory_pool_t
dst_pool
);
hsa_agent_t
src_agent
,
hsa_agent_t
dst_agent
,
hsa_signal_t
&
signal
);
void
ReleaseBuffers
(
std
::
vector
<
void
*>&
buffer_list
);
void
ReleaseBuffers
(
bool
bidir
,
void
ReleaseSignals
(
std
::
vector
<
hsa_signal_t
>&
signal_list
);
void
*
src_fwd
,
void
*
src_rev
,
void
*
dst_fwd
,
void
*
dst_rev
,
hsa_signal_t
signal_fwd
,
hsa_signal_t
signal_rev
);
double
GetGpuCopyTime
(
bool
bidir
,
hsa_signal_t
signal_fwd
,
hsa_signal_t
signal_rev
);
double
GetGpuCopyTime
(
bool
bidir
,
hsa_signal_t
signal_fwd
,
hsa_signal_t
signal_rev
);
void
AllocateHostBuffers
(
size_t
size
,
uint32_t
src_dev_idx
,
void
InitializeSrcBuffer
(
size_t
size
,
void
*
buf_cpy
,
uint32_t
dst
_dev_idx
,
uint32_t
cpy
_dev_idx
,
hsa_agent_t
cpy_agent
);
void
*&
src
,
void
*&
dst
,
void
*
buf_src
,
void
*
buf_dst
,
bool
ValidateDstBuffer
(
size_t
max_size
,
size_t
curr_size
,
hsa_agent_t
src_agent
,
hsa_agent_t
dst
_agent
,
void
*
buf_cpy
,
uint32_t
cpy_dev_idx
,
hsa_agent_t
cpy
_agent
);
hsa_signal_t
&
signal
);
void
copy_buffer
(
void
*
dst
,
hsa_agent_t
dst_agent
,
void
copy_buffer
(
void
*
dst
,
hsa_agent_t
dst_agent
,
void
*
src
,
hsa_agent_t
src_agent
,
void
*
src
,
hsa_agent_t
src_agent
,
size_t
size
,
hsa_signal_t
signal
);
size_t
size
,
hsa_signal_t
signal
);
...
@@ -418,6 +428,8 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -418,6 +428,8 @@ class RocmBandwidthTest : public BaseTest {
// Type of service requested by user
// Type of service requested by user
uint32_t
req_read_
;
uint32_t
req_read_
;
uint32_t
req_write_
;
uint32_t
req_write_
;
uint32_t
req_version_
;
uint32_t
req_topology_
;
uint32_t
req_copy_bidir_
;
uint32_t
req_copy_bidir_
;
uint32_t
req_copy_unidir_
;
uint32_t
req_copy_unidir_
;
uint32_t
req_copy_all_bidir_
;
uint32_t
req_copy_all_bidir_
;
...
@@ -427,9 +439,10 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -427,9 +439,10 @@ class RocmBandwidthTest : public BaseTest {
static
const
uint32_t
USR_DST_FLAG
=
0x02
;
static
const
uint32_t
USR_DST_FLAG
=
0x02
;
static
const
uint32_t
USR_BUFFER_SIZE
=
0x01
;
static
const
uint32_t
USR_BUFFER_SIZE
=
0x01
;
static
const
uint32_t
USR_VISIBLE_TIME
=
0x02
;
static
const
uint32_t
USR_BUFFER_INIT
=
0x02
;
static
const
uint32_t
DEV_COPY_LATENCY
=
0x04
;
static
const
uint32_t
CPU_VISIBLE_TIME
=
0x04
;
static
const
uint32_t
VALIDATE_COPY_OP
=
0x08
;
static
const
uint32_t
DEV_COPY_LATENCY
=
0x08
;
static
const
uint32_t
VALIDATE_COPY_OP
=
0x010
;
static
const
uint32_t
LINK_TYPE_SELF
=
0x00
;
static
const
uint32_t
LINK_TYPE_SELF
=
0x00
;
static
const
uint32_t
LINK_TYPE_PCIE
=
0x01
;
static
const
uint32_t
LINK_TYPE_PCIE
=
0x01
;
...
@@ -482,9 +495,18 @@ class RocmBandwidthTest : public BaseTest {
...
@@ -482,9 +495,18 @@ class RocmBandwidthTest : public BaseTest {
// Flag to print Cpu time
// Flag to print Cpu time
bool
print_cpu_time_
;
bool
print_cpu_time_
;
// Determines if user has requested initialization
bool
init_
;
// Determines if user has requested validation
// Determines if user has requested validation
bool
validate_
;
bool
validate_
;
uint8_t
init_val_
;
// Handles to buffer used to initialize and validate
void
*
init_src_
;
void
*
validate_dst_
;
hsa_signal_t
init_signal_
;
// Determines the latency overhead of copy operations
// Determines the latency overhead of copy operations
bool
latency_
;
bool
latency_
;
...
...
rocm_bandwidth_test_parse.cpp
View file @
1e5e2e06
...
@@ -43,10 +43,20 @@
...
@@ -43,10 +43,20 @@
#include "common.hpp"
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
#include "rocm_bandwidth_test.hpp"
#include <assert.h>
#include <algorithm>
#include <algorithm>
#include <sstream>
#include <sstream>
#include <unistd.h>
#include <unistd.h>
// Parse the value string of the initialization option, as in
// example: -i 0x33. The string is converted by strtoul() with
// base 0, so decimal, hexadecimal (0x33) and octal (041) notations
// are all accepted.
//
// @param value_str NUL terminated option value string
// @param value     receives the parsed byte, written only on success
//
// @return true if the whole string is one number in the range
//         [0, 255], false otherwise
static bool ParseInitValue(char* value_str, uint8_t& value) {

  // There is nothing to parse
  if ((value_str == NULL) || (*value_str == '\0')) {
    return false;
  }

  // Keep the full width of the converted value so that the range
  // check below sees it before any truncation to a byte
  char* end_ptr = NULL;
  unsigned long value_read = strtoul(value_str, &end_ptr, 0);

  // Reject strings without digits or with trailing characters
  if ((end_ptr == value_str) || (*end_ptr != '\0')) {
    return false;
  }

  // Reject values that do not fit in one byte. This must be tested
  // on the untruncated value, otherwise multiples of 256 would be
  // silently accepted as zero
  if (value_read > 255) {
    return false;
  }

  value = static_cast<uint8_t>(value_read);
  return true;
}
// Parse option value string. The string has one more decimal
// Parse option value string. The string has one more decimal
// values separated by comma - "3,6,9,12,15".
// values separated by comma - "3,6,9,12,15".
static
bool
ParseOptionValue
(
char
*
value
,
vector
<
size_t
>&
value_list
)
{
static
bool
ParseOptionValue
(
char
*
value
,
vector
<
size_t
>&
value_list
)
{
...
@@ -80,80 +90,190 @@ static bool ParseOptionValue(char* value, vector<size_t>&value_list) {
...
@@ -80,80 +90,190 @@ static bool ParseOptionValue(char* value, vector<size_t>&value_list) {
return
true
;
return
true
;
}
}
void
RocmBandwidthTest
::
ValidateInputFlags
(
uint32_t
pf_cnt
,
void
RocmBandwidthTest
::
ValidateCopyBidirFlags
(
uint32_t
copy_ctrl_mask
)
{
uint32_t
copy_mask
,
uint32_t
copy_ctrl_mask
)
{
// Input can't have more than two Primary flags
// It is illegal to specify following flags
if
(
pf_cnt
>
2
)
{
// secondary flag that affects a copy operation
if
((
copy_ctrl_mask
&
DEV_COPY_LATENCY
)
||
(
copy_ctrl_mask
&
CPU_VISIBLE_TIME
)
||
(
copy_ctrl_mask
&
VALIDATE_COPY_OP
))
{
PrintHelpScreen
();
exit
(
0
);
}
return
;
}
void
RocmBandwidthTest
::
ValidateCopyUnidirFlags
(
uint32_t
copy_mask
,
uint32_t
copy_ctrl_mask
)
{
if
(
copy_mask
!=
(
USR_SRC_FLAG
|
USR_DST_FLAG
))
{
PrintHelpScreen
();
PrintHelpScreen
();
exit
(
0
);
exit
(
0
);
}
}
// Input specifies unidirectional copy among subset of devices
// It is illegal to specify Latency and another
if
(
pf_cnt
==
2
)
{
// secondary flag that affects a copy operation
if
(
copy_mask
!=
(
USR_SRC_FLAG
|
USR_DST_FLAG
))
{
if
((
copy_ctrl_mask
&
DEV_COPY_LATENCY
)
&&
((
copy_ctrl_mask
&
USR_BUFFER_INIT
)
||
(
copy_ctrl_mask
&
CPU_VISIBLE_TIME
)
||
(
copy_ctrl_mask
&
VALIDATE_COPY_OP
)))
{
PrintHelpScreen
();
PrintHelpScreen
();
exit
(
0
);
exit
(
0
);
}
}
}
// Rewrite input if user is requesting validation
// It is illegal to specify user buffer sizes and another
if
(
pf_cnt
==
0
)
{
// secondary flag that affects a copy operation
if
(
copy_ctrl_mask
&
VALIDATE_COPY_OP
)
{
if
((
copy_ctrl_mask
&
USR_BUFFER_SIZE
)
&&
req_copy_all_unidir_
=
REQ_COPY_ALL_UNIDIR
;
(
copy_ctrl_mask
&
VALIDATE_COPY_OP
))
{
}
PrintHelpScreen
();
exit
(
0
);
}
}
// User input for primary operation is valid.
// Check of illegal flags is complete
// Determine secondary flags are legal
return
;
}
// Case 1: It is illegal to specify copy size for copy
void
RocmBandwidthTest
::
ValidateCopyAllBidirFlags
(
uint32_t
copy_ctrl_mask
)
{
// operations involving all devices
if
(((
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
||
// It is illegal to specify following flags
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
))
&&
// secondary flag that affects a copy operation
(
copy_ctrl_mask
&
USR_BUFFER_SIZE
))
{
if
((
copy_ctrl_mask
&
DEV_COPY_LATENCY
)
||
(
copy_ctrl_mask
&
USR_BUFFER_SIZE
)
||
(
copy_ctrl_mask
&
CPU_VISIBLE_TIME
)
||
(
copy_ctrl_mask
&
VALIDATE_COPY_OP
))
{
PrintHelpScreen
();
PrintHelpScreen
();
exit
(
0
);
exit
(
0
);
}
}
//
// Check of illegal flags is complete
// Case 2: It is illegal to specify Latency for bidirectional
return
;
// copy operations or all-unidirectional
}
if
(((
req_copy_bidir_
==
REQ_COPY_BIDIR
)
||
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
||
void
RocmBandwidthTest
::
ValidateCopyAllUnidirFlags
(
uint32_t
copy_ctrl_mask
)
{
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
))
&&
(
copy_ctrl_mask
&
DEV_COPY_LATENCY
))
{
// It is illegal to specify following flags
// secondary flag that affects a copy operation
if
((
copy_ctrl_mask
&
DEV_COPY_LATENCY
)
||
(
copy_ctrl_mask
&
USR_BUFFER_SIZE
))
{
PrintHelpScreen
();
PrintHelpScreen
();
exit
(
0
);
exit
(
0
);
}
}
//
// Check of illegal flags is complete
// Case 3: It is illegal to specify Latency and another secondary
return
;
// flag that affects a copy operation
}
if
((
copy_ctrl_mask
&
DEV_COPY_LATENCY
)
&&
((
copy_ctrl_mask
&
USR_BUFFER_SIZE
)
||
void
RocmBandwidthTest
::
ValidateInputFlags
(
uint32_t
pf_cnt
,
(
copy_ctrl_mask
&
USR_VISIBLE_TIME
)
||
uint32_t
copy_mask
,
uint32_t
copy_ctrl_mask
)
{
(
copy_ctrl_mask
&
VALIDATE_COPY_OP
)))
{
PrintHelpScreen
();
// Input can't have more than two Primary flags
exit
(
0
);
if
((
pf_cnt
==
0
)
||
(
pf_cnt
>
2
))
{
PrintHelpScreen
();
exit
(
0
);
}
}
//
// Input specifies unidirectional copy among subset of devices
// Case 4: It is illegal to request Cpu time along with validation
// rocm_bandwidth_test -s Di,Dj,Dk -d Dp,Dq,Dr
// of copy operation
if
(
pf_cnt
==
2
)
{
if
((
copy_ctrl_mask
&
VALIDATE_COPY_OP
)
&&
return
ValidateCopyUnidirFlags
(
copy_mask
,
copy_ctrl_mask
);
((
copy_ctrl_mask
&
USR_BUFFER_SIZE
)
||
}
(
copy_ctrl_mask
&
USR_VISIBLE_TIME
)))
{
PrintHelpScreen
();
// Input is requesting to print RBT version
exit
(
0
);
// rocm_bandwidth_test -q
if
(
req_version_
==
REQ_VERSION
)
{
PrintVersion
();
exit
(
0
);
}
// Input is requesting to print ROCm topology
// rocm_bandwidth_test -t
if
(
req_topology_
==
REQ_TOPOLOGY
)
{
return
;
}
// Input is for bidirectional bandwidth for some devices
// rocm_bandwidth_test -b
if
(
req_copy_bidir_
==
REQ_COPY_BIDIR
)
{
return
ValidateCopyBidirFlags
(
copy_ctrl_mask
);
}
// Input is for bidirectional bandwidth for all devices
// rocm_bandwidth_test -A
if
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
{
return
ValidateCopyAllBidirFlags
(
copy_ctrl_mask
);
}
// Input is for unidirectional bandwidth for all devices
// rocm_bandwidth_test -a
if
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
)
{
return
ValidateCopyAllUnidirFlags
(
copy_ctrl_mask
);
}
std
::
cout
<<
"ValidateInputFlags: This should not be happening"
<<
std
::
endl
;
assert
(
false
);
return
;
}
void
RocmBandwidthTest
::
BuildDeviceList
()
{
// Initialize devices list if copying unidirectional
// all or bidirectional all mode is enabled
uint32_t
size
=
pool_list_
.
size
();
for
(
uint32_t
idx
=
0
;
idx
<
size
;
idx
++
)
{
if
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
{
bidir_list_
.
push_back
(
idx
);
}
else
{
src_list_
.
push_back
(
idx
);
dst_list_
.
push_back
(
idx
);
}
}
}
void
RocmBandwidthTest
::
BuildBufferList
()
{
// User has specified buffer sizes to be used
if
(
size_list_
.
size
()
!=
0
)
{
uint32_t
size_len
=
size_list_
.
size
();
for
(
uint32_t
idx
=
0
;
idx
<
size_len
;
idx
++
)
{
size_list_
[
idx
]
=
size_list_
[
idx
]
*
1024
*
1024
;
}
return
;
}
// User has NOT specified buffer sizes to be used
// For All Copy operations use only one buffer size
uint32_t
size_len
=
sizeof
(
SIZE_LIST
)
/
sizeof
(
size_t
);
for
(
uint32_t
idx
=
0
;
idx
<
size_len
;
idx
++
)
{
if
((
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
||
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
))
{
if
(
idx
==
16
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
if
(
req_copy_unidir_
==
REQ_COPY_UNIDIR
)
{
if
(
latency_
)
{
size_list_
.
push_back
(
LATENCY_SIZE_LIST
[
idx
]);
}
else
if
(
validate_
)
{
if
(
idx
==
16
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
else
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
if
(
req_copy_bidir_
==
REQ_COPY_BIDIR
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
}
}
}
void
RocmBandwidthTest
::
ParseArguments
()
{
void
RocmBandwidthTest
::
ParseArguments
()
{
bool
print_help
=
false
;
bool
print_help
=
0
;
bool
print_version
=
false
;
bool
print_topology
=
false
;
uint32_t
copy_mask
=
0
;
uint32_t
copy_mask
=
0
;
uint32_t
copy_ctrl_mask
=
0
;
uint32_t
copy_ctrl_mask
=
0
;
uint32_t
num_primary_flags
=
0
;
uint32_t
num_primary_flags
=
0
;
...
@@ -165,25 +285,24 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -165,25 +285,24 @@ void RocmBandwidthTest::ParseArguments() {
int
opt
;
int
opt
;
bool
status
;
bool
status
;
while
((
opt
=
getopt
(
usr_argc_
,
usr_argv_
,
"hqtclvaAb:s:d:r:w:m:"
))
!=
-
1
)
{
while
((
opt
=
getopt
(
usr_argc_
,
usr_argv_
,
"hqtclvaAb:
i:
s:d:r:w:m:"
))
!=
-
1
)
{
switch
(
opt
)
{
switch
(
opt
)
{
// Print help screen
// Print help screen
case
'h'
:
case
'h'
:
print_help
=
true
;
print_help
=
true
;
num_primary_flags
++
;
break
;
break
;
// Print version of the test
// Print version of the test
case
'q'
:
case
'q'
:
print_version
=
true
;
num_primary_flags
++
;
num_primary_flags
++
;
req_version_
=
REQ_VERSION
;
break
;
break
;
// Print system topology
// Print system topology
case
't'
:
case
't'
:
print_topology
=
true
;
num_primary_flags
++
;
num_primary_flags
++
;
req_topology_
=
REQ_TOPOLOGY
;
break
;
break
;
// Enable Unidirectional copy among all valid buffers
// Enable Unidirectional copy among all valid buffers
...
@@ -226,6 +345,7 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -226,6 +345,7 @@ void RocmBandwidthTest::ParseArguments() {
case
'b'
:
case
'b'
:
status
=
ParseOptionValue
(
optarg
,
bidir_list_
);
status
=
ParseOptionValue
(
optarg
,
bidir_list_
);
if
(
status
)
{
if
(
status
)
{
num_primary_flags
++
;
req_copy_bidir_
=
REQ_COPY_BIDIR
;
req_copy_bidir_
=
REQ_COPY_BIDIR
;
break
;
break
;
}
}
...
@@ -244,7 +364,7 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -244,7 +364,7 @@ void RocmBandwidthTest::ParseArguments() {
// Print Cpu time
// Print Cpu time
case
'c'
:
case
'c'
:
print_cpu_time_
=
true
;
print_cpu_time_
=
true
;
copy_ctrl_mask
|=
USR
_VISIBLE_TIME
;
copy_ctrl_mask
|=
CPU
_VISIBLE_TIME
;
break
;
break
;
// Set Latency mode flag to true
// Set Latency mode flag to true
...
@@ -259,6 +379,16 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -259,6 +379,16 @@ void RocmBandwidthTest::ParseArguments() {
copy_ctrl_mask
|=
VALIDATE_COPY_OP
;
copy_ctrl_mask
|=
VALIDATE_COPY_OP
;
break
;
break
;
// Set initialization mode flag to true
case
'i'
:
init_
=
true
;
status
=
ParseInitValue
(
optarg
,
init_val_
);
if
(
status
==
false
)
{
print_help
=
true
;
}
copy_ctrl_mask
|=
USR_BUFFER_INIT
;
break
;
// Collect request to read a buffer
// Collect request to read a buffer
case
'r'
:
case
'r'
:
req_read_
=
REQ_READ
;
req_read_
=
REQ_READ
;
...
@@ -282,8 +412,9 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -282,8 +412,9 @@ void RocmBandwidthTest::ParseArguments() {
// optopt
// optopt
case
'?'
:
case
'?'
:
std
::
cout
<<
"Argument is illegal or needs value: "
<<
'?'
<<
std
::
endl
;
std
::
cout
<<
"Argument is illegal or needs value: "
<<
'?'
<<
std
::
endl
;
if
((
optopt
==
'b'
||
optopt
==
's'
||
optopt
==
'd'
||
optopt
==
'm'
))
{
if
((
optopt
==
'b'
)
||
(
optopt
==
's'
)
||
std
::
cout
<<
"Error: Option -b -s -d and -m require argument"
<<
std
::
endl
;
(
optopt
==
'd'
)
||
(
optopt
==
'm'
)
||
(
optopt
==
'i'
))
{
std
::
cout
<<
"Error: Options -b -s -d and -m -i require argument"
<<
std
::
endl
;
}
}
print_help
=
true
;
print_help
=
true
;
break
;
break
;
...
@@ -292,9 +423,6 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -292,9 +423,6 @@ void RocmBandwidthTest::ParseArguments() {
break
;
break
;
}
}
}
}
// Determine input of primary flags is valid
ValidateInputFlags
(
num_primary_flags
,
copy_mask
,
copy_ctrl_mask
);
// Print help screen if user option has "-h"
// Print help screen if user option has "-h"
if
(
print_help
)
{
if
(
print_help
)
{
...
@@ -302,11 +430,8 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -302,11 +430,8 @@ void RocmBandwidthTest::ParseArguments() {
exit
(
0
);
exit
(
0
);
}
}
// Print version of the test
// Determine input of primary flags is valid
if
(
print_version
)
{
ValidateInputFlags
(
num_primary_flags
,
copy_mask
,
copy_ctrl_mask
);
PrintVersion
();
exit
(
0
);
}
// Initialize Roc Runtime
// Initialize Roc Runtime
err_
=
hsa_init
();
err_
=
hsa_init
();
...
@@ -316,7 +441,7 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -316,7 +441,7 @@ void RocmBandwidthTest::ParseArguments() {
DiscoverTopology
();
DiscoverTopology
();
// Print system topology if user option has "-t"
// Print system topology if user option has "-t"
if
(
print
_topology
)
{
if
(
req
_topology
_
==
REQ_TOPOLOGY
)
{
PrintVersion
();
PrintVersion
();
PrintTopology
();
PrintTopology
();
PrintLinkPropsMatrix
(
LINK_PROP_ACCESS
);
PrintLinkPropsMatrix
(
LINK_PROP_ACCESS
);
...
@@ -325,72 +450,15 @@ void RocmBandwidthTest::ParseArguments() {
...
@@ -325,72 +450,15 @@ void RocmBandwidthTest::ParseArguments() {
exit
(
0
);
exit
(
0
);
}
}
// Initialize
buffer
list if
full
copying
in
unidirectional
// Initialize
devices
list if copying unidirectional
// or bidirectional mode is enabled
//
all
or bidirectional
all
mode is enabled
if
((
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
)
||
if
((
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
)
||
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
))
{
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
))
{
uint32_t
size
=
pool_list_
.
size
();
BuildDeviceList
();
for
(
uint32_t
idx
=
0
;
idx
<
size
;
idx
++
)
{
if
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
{
bidir_list_
.
push_back
(
idx
);
}
else
{
src_list_
.
push_back
(
idx
);
dst_list_
.
push_back
(
idx
);
}
}
}
// Initialize the list of buffer sizes to use in copy/read/write operations
// For All Copy operations use only one buffer size
if
(
size_list_
.
size
()
==
0
)
{
uint32_t
size_len
=
sizeof
(
SIZE_LIST
)
/
sizeof
(
size_t
);
for
(
uint32_t
idx
=
0
;
idx
<
size_len
;
idx
++
)
{
if
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
{
if
(
idx
==
16
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
if
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
)
{
if
(
idx
==
16
)
{
if
(
latency_
==
false
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
else
{
size_list_
.
push_back
(
LATENCY_SIZE_LIST
[
3
]);
// size of 8 bytes
}
}
}
if
(
req_copy_unidir_
==
REQ_COPY_UNIDIR
)
{
if
(
latency_
)
{
size_list_
.
push_back
(
LATENCY_SIZE_LIST
[
idx
]);
}
else
if
(
validate_
)
{
if
(
idx
==
16
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
else
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
if
(
req_copy_bidir_
==
REQ_COPY_BIDIR
)
{
if
(
validate_
)
{
if
(
idx
==
16
)
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
else
{
size_list_
.
push_back
(
SIZE_LIST
[
idx
]);
}
}
}
}
else
{
uint32_t
size_len
=
size_list_
.
size
();
for
(
uint32_t
idx
=
0
;
idx
<
size_len
;
idx
++
)
{
size_list_
[
idx
]
=
size_list_
[
idx
]
*
1024
*
1024
;
}
}
}
// Initialize list of buffer sizes used in copy operations
BuildBufferList
();
std
::
sort
(
size_list_
.
begin
(),
size_list_
.
end
());
std
::
sort
(
size_list_
.
begin
(),
size_list_
.
end
());
}
}
rocm_bandwidth_test_print.cpp
View file @
1e5e2e06
...
@@ -59,6 +59,7 @@ void RocmBandwidthTest::PrintHelpScreen() {
...
@@ -59,6 +59,7 @@ void RocmBandwidthTest::PrintHelpScreen() {
std
::
cout
<<
"
\t
-v Run the test in validation mode"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-v Run the test in validation mode"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-l Run test to collect Latency data"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-l Run test to collect Latency data"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-c Time the operation using CPU Timers"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-c Time the operation using CPU Timers"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-i Initialize copy buffer with specified byte pattern"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-t Prints system topology and allocatable memory info"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-t Prints system topology and allocatable memory info"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-m List of buffer sizes to use, specified in Megabytes"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-m List of buffer sizes to use, specified in Megabytes"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-b List devices to use in bidirectional copy operations"
<<
std
::
endl
;
std
::
cout
<<
"
\t
-b List devices to use in bidirectional copy operations"
<<
std
::
endl
;
...
@@ -69,13 +70,13 @@ void RocmBandwidthTest::PrintHelpScreen() {
...
@@ -69,13 +70,13 @@ void RocmBandwidthTest::PrintHelpScreen() {
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
"
\t
NOTE: Mixing following options is illegal/unsupported"
<<
std
::
endl
;
std
::
cout
<<
"
\t
NOTE: Mixing following options is illegal/unsupported"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 1: rocm_bandwidth_test -a or -A with -
m
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 1: rocm_bandwidth_test -a or -A with -
c
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 2: rocm_bandwidth_test -b or -A with -
l
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 2: rocm_bandwidth_test -b or -A with -
m
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 3: rocm_bandwidth_test -
a
or -
s x -d
with -l
and -c
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 3: rocm_bandwidth_test -
b
or -
A
with -l"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 4: rocm_bandwidth_test -
a
or -
s x -d
with -
l and -m
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 4: rocm_bandwidth_test -
b
or -
A
with -
v
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 5: rocm_bandwidth_test -a or -s x -d with -l and -
v
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 5: rocm_bandwidth_test -a or -s x -d
y
with -l and -
c
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 6: rocm_bandwidth_test -a or
-A -b or
-s x -d y with -
v
and -
c
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 6: rocm_bandwidth_test -a or -s x -d y with -
l
and -
m
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 7: rocm_bandwidth_test -a or
-A -b or
-s x -d y with -
v
and -
m
"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 7: rocm_bandwidth_test -a or -s x -d y with -
l
and -
v
"
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
...
...
rocm_bandwidth_test_report.cpp
View file @
1e5e2e06
...
@@ -162,7 +162,6 @@ void RocmBandwidthTest::Display() const {
...
@@ -162,7 +162,6 @@ void RocmBandwidthTest::Display() const {
DisplayDevInfo
();
DisplayDevInfo
();
PrintLinkPropsMatrix
(
LINK_PROP_ACCESS
);
PrintLinkPropsMatrix
(
LINK_PROP_ACCESS
);
PrintLinkPropsMatrix
(
LINK_PROP_WEIGHT
);
PrintLinkPropsMatrix
(
LINK_PROP_WEIGHT
);
PrintLinkPropsMatrix
(
LINK_PROP_TYPE
);
DisplayCopyTimeMatrix
(
true
);
DisplayCopyTimeMatrix
(
true
);
return
;
return
;
}
}
...
@@ -173,7 +172,6 @@ void RocmBandwidthTest::Display() const {
...
@@ -173,7 +172,6 @@ void RocmBandwidthTest::Display() const {
DisplayDevInfo
();
DisplayDevInfo
();
PrintLinkPropsMatrix
(
LINK_PROP_ACCESS
);
PrintLinkPropsMatrix
(
LINK_PROP_ACCESS
);
PrintLinkPropsMatrix
(
LINK_PROP_WEIGHT
);
PrintLinkPropsMatrix
(
LINK_PROP_WEIGHT
);
PrintLinkPropsMatrix
(
LINK_PROP_TYPE
);
}
}
DisplayCopyTimeMatrix
(
true
);
DisplayCopyTimeMatrix
(
true
);
return
;
return
;
...
@@ -221,9 +219,8 @@ void RocmBandwidthTest::DisplayCopyTime(async_trans_t& trans) const {
...
@@ -221,9 +219,8 @@ void RocmBandwidthTest::DisplayCopyTime(async_trans_t& trans) const {
}
}
}
}
void
RocmBandwidthTest
::
DisplayCopyTime
Matrix
(
bool
peak
)
const
{
void
RocmBandwidthTest
::
PopulatePerf
Matrix
(
bool
peak
,
double
*
perf_matrix
)
const
{
double
*
perf_matrix
=
new
double
[
agent_index_
*
agent_index_
]();
uint32_t
trans_size
=
trans_list_
.
size
();
uint32_t
trans_size
=
trans_list_
.
size
();
for
(
uint32_t
idx
=
0
;
idx
<
trans_size
;
idx
++
)
{
for
(
uint32_t
idx
=
0
;
idx
<
trans_size
;
idx
++
)
{
async_trans_t
trans
=
trans_list_
[
idx
];
async_trans_t
trans
=
trans_list_
[
idx
];
...
@@ -240,6 +237,10 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
...
@@ -240,6 +237,10 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
}
}
}
}
}
void
RocmBandwidthTest
::
PrintPerfMatrix
(
bool
validate
,
bool
peak
,
double
*
perf_matrix
)
const
{
uint32_t
format
=
10
;
uint32_t
format
=
10
;
std
::
cout
.
setf
(
ios
::
left
);
std
::
cout
.
setf
(
ios
::
left
);
...
@@ -247,20 +248,24 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
...
@@ -247,20 +248,24 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
std
::
cout
<<
""
;
std
::
cout
<<
""
;
std
::
cout
.
width
(
format
);
std
::
cout
.
width
(
format
);
if
((
peak
)
&&
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
))
{
if
(
validate
==
false
)
{
std
::
cout
<<
"Unidirectional copy peak bandwidth GB/s"
;
if
((
peak
)
&&
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
))
{
}
std
::
cout
<<
"Unidirectional copy peak bandwidth GB/s"
;
}
if
((
peak
==
false
)
&&
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
))
{
std
::
cout
<<
"Unidirectional copy average bandwidth GB/s"
;
if
((
peak
==
false
)
&&
(
req_copy_all_unidir_
==
REQ_COPY_ALL_UNIDIR
))
{
}
std
::
cout
<<
"Unidirectional copy average bandwidth GB/s"
;
}
if
((
peak
)
&&
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
))
{
std
::
cout
<<
"Bdirectional copy peak bandwidth GB/s"
;
if
((
peak
)
&&
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
))
{
}
std
::
cout
<<
"Bdirectional copy peak bandwidth GB/s"
;
}
if
((
peak
==
false
)
&&
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
))
{
std
::
cout
<<
"Bidirectional copy average bandwidth GB/s"
;
if
((
peak
==
false
)
&&
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
))
{
std
::
cout
<<
"Bidirectional copy average bandwidth GB/s"
;
}
}
else
{
std
::
cout
<<
"Data Path Validation"
;
}
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
...
@@ -293,10 +298,20 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
...
@@ -293,10 +298,20 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
format
=
12
;
format
=
12
;
std
::
cout
.
width
(
format
);
std
::
cout
.
width
(
format
);
double
value
=
perf_matrix
[(
idx0
*
agent_index_
)
+
idx1
];
double
value
=
perf_matrix
[(
idx0
*
agent_index_
)
+
idx1
];
if
(
value
==
0
)
{
if
(
validate
)
{
std
::
cout
<<
"N/A"
;
if
(
value
==
0
)
{
std
::
cout
<<
"N/A"
;
}
else
if
(
value
<
1
)
{
std
::
cout
<<
"FAIL"
;
}
else
{
std
::
cout
<<
"PASS"
;
}
}
else
{
}
else
{
std
::
cout
<<
perf_matrix
[(
idx0
*
agent_index_
)
+
idx1
];
if
(
value
==
0
)
{
std
::
cout
<<
"N/A"
;
}
else
{
std
::
cout
<<
perf_matrix
[(
idx0
*
agent_index_
)
+
idx1
];
}
}
}
}
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
...
@@ -305,73 +320,20 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
...
@@ -305,73 +320,20 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
}
}
void
RocmBandwidthTest
::
Display
ValidationMatrix
(
)
const
{
void
RocmBandwidthTest
::
Display
CopyTimeMatrix
(
bool
peak
)
const
{
double
*
perf_matrix
=
new
double
[
agent_index_
*
agent_index_
]();
double
*
perf_matrix
=
new
double
[
agent_index_
*
agent_index_
]();
uint32_t
trans_size
=
trans_list_
.
size
();
PopulatePerfMatrix
(
peak
,
perf_matrix
);
for
(
uint32_t
idx
=
0
;
idx
<
trans_size
;
idx
++
)
{
PrintPerfMatrix
(
false
,
peak
,
perf_matrix
);
async_trans_t
trans
=
trans_list_
[
idx
];
free
(
perf_matrix
);
uint32_t
src_idx
=
trans
.
copy
.
src_idx_
;
}
uint32_t
dst_idx
=
trans
.
copy
.
dst_idx_
;
uint32_t
src_dev_idx
=
pool_list_
[
src_idx
].
agent_index_
;
uint32_t
dst_dev_idx
=
pool_list_
[
dst_idx
].
agent_index_
;
perf_matrix
[(
src_dev_idx
*
agent_index_
)
+
dst_dev_idx
]
=
trans
.
peak_bandwidth_
[
0
];
if
(
req_copy_all_bidir_
==
REQ_COPY_ALL_BIDIR
)
{
perf_matrix
[(
dst_dev_idx
*
agent_index_
)
+
src_dev_idx
]
=
trans
.
peak_bandwidth_
[
0
];
}
}
uint32_t
format
=
10
;
std
::
cout
.
setf
(
ios
::
left
);
std
::
cout
.
width
(
format
);
std
::
cout
<<
""
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
"Data Path Validation"
;
std
::
cout
<<
std
::
endl
;
void
RocmBandwidthTest
::
DisplayValidationMatrix
()
const
{
std
::
cout
<<
std
::
endl
;
std
::
cout
.
precision
(
6
);
std
::
cout
<<
std
::
fixed
;
std
::
cout
.
width
(
format
);
double
*
perf_matrix
=
new
double
[
agent_index_
*
agent_index_
]();
std
::
cout
<<
""
;
PopulatePerfMatrix
(
true
,
perf_matrix
);
std
::
cout
.
width
(
format
);
PrintPerfMatrix
(
true
,
true
,
perf_matrix
);
std
::
cout
<<
"D/D"
;
free
(
perf_matrix
);
format
=
12
;
for
(
uint32_t
idx0
=
0
;
idx0
<
agent_index_
;
idx0
++
)
{
std
::
cout
.
width
(
format
);
std
::
stringstream
agent_id
;
agent_id
<<
idx0
;
std
::
cout
<<
agent_id
.
str
();
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
for
(
uint32_t
idx0
=
0
;
idx0
<
agent_index_
;
idx0
++
)
{
format
=
10
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
""
;
std
::
stringstream
agent_id
;
agent_id
<<
idx0
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
agent_id
.
str
();
for
(
uint32_t
idx1
=
0
;
idx1
<
agent_index_
;
idx1
++
)
{
format
=
12
;
std
::
cout
.
width
(
format
);
double
value
=
perf_matrix
[(
idx0
*
agent_index_
)
+
idx1
];
if
(
value
==
0
)
{
std
::
cout
<<
"N/A"
;
}
else
if
(
value
<
1
)
{
std
::
cout
<<
"FAIL"
;
}
else
{
std
::
cout
<<
"PASS"
;
}
}
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
}
std
::
cout
<<
std
::
endl
;
}
}
void
RocmBandwidthTest
::
DisplayDevInfo
()
const
{
void
RocmBandwidthTest
::
DisplayDevInfo
()
const
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment