Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
rocm_bandwidth_test
Commits
88d4ad20
Commit
88d4ad20
authored
Oct 03, 2019
by
Ramesh Errabolu
Browse files
Refactor validation signal treatment in a device agnostic manner
parent
d1ac47c6
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
60 additions
and
48 deletions
+60
-48
rocm_bandwidth_test.cpp
rocm_bandwidth_test.cpp
+17
-9
rocm_bandwidth_test_parse.cpp
rocm_bandwidth_test_parse.cpp
+1
-3
rocm_bandwidth_test_print.cpp
rocm_bandwidth_test_print.cpp
+12
-7
rocm_bandwidth_test_trans.cpp
rocm_bandwidth_test_trans.cpp
+30
-29
No files found.
rocm_bandwidth_test.cpp
View file @
88d4ad20
...
@@ -641,17 +641,25 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
...
@@ -641,17 +641,25 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
}
}
}
}
// Get Cpu min and mean times for copy
// Collecting Cpu time. Capture verify failures if any
// Push them into the Cpu time list
// Get min and mean copy times and collect them into Cpu
trans
.
cpu_min_time_
.
push_back
(
GetMinTime
(
cpu_time
));
// time list
trans
.
cpu_avg_time_
.
push_back
(
GetMeanTime
(
cpu_time
));
double
min_time
=
0
;
double
mean_time
=
0
;
if
(
print_cpu_time_
)
{
min_time
=
(
verify
)
?
GetMinTime
(
cpu_time
)
:
VALIDATE_COPY_OP_FAILURE
;
mean_time
=
(
verify
)
?
GetMeanTime
(
cpu_time
)
:
VALIDATE_COPY_OP_FAILURE
;
trans
.
cpu_min_time_
.
push_back
(
min_time
);
trans
.
cpu_avg_time_
.
push_back
(
mean_time
);
}
// Collecting Gpu time. Capture verify failures if any
// Get min and mean copy times and collect them into Gpu
// time list
if
(
print_cpu_time_
==
false
)
{
if
(
print_cpu_time_
==
false
)
{
if
(
trans
.
copy
.
uses_gpu_
)
{
if
(
trans
.
copy
.
uses_gpu_
)
{
// Get Gpu min and mean copy times
min_time
=
(
verify
)
?
GetMinTime
(
gpu_time
)
:
VALIDATE_COPY_OP_FAILURE
;
// Push them into the Gpu time list
mean_time
=
(
verify
)
?
GetMeanTime
(
gpu_time
)
:
VALIDATE_COPY_OP_FAILURE
;
double
min_time
=
(
verify
)
?
GetMinTime
(
gpu_time
)
:
VALIDATE_COPY_OP_FAILURE
;
double
mean_time
=
(
verify
)
?
GetMeanTime
(
gpu_time
)
:
VALIDATE_COPY_OP_FAILURE
;
trans
.
gpu_min_time_
.
push_back
(
min_time
);
trans
.
gpu_min_time_
.
push_back
(
min_time
);
trans
.
gpu_avg_time_
.
push_back
(
mean_time
);
trans
.
gpu_avg_time_
.
push_back
(
mean_time
);
}
}
...
@@ -796,7 +804,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
...
@@ -796,7 +804,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
// Initialize version of the test
// Initialize version of the test
version_
.
major_id
=
2
;
version_
.
major_id
=
2
;
version_
.
minor_id
=
3
;
version_
.
minor_id
=
3
;
version_
.
step_id
=
7
;
version_
.
step_id
=
9
;
version_
.
reserved
=
0
;
version_
.
reserved
=
0
;
bw_iter_cnt_
=
getenv
(
"ROCM_BW_ITER_CNT"
);
bw_iter_cnt_
=
getenv
(
"ROCM_BW_ITER_CNT"
);
...
...
rocm_bandwidth_test_parse.cpp
View file @
88d4ad20
...
@@ -147,9 +147,7 @@ void RocmBandwidthTest::ValidateCopyUnidirFlags(uint32_t copy_mask,
...
@@ -147,9 +147,7 @@ void RocmBandwidthTest::ValidateCopyUnidirFlags(uint32_t copy_mask,
// It is illegal to specify Latency and another
// It is illegal to specify Latency and another
// secondary flag that affects a copy operation
// secondary flag that affects a copy operation
if
((
copy_ctrl_mask
&
DEV_COPY_LATENCY
)
&&
if
((
copy_ctrl_mask
&
DEV_COPY_LATENCY
)
&&
((
copy_ctrl_mask
&
USR_BUFFER_INIT
)
||
(
copy_ctrl_mask
&
VALIDATE_COPY_OP
))
{
(
copy_ctrl_mask
&
CPU_VISIBLE_TIME
)
||
(
copy_ctrl_mask
&
VALIDATE_COPY_OP
)))
{
PrintHelpScreen
();
PrintHelpScreen
();
exit
(
0
);
exit
(
0
);
}
}
...
...
rocm_bandwidth_test_print.cpp
View file @
88d4ad20
...
@@ -70,13 +70,10 @@ void RocmBandwidthTest::PrintHelpScreen() {
...
@@ -70,13 +70,10 @@ void RocmBandwidthTest::PrintHelpScreen() {
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
"
\t
NOTE: Mixing following options is illegal/unsupported"
<<
std
::
endl
;
std
::
cout
<<
"
\t
NOTE: Mixing following options is illegal/unsupported"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 1: rocm_bandwidth_test -a or -A with -c"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 1: rocm_bandwidth_test -a with {lm}{1,}"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 2: rocm_bandwidth_test -b or -A with -m"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 2: rocm_bandwidth_test -b with {clv}{1,}"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 3: rocm_bandwidth_test -b or -A with -l"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 3: rocm_bandwidth_test -A with {clmv}{1,}"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 4: rocm_bandwidth_test -b or -A with -v"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 4: rocm_bandwidth_test -s x -d y with {lmv}{2,}"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 5: rocm_bandwidth_test -a or -s x -d y with -l and -c"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 6: rocm_bandwidth_test -a or -s x -d y with -l and -m"
<<
std
::
endl
;
std
::
cout
<<
"
\t\t
Case 7: rocm_bandwidth_test -a or -s x -d y with -l and -v"
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
std
::
cout
<<
std
::
endl
;
...
@@ -152,11 +149,19 @@ void RocmBandwidthTest::PrintTopology() {
...
@@ -152,11 +149,19 @@ void RocmBandwidthTest::PrintTopology() {
if
(
HSA_DEVICE_TYPE_CPU
==
node
.
agent
.
device_type_
)
{
if
(
HSA_DEVICE_TYPE_CPU
==
node
.
agent
.
device_type_
)
{
std
::
cout
<<
" Device Type: CPU"
<<
std
::
endl
;
std
::
cout
<<
" Device Type: CPU"
<<
std
::
endl
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
""
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
" Device Name: "
<<
node
.
agent
.
name_
<<
std
::
endl
;
}
else
if
(
HSA_DEVICE_TYPE_GPU
==
node
.
agent
.
device_type_
)
{
}
else
if
(
HSA_DEVICE_TYPE_GPU
==
node
.
agent
.
device_type_
)
{
std
::
cout
<<
" Device Type: GPU"
<<
std
::
endl
;
std
::
cout
<<
" Device Type: GPU"
<<
std
::
endl
;
std
::
cout
.
width
(
format
);
std
::
cout
.
width
(
format
);
std
::
cout
<<
""
;
std
::
cout
<<
""
;
std
::
cout
.
width
(
format
);
std
::
cout
.
width
(
format
);
std
::
cout
<<
" Device Name: "
<<
node
.
agent
.
name_
<<
std
::
endl
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
""
;
std
::
cout
.
width
(
format
);
std
::
cout
<<
" Device BDF: "
<<
node
.
agent
.
bdf_id_
<<
std
::
endl
;
std
::
cout
<<
" Device BDF: "
<<
node
.
agent
.
bdf_id_
<<
std
::
endl
;
}
}
...
...
rocm_bandwidth_test_trans.cpp
View file @
88d4ad20
...
@@ -418,44 +418,45 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
...
@@ -418,44 +418,45 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
data_size
+=
data_size
;
data_size
+=
data_size
;
}
}
//
Copy operation does not involve a Gpu device
//
Get time taken by copy operation
// Divide bandwidth with 10^9 to get size in GigaBytes (10^9)
if
((
print_cpu_time_
)
||
if
(
trans
.
copy
.
uses_gpu_
!=
true
)
{
(
trans
.
copy
.
uses_gpu_
!=
true
)
)
{
avg_time
=
trans
.
cpu_avg_time_
[
idx
];
avg_time
=
trans
.
cpu_avg_time_
[
idx
];
min_time
=
trans
.
cpu_min_time_
[
idx
];
min_time
=
trans
.
cpu_min_time_
[
idx
];
avg_bandwidth
=
(
double
)
data_size
/
avg_time
/
1000
/
1000
/
1000
;
peak_bandwidth
=
(
double
)
data_size
/
min_time
/
1000
/
1000
/
1000
;
}
else
{
}
else
{
if
(
print_cpu_time_
)
{
avg_time
=
trans
.
gpu_avg_time_
[
idx
];
avg_time
=
trans
.
cpu_avg_time_
[
idx
];
min_time
=
trans
.
gpu_min_time_
[
idx
];
min_time
=
trans
.
cpu_min_time_
[
idx
];
}
else
{
avg_time
=
trans
.
gpu_avg_time_
[
idx
]
/
sys_freq
;
min_time
=
trans
.
gpu_min_time_
[
idx
]
/
sys_freq
;
}
avg_bandwidth
=
(
double
)
data_size
/
avg_time
/
1000
/
1000
/
1000
;
peak_bandwidth
=
(
double
)
data_size
/
min_time
/
1000
/
1000
/
1000
;
}
}
trans
.
min_time_
.
push_back
(
min_time
);
// Determine if there was a validation failure
trans
.
avg_time_
.
push_back
(
avg_time
);
// @note: Value is set to VALIDATE_COPY_OP_FAILURE
// if copy transaction was validated and it failed
hsa_status_t
verify_status
=
HSA_STATUS_ERROR
;
if
((
avg_time
!=
VALIDATE_COPY_OP_FAILURE
)
&&
(
min_time
!=
VALIDATE_COPY_OP_FAILURE
))
{
verify_status
=
HSA_STATUS_SUCCESS
;
}
// Check validation failures as that signal is
// Adjust Gpu time if there is no validation error
// captured via Min and Avg time values. If there
if
((
trans
.
copy
.
uses_gpu_
)
&&
// is a failure propagate that value as computed
(
print_cpu_time_
==
false
)
&&
// bandwidth
(
verify_status
==
HSA_STATUS_SUCCESS
))
{
if
(
validate_
)
{
avg_time
=
avg_time
/
sys_freq
;
avg_time
=
trans
.
gpu_avg_time_
[
idx
];
min_time
=
min_time
/
sys_freq
;
min_time
=
trans
.
gpu_min_time_
[
idx
];
}
if
((
avg_time
==
VALIDATE_COPY_OP_FAILURE
)
&&
(
min_time
==
VALIDATE_COPY_OP_FAILURE
))
{
// Compute bandwidth - divide bandwidth with
trans
.
avg_bandwidth_
.
push_back
(
avg_time
);
// 10^9 not 1024^3 to get size in GigaBytes
trans
.
peak_bandwidth_
.
push_back
(
min_time
);
// @note: For validation failures bandwidth
continue
;
// is encoded by VALIDATE_COPY_OP_FAILURE
}
if
(
verify_status
==
HSA_STATUS_SUCCESS
)
{
avg_bandwidth
=
(
double
)
data_size
/
avg_time
/
1000
/
1000
/
1000
;
peak_bandwidth
=
(
double
)
data_size
/
min_time
/
1000
/
1000
/
1000
;
}
}
// Update computed bandwidth for the transaction
// Update computed bandwidth for the transaction
trans
.
min_time_
.
push_back
(
min_time
);
trans
.
avg_time_
.
push_back
(
avg_time
);
trans
.
avg_bandwidth_
.
push_back
(
avg_bandwidth
);
trans
.
avg_bandwidth_
.
push_back
(
avg_bandwidth
);
trans
.
peak_bandwidth_
.
push_back
(
peak_bandwidth
);
trans
.
peak_bandwidth_
.
push_back
(
peak_bandwidth
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment