Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
one
TransferBench
Commits
79a3a003
Unverified
Commit
79a3a003
authored
Oct 17, 2023
by
gilbertlee-amd
Committed by
GitHub
Oct 17, 2023
Browse files
V1.31 candidate (#58)
* Adding xccID output to SHOW_ITERATIONS
parent
e7cfab75
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
48 additions
and
8 deletions
+48
-8
CHANGELOG.md
CHANGELOG.md
+5
-0
src/TransferBench.cpp
src/TransferBench.cpp
+30
-6
src/include/EnvVars.hpp
src/include/EnvVars.hpp
+1
-1
src/include/Kernels.hpp
src/include/Kernels.hpp
+11
-0
src/include/TransferBench.hpp
src/include/TransferBench.hpp
+1
-1
No files found.
CHANGELOG.md
View file @
79a3a003
# Changelog for TransferBench
## v1.31
### Modified
-
SHOW_ITERATIONS now shows XCC:CU instead of just CU ID
-
SHOW_ITERATIONS output is also printed when USE_SINGLE_STREAM=1
## v1.30
### Added
-
BLOCK_SIZE added to control threadblock size (Must be multiple of 64, up to 512)
...
...
src/TransferBench.cpp
View file @
79a3a003
...
...
@@ -578,7 +578,22 @@ void ExecuteTransfers(EnvVars const& ev,
{
double
iterDurationMsec
=
t
.
first
;
double
iterBandwidthGbs
=
(
transfer
->
numBytesActual
/
1.0E9
)
/
iterDurationMsec
*
1000.0
f
;
printf
(
" Iter %03d | %7.3f GB/s | %8.3f ms |
\n
"
,
t
.
second
,
iterBandwidthGbs
,
iterDurationMsec
);
printf
(
" Iter %03d | %7.3f GB/s | %8.3f ms |"
,
t
.
second
,
iterBandwidthGbs
,
iterDurationMsec
);
std
::
set
<
int
>
usedXccs
;
if
(
t
.
second
-
1
<
transfer
->
perIterationCUs
.
size
())
{
printf
(
" CUs:"
);
for
(
auto
x
:
transfer
->
perIterationCUs
[
t
.
second
-
1
])
{
printf
(
" %02d:%02d"
,
x
.
first
,
x
.
second
);
usedXccs
.
insert
(
x
.
first
);
}
}
printf
(
" XCCs:"
);
for
(
auto
x
:
usedXccs
)
printf
(
" %02d"
,
x
);
printf
(
"
\n
"
);
}
printf
(
" StandardDev | %7.3f GB/s | %8.3f ms |
\n
"
,
stdDevBw
,
stdDevTime
);
}
...
...
@@ -649,12 +664,19 @@ void ExecuteTransfers(EnvVars const& ev,
double
iterDurationMsec
=
t
.
first
;
double
iterBandwidthGbs
=
(
transfer
->
numBytesActual
/
1.0E9
)
/
iterDurationMsec
*
1000.0
f
;
printf
(
" Iter %03d | %7.3f GB/s | %8.3f ms |"
,
t
.
second
,
iterBandwidthGbs
,
iterDurationMsec
);
std
::
set
<
int
>
usedXccs
;
if
(
t
.
second
-
1
<
transfer
->
perIterationCUs
.
size
())
{
printf
(
" CUs:"
);
for
(
auto
x
:
transfer
->
perIterationCUs
[
t
.
second
-
1
])
printf
(
" %2d"
,
x
);
{
printf
(
" %02d:%02d"
,
x
.
first
,
x
.
second
);
usedXccs
.
insert
(
x
.
first
);
}
}
printf
(
" XCCs:"
);
for
(
auto
x
:
usedXccs
)
printf
(
" %d"
,
x
);
printf
(
"
\n
"
);
}
printf
(
" StandardDev | %7.3f GB/s | %8.3f ms |
\n
"
,
stdDevBw
,
stdDevTime
);
...
...
@@ -1362,13 +1384,14 @@ void RunTransfer(EnvVars const& ev, int const iteration,
long
long
minStartCycle
=
std
::
numeric_limits
<
long
long
>::
max
();
long
long
maxStopCycle
=
std
::
numeric_limits
<
long
long
>::
min
();
std
::
set
<
int
>
CUs
;
std
::
set
<
std
::
pair
<
int
,
int
>
>
CUs
;
for
(
auto
subExecIdx
:
currTransfer
->
subExecIdx
)
{
minStartCycle
=
std
::
min
(
minStartCycle
,
exeInfo
.
subExecParamGpu
[
subExecIdx
].
startCycle
);
maxStopCycle
=
std
::
max
(
maxStopCycle
,
exeInfo
.
subExecParamGpu
[
subExecIdx
].
stopCycle
);
if
(
ev
.
showIterations
)
CUs
.
insert
(
GetId
(
exeInfo
.
subExecParamGpu
[
subExecIdx
].
hwId
));
CUs
.
insert
(
std
::
make_pair
(
exeInfo
.
subExecParamGpu
[
subExecIdx
].
xccId
,
GetId
(
exeInfo
.
subExecParamGpu
[
subExecIdx
].
hwId
)));
}
int
const
wallClockRate
=
ev
.
wallClockPerDeviceMhz
[
exeIndex
];
double
iterationTimeMs
=
(
maxStopCycle
-
minStartCycle
)
/
(
double
)(
wallClockRate
);
...
...
@@ -1387,9 +1410,10 @@ void RunTransfer(EnvVars const& ev, int const iteration,
if
(
ev
.
showIterations
)
{
transfer
->
perIterationTime
.
push_back
(
gpuDeltaMsec
);
std
::
set
<
int
>
CUs
;
std
::
set
<
std
::
pair
<
int
,
int
>
>
CUs
;
for
(
int
i
=
0
;
i
<
transfer
->
numSubExecs
;
i
++
)
CUs
.
insert
(
GetId
(
transfer
->
subExecParamGpuPtr
[
i
].
hwId
));
CUs
.
insert
(
std
::
make_pair
(
transfer
->
subExecParamGpuPtr
[
i
].
xccId
,
GetId
(
transfer
->
subExecParamGpuPtr
[
i
].
hwId
)));
transfer
->
perIterationCUs
.
push_back
(
CUs
);
}
}
...
...
src/include/EnvVars.hpp
View file @
79a3a003
...
...
@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp"
#include "Kernels.hpp"
#define TB_VERSION "1.3
0
"
#define TB_VERSION "1.3
1
"
extern
char
const
MemTypeStr
[];
extern
char
const
ExeTypeStr
[];
...
...
src/include/Kernels.hpp
View file @
79a3a003
...
...
@@ -45,6 +45,7 @@ struct SubExecParam
long
long
startCycle
;
// Start timestamp for in-kernel timing (GPU-GFX executor)
long
long
stopCycle
;
// Stop timestamp for in-kernel timing (GPU-GFX executor)
uint32_t
hwId
;
// Hardware ID
uint32_t
xccId
;
// XCC ID
};
// Macro for collecting HW_REG_HW_ID
...
...
@@ -56,6 +57,15 @@ struct SubExecParam
asm volatile ("s_getreg_b32 %0, hwreg(HW_REG_HW_ID)" : "=s" (p.hwId));
#endif
// Macro for collecting HW_REG_XCC_ID
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
#define __trace_xccreg() \
asm volatile ("s_getreg_b32 %0, hwreg(HW_REG_XCC_ID)" : "=s" (p.xccId));
#else
#define __trace_xccreg() \
p.xccId = 0
#endif
void
CpuReduceKernel
(
SubExecParam
const
&
p
)
{
int
const
&
numSrcs
=
p
.
numSrcs
;
...
...
@@ -225,6 +235,7 @@ GpuReduceKernel(SubExecParam* params)
p
.
stopCycle
=
wall_clock64
();
p
.
startCycle
=
startCycle
;
__trace_hwreg
();
__trace_xccreg
();
}
}
...
...
src/include/TransferBench.hpp
View file @
79a3a003
...
...
@@ -121,7 +121,7 @@ struct Transfer
std
::
vector
<
int
>
subExecIdx
;
// Indices into subExecParamGpu
std
::
vector
<
double
>
perIterationTime
;
// Per-iteration timing
std
::
vector
<
std
::
set
<
int
>>
perIterationCUs
;
// Per-iteration CU usage
std
::
vector
<
std
::
set
<
std
::
pair
<
int
,
int
>
>>
perIterationCUs
;
// Per-iteration CU usage
// Prepares src/dst subarray pointers for each SubExecutor
void
PrepareSubExecParams
(
EnvVars
const
&
ev
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment