Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
one
TransferBench
Commits
9132801d
Unverified
Commit
9132801d
authored
Jul 12, 2023
by
gilbertlee-amd
Committed by
GitHub
Jul 12, 2023
Browse files
Adding preset scaling benchmark (#45)
parent
f903fda3
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
99 additions
and
3 deletions
+99
-3
CHANGELOG.md
CHANGELOG.md
+5
-0
src/TransferBench.cpp
src/TransferBench.cpp
+90
-1
src/include/EnvVars.hpp
src/include/EnvVars.hpp
+3
-2
src/include/TransferBench.hpp
src/include/TransferBench.hpp
+1
-0
No files found.
CHANGELOG.md
View file @
9132801d
# Changelog for TransferBench
# Changelog for TransferBench
## v1.23
### Added
-
New GPU subexec scaling benchmark accessed by preset "scaling"
-
Tests GPU-GFX copy performance based on # of CUs used
## v1.22
## v1.22
### Modified
### Modified
-
Switching kernel timing function to wall_clock64
-
Switching kernel timing function to wall_clock64
...
...
src/TransferBench.cpp
View file @
9132801d
...
@@ -89,6 +89,21 @@ int main(int argc, char **argv)
...
@@ -89,6 +89,21 @@ int main(int argc, char **argv)
RunPeerToPeerBenchmarks
(
ev
,
numBytesPerTransfer
/
sizeof
(
float
));
RunPeerToPeerBenchmarks
(
ev
,
numBytesPerTransfer
/
sizeof
(
float
));
exit
(
0
);
exit
(
0
);
}
}
// - Test SubExecutor scaling
else
if
(
!
strcmp
(
argv
[
1
],
"scaling"
))
{
int
maxSubExecs
=
(
argc
>
3
?
atoi
(
argv
[
3
])
:
32
);
int
exeIndex
=
(
argc
>
4
?
atoi
(
argv
[
4
])
:
0
);
if
(
exeIndex
>=
ev
.
numGpuDevices
)
{
printf
(
"[ERROR] Cannot execute scaling test with GPU device %d
\n
"
,
exeIndex
);
exit
(
1
);
}
ev
.
configMode
=
CFG_SCALE
;
RunScalingBenchmark
(
ev
,
numBytesPerTransfer
/
sizeof
(
float
),
exeIndex
,
maxSubExecs
);
exit
(
0
);
}
// Check that Transfer configuration file can be opened
// Check that Transfer configuration file can be opened
ev
.
configMode
=
CFG_FILE
;
ev
.
configMode
=
CFG_FILE
;
...
@@ -548,7 +563,11 @@ void DisplayUsage(char const* cmdName)
...
@@ -548,7 +563,11 @@ void DisplayUsage(char const* cmdName)
printf
(
" - Name of preset config:
\n
"
);
printf
(
" - Name of preset config:
\n
"
);
printf
(
" p2p - Peer-to-peer benchmark tests
\n
"
);
printf
(
" p2p - Peer-to-peer benchmark tests
\n
"
);
printf
(
" sweep/rsweep - Sweep/random sweep across possible sets of Transfers
\n
"
);
printf
(
" sweep/rsweep - Sweep/random sweep across possible sets of Transfers
\n
"
);
printf
(
" - 3rd/4th optional args for # GPU SubExecs / # CPU SubExecs per Transfer
\n
"
);
printf
(
" - 3rd optional arg: # GPU SubExecs per Transfer
\n
"
);
printf
(
" - 4th optional arg: # CPU SubExecs per Transfer
\n
"
);
printf
(
" scaling - GPU SubExec scaling copy test
\n
"
);
printf
(
" - 3th optional arg: Max # of SubExecs to use
\n
"
);
printf
(
" - 4rd optional arg: GPU index to use as executor
\n
"
);
printf
(
" N : (Optional) Number of bytes to copy per Transfer.
\n
"
);
printf
(
" N : (Optional) Number of bytes to copy per Transfer.
\n
"
);
printf
(
" If not specified, defaults to %lu bytes. Must be a multiple of 4 bytes
\n
"
,
printf
(
" If not specified, defaults to %lu bytes. Must be a multiple of 4 bytes
\n
"
,
DEFAULT_BYTES_PER_TRANSFER
);
DEFAULT_BYTES_PER_TRANSFER
);
...
@@ -1296,6 +1315,76 @@ void RunPeerToPeerBenchmarks(EnvVars const& ev, size_t N)
...
@@ -1296,6 +1315,76 @@ void RunPeerToPeerBenchmarks(EnvVars const& ev, size_t N)
}
}
}
}
// Runs the GPU-GFX SubExecutor scaling benchmark.
//
// For every SubExecutor count from 1 to maxSubExecs, a single Transfer executed
// by GPU exeIndex copies N floats from that GPU's memory to each CPU and GPU
// device in turn. One table row of bandwidths (reported as GB/sec) is printed per
// SubExecutor count, followed by a "Best" row holding, for each destination, the
// highest bandwidth observed and the SubExecutor count that produced it.
//
// ev          - Environment settings (device counts, iteration count, CSV flag)
// N           - Number of floats copied per Transfer
// exeIndex    - Index of the GPU acting as both executor and source
// maxSubExecs - Largest SubExecutor count to test (no rows are run if < 1)
//
// Terminates the process with exit code 1 if exeIndex does not name a GPU.
void RunScalingBenchmark(EnvVars const& ev, size_t N, int const exeIndex, int const maxSubExecs)
{
  // Collect the number of available CPUs/GPUs on this machine
  int const numCpus    = ev.numCpuDevices;
  int const numGpus    = ev.numGpuDevices;
  int const numDevices = numCpus + numGpus;

  // The caller only validates the upper bound; also reject negative indices
  // (e.g. from atoi on a bad argument) before they are used as a device index.
  if (exeIndex < 0 || exeIndex >= numGpus)
  {
    printf("[ERROR] Cannot execute scaling test with GPU device %d\n", exeIndex);
    exit(1);
  }

  ev.DisplayEnvVars();

  // Enable peer to peer for each GPU
  for (int i = 0; i < numGpus; i++)
    for (int j = 0; j < numGpus; j++)
      if (i != j) EnablePeerAccess(i, j);

  char const separator = (ev.outputToCsv ? ',' : ' ');

  // A single Transfer is reused for every measurement; only the destination
  // and the SubExecutor count change between runs.
  std::vector<Transfer> transfers(1);
  transfers[0].numBytes = N * sizeof(float);
  transfers[0].numSrcs  = 1;
  transfers[0].numDsts  = 1;
  transfers[0].exeType  = EXE_GPU_GFX;
  transfers[0].exeIndex = exeIndex;
  transfers[0].srcType.resize(1, MEM_GPU);
  transfers[0].dstType.resize(1, MEM_GPU);
  transfers[0].srcIndex.resize(1);
  transfers[0].dstIndex.resize(1);

  // Source memory lives on the executing GPU, matching the banner printed below.
  // (Previously left at the value-initialized index 0, i.e. always GPU 0.)
  transfers[0].srcIndex[0] = exeIndex;

  printf("GPU-GFX Scaling benchmark:\n");
  printf("==========================\n");
  printf("- Copying %lu bytes from GPU %d to other devices\n", transfers[0].numBytes, exeIndex);
  printf("- All numbers reported as GB/sec\n\n");

  // Header row: CPU destinations first, then GPU destinations
  printf("NumCUs");
  for (int i = 0; i < numDevices; i++)
    printf("%c %s%02d ", separator, i < numCpus ? "CPU" : "GPU", i < numCpus ? i : i - numCpus);
  printf("\n");

  // Per destination: (best bandwidth, SubExecutor count that achieved it)
  std::vector<std::pair<double, int>> bestResult(numDevices);

  for (int numSubExec = 1; numSubExec <= maxSubExecs; numSubExec++)
  {
    transfers[0].numSubExecs = numSubExec;
    printf("%4d ", numSubExec);

    for (int i = 0; i < numDevices; i++)
    {
      // Device slots [0, numCpus) are CPUs, the remainder are GPUs
      transfers[0].dstType[0]  = i < numCpus ? MEM_CPU : MEM_GPU;
      transfers[0].dstIndex[0] = i < numCpus ? i : i - numCpus;

      ExecuteTransfers(ev, 0, N, transfers, false);

      // Average per-iteration duration, then bytes -> GB and (presumably) msec -> sec
      double const transferDurationMsec = transfers[0].transferTime / (1.0 * ev.numIterations);
      double const transferBandwidthGbs = (transfers[0].numBytesActual / 1.0E9) / transferDurationMsec * 1000.0;
      printf("%c%7.2f ", separator, transferBandwidthGbs);

      if (transferBandwidthGbs > bestResult[i].first)
      {
        bestResult[i].first  = transferBandwidthGbs;
        bestResult[i].second = numSubExec;
      }
    }
    printf("\n");
  }

  printf(" Best ");
  for (int i = 0; i < numDevices; i++)
  {
    printf("%c%7.2f(%3d)", separator, bestResult[i].first, bestResult[i].second);
  }
  printf("\n");
}
double
GetPeakBandwidth
(
EnvVars
const
&
ev
,
size_t
const
N
,
double
GetPeakBandwidth
(
EnvVars
const
&
ev
,
size_t
const
N
,
int
const
isBidirectional
,
int
const
isBidirectional
,
MemType
const
srcType
,
int
const
srcIndex
,
MemType
const
srcType
,
int
const
srcIndex
,
...
...
src/include/EnvVars.hpp
View file @
9132801d
...
@@ -29,7 +29,7 @@ THE SOFTWARE.
...
@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp"
#include "Compatibility.hpp"
#include "Kernels.hpp"
#include "Kernels.hpp"
#define TB_VERSION "1.2
2
"
#define TB_VERSION "1.2
3
"
extern
char
const
MemTypeStr
[];
extern
char
const
MemTypeStr
[];
extern
char
const
ExeTypeStr
[];
extern
char
const
ExeTypeStr
[];
...
@@ -38,7 +38,8 @@ enum ConfigModeEnum
...
@@ -38,7 +38,8 @@ enum ConfigModeEnum
{
{
CFG_FILE
=
0
,
CFG_FILE
=
0
,
CFG_P2P
=
1
,
CFG_P2P
=
1
,
CFG_SWEEP
=
2
CFG_SWEEP
=
2
,
CFG_SCALE
=
3
};
};
// This class manages environment variable that affect TransferBench
// This class manages environment variable that affect TransferBench
...
...
src/include/TransferBench.hpp
View file @
9132801d
...
@@ -182,6 +182,7 @@ void DeallocateMemory(MemType memType, void* memPtr, size_t const size = 0);
...
@@ -182,6 +182,7 @@ void DeallocateMemory(MemType memType, void* memPtr, size_t const size = 0);
void
CheckPages
(
char
*
byteArray
,
size_t
numBytes
,
int
targetId
);
void
CheckPages
(
char
*
byteArray
,
size_t
numBytes
,
int
targetId
);
void
RunTransfer
(
EnvVars
const
&
ev
,
int
const
iteration
,
ExecutorInfo
&
exeInfo
,
int
const
transferIdx
);
void
RunTransfer
(
EnvVars
const
&
ev
,
int
const
iteration
,
ExecutorInfo
&
exeInfo
,
int
const
transferIdx
);
void
RunPeerToPeerBenchmarks
(
EnvVars
const
&
ev
,
size_t
N
);
void
RunPeerToPeerBenchmarks
(
EnvVars
const
&
ev
,
size_t
N
);
// Runs the GPU-GFX SubExecutor scaling preset: N is the number of floats per Transfer, exeIndex the executing GPU, maxSubExecs the largest SubExecutor count tested
void
RunScalingBenchmark
(
EnvVars
const
&
ev
,
size_t
N
,
int
const
exeIndex
,
int
const
maxSubExecs
);
void
RunSweepPreset
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
const
numGpuSubExec
,
int
const
numCpuSubExec
,
bool
const
isRandom
);
void
RunSweepPreset
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
const
numGpuSubExec
,
int
const
numCpuSubExec
,
bool
const
isRandom
);
// Return the maximum bandwidth measured for given (src/dst) pair
// Return the maximum bandwidth measured for given (src/dst) pair
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment