Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
one
TransferBench
Commits
33a5435c
Unverified
Commit
33a5435c
authored
Dec 01, 2023
by
gilbertlee-amd
Committed by
GitHub
Dec 01, 2023
Browse files
v1.44 Adding rwrite preset benchmark (#77)
parent
30f1c584
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
119 additions
and
8 deletions
+119
-8
CHANGELOG.md
CHANGELOG.md
+6
-0
src/TransferBench.cpp
src/TransferBench.cpp
+98
-6
src/include/EnvVars.hpp
src/include/EnvVars.hpp
+14
-2
src/include/TransferBench.hpp
src/include/TransferBench.hpp
+1
-0
No files found.
CHANGELOG.md
View file @
33a5435c
...
@@ -3,6 +3,12 @@
...
@@ -3,6 +3,12 @@
Documentation for TransferBench is available at
Documentation for TransferBench is available at
[
https://rocm.docs.amd.com/projects/TransferBench
](
https://rocm.docs.amd.com/projects/TransferBench
)
.
[
https://rocm.docs.amd.com/projects/TransferBench
](
https://rocm.docs.amd.com/projects/TransferBench
)
.
## v1.44
### Additions
* Adding rwrite preset to benchmark remote parallel writes
  * Usage: ./TransferBench rwrite <numBytes=64M> <#CUs=8> <srcGpu=0> <minGpus=1> <maxGpus=3>
## v1.43
## v1.43
### Changes
### Changes
...
...
src/TransferBench.cpp
View file @
33a5435c
...
@@ -149,6 +149,32 @@ int main(int argc, char **argv)
...
@@ -149,6 +149,32 @@ int main(int argc, char **argv)
}
while
(
curr
<
N
*
2
);
}
while
(
curr
<
N
*
2
);
}
}
}
}
else
if
(
!
strcmp
(
argv
[
1
],
"rwrite"
))
{
if
(
ev
.
numGpuDevices
<
2
)
{
printf
(
"[ERROR] Remote write benchmark requires at least 2 GPUs
\n
"
);
exit
(
1
);
}
ev
.
DisplayRemoteWriteEnvVars
();
int
numSubExecs
=
(
argc
>
3
?
atoi
(
argv
[
3
])
:
8
);
int
srcIdx
=
(
argc
>
4
?
atoi
(
argv
[
4
])
:
0
);
int
minGpus
=
(
argc
>
5
?
atoi
(
argv
[
5
])
:
1
);
int
maxGpus
=
(
argc
>
6
?
atoi
(
argv
[
6
])
:
std
::
min
(
ev
.
numGpuDevices
-
1
,
3
));
for
(
int
N
=
256
;
N
<=
(
1
<<
27
);
N
*=
2
)
{
int
delta
=
std
::
max
(
1
,
N
/
ev
.
samplingFactor
);
int
curr
=
(
numBytesPerTransfer
==
0
)
?
N
:
numBytesPerTransfer
/
sizeof
(
float
);
do
{
RunRemoteWriteBenchmark
(
ev
,
curr
*
sizeof
(
float
),
numSubExecs
,
srcIdx
,
minGpus
,
maxGpus
);
if
(
numBytesPerTransfer
!=
0
)
exit
(
0
);
curr
+=
delta
;
}
while
(
curr
<
N
*
2
);
}
}
else
if
(
!
strcmp
(
argv
[
1
],
"cmdline"
))
else
if
(
!
strcmp
(
argv
[
1
],
"cmdline"
))
{
{
// Print environment variables and CSV header
// Print environment variables and CSV header
...
@@ -2293,12 +2319,12 @@ void RunSchmooBenchmark(EnvVars const& ev, size_t const numBytesPerTransfer, int
...
@@ -2293,12 +2319,12 @@ void RunSchmooBenchmark(EnvVars const& ev, size_t const numBytesPerTransfer, int
printf
(
"Bytes to transfer: %lu Local GPU: %d Remote GPU: %d
\n
"
,
numBytesPerTransfer
,
localIdx
,
remoteIdx
);
printf
(
"Bytes to transfer: %lu Local GPU: %d Remote GPU: %d
\n
"
,
numBytesPerTransfer
,
localIdx
,
remoteIdx
);
printf
(
" | Local Read | Local Write | Local Copy | Remote Read | Remote Write| Remote Copy |
\n
"
);
printf
(
" | Local Read | Local Write | Local Copy | Remote Read | Remote Write| Remote Copy |
\n
"
);
printf
(
" #CUs |%c%02d->G%02d->N00|N00->G%02d->%c%02d|%c%02d->G%02d->%c%02d|%c%02d->G%02d->N00|N00->G%02d->%c%02d|%c%02d->G%02d->%c%02d|
\n
"
,
printf
(
" #CUs |%c%02d->G%02d->N00|N00->G%02d->%c%02d|%c%02d->G%02d->%c%02d|%c%02d->G%02d->N00|N00->G%02d->%c%02d|%c%02d->G%02d->%c%02d|
\n
"
,
memType
,
localIdx
,
localIdx
,
memType
,
localIdx
,
localIdx
,
localIdx
,
memType
,
localIdx
,
localIdx
,
memType
,
localIdx
,
memType
,
localIdx
,
localIdx
,
memType
,
localIdx
,
memType
,
localIdx
,
localIdx
,
memType
,
localIdx
,
memType
,
remoteIdx
,
localIdx
,
memType
,
remoteIdx
,
localIdx
,
localIdx
,
memType
,
remoteIdx
,
localIdx
,
memType
,
remoteIdx
,
memType
,
localIdx
,
localIdx
,
memType
,
remoteIdx
);
memType
,
localIdx
,
localIdx
,
memType
,
remoteIdx
);
printf
(
"|------|-------------|-------------|-------------|-------------|-------------|-------------|
\n
"
);
printf
(
"|------|-------------|-------------|-------------|-------------|-------------|-------------|
\n
"
);
std
::
vector
<
Transfer
>
transfers
(
1
);
std
::
vector
<
Transfer
>
transfers
(
1
);
...
@@ -2393,6 +2419,72 @@ void RunSchmooBenchmark(EnvVars const& ev, size_t const numBytesPerTransfer, int
...
@@ -2393,6 +2419,72 @@ void RunSchmooBenchmark(EnvVars const& ev, size_t const numBytesPerTransfer, int
}
}
}
}
// Remote-write preset: one source GPU writes to several destination GPUs at the same time.
//
// Parameters:
// - ev                  : environment settings (numGpuDevices and useFineGrain are read here)
// - numBytesPerTransfer : number of bytes assigned to every Transfer (one Transfer per destination GPU)
// - numSubExecs         : value assigned to each Transfer's numSubExecs (reported to the user as "CUs")
// - srcIdx              : index of the GPU that executes every write; must lie in [0, numGpuDevices)
// - minGpus / maxGpus   : inclusive range for the number of destination GPUs written to in parallel;
//                         clamped to [1, numGpuDevices - 1]
//
// For each destination count p in the range, every combination of p GPUs that excludes srcIdx is
// submitted as one batch to ExecuteTransfers, and one table row is printed with the bandwidth of
// each Transfer in the column of its destination GPU.
void RunRemoteWriteBenchmark(EnvVars const& ev, size_t const numBytesPerTransfer, int numSubExecs, int const srcIdx, int minGpus, int maxGpus)
{
  // srcIdx is used both as the executor index and as a shift amount, so an out-of-range value
  // would be undefined behaviour; fail loudly instead
  if (srcIdx < 0 || srcIdx >= ev.numGpuDevices)
  {
    printf("[ERROR] Remote write benchmark source GPU index must be between 0 and %d (got %d)\n",
           ev.numGpuDevices - 1, srcIdx);
    exit(1);
  }

  // At least one destination is needed for a batch to contain any Transfer, and there are only
  // (numGpuDevices - 1) candidate destinations once the source GPU is excluded
  minGpus = std::max(minGpus, 1);
  maxGpus = std::min(maxGpus, ev.numGpuDevices - 1);

  printf("Bytes to write: %zu from GPU %d using %d CUs [Sweeping %d to %d parallel writes]\n",
         numBytesPerTransfer, srcIdx, numSubExecs, minGpus, maxGpus);

  // Table header: one column per possible destination GPU (the source GPU gets no column)
  // NOTE(review): column strings are kept exactly as found; confirm their widths line up with the
  // 13-character separator below
  for (int i = 0; i < ev.numGpuDevices; i++)
  {
    if (i == srcIdx) continue;
    printf(" GPU %3d ", i);
  }
  printf("\n");
  for (int i = 0; i < ev.numGpuDevices - 1; i++)
    printf("-------------");
  printf("\n");

  // Destination memory type is the same for every Transfer, so pick it once
  auto const dstMemType = (ev.useFineGrain ? MEM_GPU_FINE : MEM_GPU);

  for (int p = minGpus; p <= maxGpus; p++)
  {
    // Each bitmask selects a set of destination GPUs (bit i set => GPU i is written to)
    for (int bitmask = 0; bitmask < (1 << ev.numGpuDevices); bitmask++)
    {
      // Skip sets that contain the source GPU, and sets that do not have exactly p members
      if (bitmask & (1 << srcIdx)) continue;
      if (__builtin_popcount(bitmask) != p) continue;

      // Build one write-only Transfer (no sources, one destination) per selected GPU
      std::vector<Transfer> transfers;
      transfers.reserve(p);
      for (int i = 0; i < ev.numGpuDevices; i++)
      {
        if (!(bitmask & (1 << i))) continue;

        Transfer t;
        t.dstType.resize(1);
        t.dstIndex.resize(1);
        t.exeType     = EXE_GPU_GFX;
        t.exeIndex    = srcIdx;
        t.exeSubIndex = -1;
        t.numSubExecs = numSubExecs;
        t.numBytes    = numBytesPerTransfer;
        t.numSrcs     = 0;
        t.numDsts     = 1;
        t.dstType[0]  = dstMemType;
        t.dstIndex[0] = i;
        transfers.push_back(t);
      }

      // Presumably fills in transferBandwidth for every Transfer - confirm against ExecuteTransfers
      ExecuteTransfers(ev, 0, 0, transfers, false);

      // Transfers were appended in ascending GPU order, so walking the GPUs in the same order
      // pairs each selected column with its Transfer
      size_t resultIdx = 0;
      for (int i = 0; i < ev.numGpuDevices; i++)
      {
        if (bitmask & (1 << i))
          printf(" %8.3f ", transfers[resultIdx++].transferBandwidth);
        else if (i != srcIdx)
          printf(" ");
      }

      // Trailing description of every Transfer that took part in this row
      for (Transfer const& t : transfers)
        printf(" (N0 G%d %c%d)", srcIdx, MemTypeStr[t.dstType[0]], t.dstIndex[0]);
      printf("\n");
    }
    printf("\n");
  }
}
void
RunSweepPreset
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
const
numGpuSubExecs
,
int
const
numCpuSubExecs
,
bool
const
isRandom
)
void
RunSweepPreset
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
const
numGpuSubExecs
,
int
const
numCpuSubExecs
,
bool
const
isRandom
)
{
{
...
...
src/include/EnvVars.hpp
View file @
33a5435c
...
@@ -29,7 +29,7 @@ THE SOFTWARE.
...
@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp"
#include "Compatibility.hpp"
#include "Kernels.hpp"
#include "Kernels.hpp"
#define TB_VERSION "1.4
3
"
#define TB_VERSION "1.4
4
"
extern
char
const
MemTypeStr
[];
extern
char
const
MemTypeStr
[];
extern
char
const
ExeTypeStr
[];
extern
char
const
ExeTypeStr
[];
...
@@ -41,7 +41,8 @@ enum ConfigModeEnum
...
@@ -41,7 +41,8 @@ enum ConfigModeEnum
CFG_SWEEP
=
2
,
CFG_SWEEP
=
2
,
CFG_SCALE
=
3
,
CFG_SCALE
=
3
,
CFG_A2A
=
4
,
CFG_A2A
=
4
,
CFG_SCHMOO
=
5
CFG_SCHMOO
=
5
,
CFG_RWRITE
=
6
};
};
enum
BlockOrderEnum
enum
BlockOrderEnum
...
@@ -739,6 +740,17 @@ public:
...
@@ -739,6 +740,17 @@ public:
std
::
string
(
"Using "
)
+
(
useFineGrain
?
"fine"
:
"coarse"
)
+
"-grained memory"
);
std
::
string
(
"Using "
)
+
(
useFineGrain
?
"fine"
:
"coarse"
)
+
"-grained memory"
);
}
}
// Shows the environment variables relevant to the remote-write preset:
// the common ones (via DisplayEnvVars) followed by the remote-write specific ones
void DisplayRemoteWriteEnvVars() const
{
  DisplayEnvVars();

  // The preset-specific section is only emitted when environment output is not hidden
  if (!hideEnv)
  {
    // The section banner is skipped when CSV output is requested
    if (!outputToCsv)
      printf("[Remote-Write Related]\n");

    std::string const grainDesc =
      std::string("Using ") + (useFineGrain ? "fine" : "coarse") + "-grained memory";
    PRINT_EV("USE_FINE_GRAIN", useFineGrain, grainDesc);
  }
}
// Helper function that parses an environment variable or sets it to a default value
// Helper function that parses an environment variable or sets it to a default value
static
int
GetEnvVar
(
std
::
string
const
&
varname
,
int
defaultValue
)
static
int
GetEnvVar
(
std
::
string
const
&
varname
,
int
defaultValue
)
{
{
...
...
src/include/TransferBench.hpp
View file @
33a5435c
...
@@ -194,6 +194,7 @@ void RunScalingBenchmark(EnvVars const& ev, size_t N, int const exeIndex, int co
...
@@ -194,6 +194,7 @@ void RunScalingBenchmark(EnvVars const& ev, size_t N, int const exeIndex, int co
void
RunSweepPreset
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
const
numGpuSubExec
,
int
const
numCpuSubExec
,
bool
const
isRandom
);
void
RunSweepPreset
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
const
numGpuSubExec
,
int
const
numCpuSubExec
,
bool
const
isRandom
);
void
RunAllToAllBenchmark
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
const
numSubExecs
);
void
RunAllToAllBenchmark
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
const
numSubExecs
);
void
RunSchmooBenchmark
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
const
localIdx
,
int
const
remoteIdx
,
int
const
maxSubExecs
);
void
RunSchmooBenchmark
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
const
localIdx
,
int
const
remoteIdx
,
int
const
maxSubExecs
);
void
RunRemoteWriteBenchmark
(
EnvVars
const
&
ev
,
size_t
const
numBytesPerTransfer
,
int
numSubExecs
,
int
const
srcIdx
,
int
minGpus
,
int
maxGpus
);
std
::
string
GetLinkTypeDesc
(
uint32_t
linkType
,
uint32_t
hopCount
);
std
::
string
GetLinkTypeDesc
(
uint32_t
linkType
,
uint32_t
hopCount
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment