Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
one
TransferBench
Commits
7a1dbd6a
Unverified
Commit
7a1dbd6a
authored
Nov 29, 2023
by
gilbertlee-amd
Committed by
GitHub
Nov 29, 2023
Browse files
v1.39 Removing deprecated gcnArch. Adding experimental executor subindex support (#71)
parent
c5197729
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
58 additions
and
31 deletions
+58
-31
CHANGELOG.md
CHANGELOG.md
+7
-0
src/TransferBench.cpp
src/TransferBench.cpp
+45
-23
src/include/EnvVars.hpp
src/include/EnvVars.hpp
+2
-2
src/include/TransferBench.hpp
src/include/TransferBench.hpp
+4
-6
No files found.
CHANGELOG.md
View file @
7a1dbd6a
...
@@ -3,6 +3,13 @@
...
@@ -3,6 +3,13 @@
Documentation for TransferBench is available at
Documentation for TransferBench is available at
[
https://rocm.docs.amd.com/projects/TransferBench
](
https://rocm.docs.amd.com/projects/TransferBench
)
.
[
https://rocm.docs.amd.com/projects/TransferBench
](
https://rocm.docs.amd.com/projects/TransferBench
)
.
## v1.39
### Additions
*
(Experimental) Adding support for Executor sub-index
### Fixes
-
Removed deprecated gcnArch code. The ROCm version in use must now include support for hipDeviceMallocUncached
## v1.38
## v1.38
### Fixes
### Fixes
...
...
src/TransferBench.cpp
View file @
7a1dbd6a
...
@@ -133,7 +133,7 @@ int main(int argc, char **argv)
...
@@ -133,7 +133,7 @@ int main(int argc, char **argv)
char
line
[
MAX_LINE_LEN
];
char
line
[
MAX_LINE_LEN
];
sprintf
(
line
,
"%s"
,
cmdlineTransfer
.
c_str
());
sprintf
(
line
,
"%s"
,
cmdlineTransfer
.
c_str
());
std
::
vector
<
Transfer
>
transfers
;
std
::
vector
<
Transfer
>
transfers
;
ParseTransfers
(
line
,
ev
.
numCpuDevices
,
ev
.
numGpuDevices
,
transfers
);
ParseTransfers
(
ev
,
line
,
transfers
);
if
(
transfers
.
empty
())
exit
(
0
);
if
(
transfers
.
empty
())
exit
(
0
);
// If the number of bytes is specified, use it
// If the number of bytes is specified, use it
...
@@ -184,7 +184,7 @@ int main(int argc, char **argv)
...
@@ -184,7 +184,7 @@ int main(int argc, char **argv)
// Parse set of parallel Transfers to execute
// Parse set of parallel Transfers to execute
std
::
vector
<
Transfer
>
transfers
;
std
::
vector
<
Transfer
>
transfers
;
ParseTransfers
(
line
,
ev
.
numCpuDevices
,
ev
.
numGpuDevices
,
transfers
);
ParseTransfers
(
ev
,
line
,
transfers
);
if
(
transfers
.
empty
())
continue
;
if
(
transfers
.
empty
())
continue
;
// If the number of bytes is specified, use it
// If the number of bytes is specified, use it
...
@@ -1005,7 +1005,7 @@ void DisplayTopology(bool const outputToCsv)
...
@@ -1005,7 +1005,7 @@ void DisplayTopology(bool const outputToCsv)
#endif
#endif
}
}
void
ParseMemType
(
std
::
string
const
&
token
,
int
const
numCpus
,
in
t
const
numGpus
,
void
ParseMemType
(
EnvVars
const
&
ev
,
std
::
str
in
g
const
&
token
,
std
::
vector
<
MemType
>&
memTypes
,
std
::
vector
<
int
>&
memIndices
)
std
::
vector
<
MemType
>&
memTypes
,
std
::
vector
<
int
>&
memIndices
)
{
{
char
typeChar
;
char
typeChar
;
...
@@ -1019,14 +1019,14 @@ void ParseMemType(std::string const& token, int const numCpus, int const numGpus
...
@@ -1019,14 +1019,14 @@ void ParseMemType(std::string const& token, int const numCpus, int const numGpus
offset
+=
inc
;
offset
+=
inc
;
MemType
memType
=
CharToMemType
(
typeChar
);
MemType
memType
=
CharToMemType
(
typeChar
);
if
(
IsCpuType
(
memType
)
&&
(
devIndex
<
0
||
devIndex
>=
numCpus
))
if
(
IsCpuType
(
memType
)
&&
(
devIndex
<
0
||
devIndex
>=
ev
.
numCpu
Device
s
))
{
{
printf
(
"[ERROR] CPU index must be between 0 and %d (instead of %d)
\n
"
,
numCpus
-
1
,
devIndex
);
printf
(
"[ERROR] CPU index must be between 0 and %d (instead of %d)
\n
"
,
ev
.
numCpu
Device
s
-
1
,
devIndex
);
exit
(
1
);
exit
(
1
);
}
}
if
(
IsGpuType
(
memType
)
&&
(
devIndex
<
0
||
devIndex
>=
numGpus
))
if
(
IsGpuType
(
memType
)
&&
(
devIndex
<
0
||
devIndex
>=
ev
.
numGpu
Device
s
))
{
{
printf
(
"[ERROR] GPU index must be between 0 and %d (instead of %d)
\n
"
,
numGpus
-
1
,
devIndex
);
printf
(
"[ERROR] GPU index must be between 0 and %d (instead of %d)
\n
"
,
ev
.
numGpu
Device
s
-
1
,
devIndex
);
exit
(
1
);
exit
(
1
);
}
}
...
@@ -1045,11 +1045,13 @@ void ParseMemType(std::string const& token, int const numCpus, int const numGpus
...
@@ -1045,11 +1045,13 @@ void ParseMemType(std::string const& token, int const numCpus, int const numGpus
}
}
}
}
void
ParseExeType
(
std
::
string
const
&
token
,
int
const
numCpus
,
in
t
const
numGpus
,
void
ParseExeType
(
EnvVars
const
&
ev
,
std
::
str
in
g
const
&
token
,
ExeType
&
exeType
,
int
&
exeIndex
)
ExeType
&
exeType
,
int
&
exeIndex
,
int
&
exeSubIndex
)
{
{
char
typeChar
;
char
typeChar
;
if
(
sscanf
(
token
.
c_str
(),
" %c%d"
,
&
typeChar
,
&
exeIndex
)
!=
2
)
exeSubIndex
=
-
1
;
int
numTokensParsed
=
sscanf
(
token
.
c_str
(),
" %c%d.%d"
,
&
typeChar
,
&
exeIndex
,
&
exeSubIndex
);
if
(
numTokensParsed
<
2
)
{
{
printf
(
"[ERROR] Unable to parse valid executor token (%s). Exepected one of %s followed by an index
\n
"
,
printf
(
"[ERROR] Unable to parse valid executor token (%s). Exepected one of %s followed by an index
\n
"
,
token
.
c_str
(),
ExeTypeStr
);
token
.
c_str
(),
ExeTypeStr
);
...
@@ -1057,20 +1059,29 @@ void ParseExeType(std::string const& token, int const numCpus, int const numGpus
...
@@ -1057,20 +1059,29 @@ void ParseExeType(std::string const& token, int const numCpus, int const numGpus
}
}
exeType
=
CharToExeType
(
typeChar
);
exeType
=
CharToExeType
(
typeChar
);
if
(
IsCpuType
(
exeType
)
&&
(
exeIndex
<
0
||
exeIndex
>=
numCpus
))
if
(
IsCpuType
(
exeType
)
&&
(
exeIndex
<
0
||
exeIndex
>=
ev
.
numCpu
Device
s
))
{
{
printf
(
"[ERROR] CPU index must be between 0 and %d (instead of %d)
\n
"
,
numCpus
-
1
,
exeIndex
);
printf
(
"[ERROR] CPU index must be between 0 and %d (instead of %d)
\n
"
,
ev
.
numCpu
Device
s
-
1
,
exeIndex
);
exit
(
1
);
exit
(
1
);
}
}
if
(
IsGpuType
(
exeType
)
&&
(
exeIndex
<
0
||
exeIndex
>=
numGpus
))
if
(
IsGpuType
(
exeType
)
&&
(
exeIndex
<
0
||
exeIndex
>=
ev
.
numGpu
Device
s
))
{
{
printf
(
"[ERROR] GPU index must be between 0 and %d (instead of %d)
\n
"
,
numGpus
-
1
,
exeIndex
);
printf
(
"[ERROR] GPU index must be between 0 and %d (instead of %d)
\n
"
,
ev
.
numGpu
Device
s
-
1
,
exeIndex
);
exit
(
1
);
exit
(
1
);
}
}
if
(
exeType
==
EXE_GPU_GFX
&&
exeSubIndex
!=
-
1
)
{
int
const
idx
=
RemappedIndex
(
exeIndex
,
false
);
if
(
ev
.
xccIdsPerDevice
[
idx
].
count
(
exeSubIndex
)
==
0
)
{
printf
(
"[ERROR] GPU %d does not have subIndex %d
\n
"
,
exeIndex
,
exeSubIndex
);
exit
(
1
);
}
}
}
}
// Helper function to parse a list of Transfer definitions
// Helper function to parse a list of Transfer definitions
void
ParseTransfers
(
char
*
line
,
int
numCpus
,
int
numGpus
,
std
::
vector
<
Transfer
>&
transfers
)
void
ParseTransfers
(
EnvVars
const
&
ev
,
char
*
line
,
std
::
vector
<
Transfer
>&
transfers
)
{
{
// Replace any round brackets or '->' with spaces,
// Replace any round brackets or '->' with spaces,
for
(
int
i
=
1
;
line
[
i
];
i
++
)
for
(
int
i
=
1
;
line
[
i
];
i
++
)
...
@@ -1141,9 +1152,9 @@ void ParseTransfers(char* line, int numCpus, int numGpus, std::vector<Transfer>&
...
@@ -1141,9 +1152,9 @@ void ParseTransfers(char* line, int numCpus, int numGpus, std::vector<Transfer>&
}
}
}
}
ParseMemType
(
srcMem
,
numCpus
,
numGpus
,
transfer
.
srcType
,
transfer
.
srcIndex
);
ParseMemType
(
ev
,
srcMem
,
transfer
.
srcType
,
transfer
.
srcIndex
);
ParseMemType
(
dstMem
,
numCpus
,
numGpus
,
transfer
.
dstType
,
transfer
.
dstIndex
);
ParseMemType
(
ev
,
dstMem
,
transfer
.
dstType
,
transfer
.
dstIndex
);
ParseExeType
(
exeMem
,
numCpus
,
numGpus
,
transfer
.
exeType
,
transfer
.
exeIndex
);
ParseExeType
(
ev
,
exeMem
,
transfer
.
exeType
,
transfer
.
exeIndex
,
transfer
.
exeSubIndex
);
transfer
.
numSrcs
=
(
int
)
transfer
.
srcType
.
size
();
transfer
.
numSrcs
=
(
int
)
transfer
.
srcType
.
size
();
transfer
.
numDsts
=
(
int
)
transfer
.
dstType
.
size
();
transfer
.
numDsts
=
(
int
)
transfer
.
dstType
.
size
();
...
@@ -1247,11 +1258,9 @@ void AllocateMemory(MemType memType, int devIndex, size_t numBytes, void** memPt
...
@@ -1247,11 +1258,9 @@ void AllocateMemory(MemType memType, int devIndex, size_t numBytes, void** memPt
#else
#else
HIP_CALL
(
hipSetDevice
(
devIndex
));
HIP_CALL
(
hipSetDevice
(
devIndex
));
// NOTE: hipDeviceMallocFinegrained will be replaced by hipDeviceMallocUncached eventually
// Until then, this workaround is required
hipDeviceProp_t
prop
;
hipDeviceProp_t
prop
;
HIP_CALL
(
hipGetDeviceProperties
(
&
prop
,
0
));
HIP_CALL
(
hipGetDeviceProperties
(
&
prop
,
0
));
int
flag
=
(
prop
.
gcnArch
/
10
==
94
)
?
0x3
:
hipDeviceMalloc
Finegrain
ed
;
int
flag
=
hipDeviceMalloc
Uncach
ed
;
HIP_CALL
(
hipExtMallocWithFlags
((
void
**
)
memPtr
,
numBytes
,
flag
));
HIP_CALL
(
hipExtMallocWithFlags
((
void
**
)
memPtr
,
numBytes
,
flag
));
#endif
#endif
}
}
...
@@ -2002,12 +2011,25 @@ void Transfer::PrepareSubExecParams(EnvVars const& ev)
...
@@ -2002,12 +2011,25 @@ void Transfer::PrepareSubExecParams(EnvVars const& ev)
p
.
dst
[
iDst
]
=
this
->
dstMem
[
iDst
]
+
assigned
+
initOffset
;
p
.
dst
[
iDst
]
=
this
->
dstMem
[
iDst
]
+
assigned
+
initOffset
;
p
.
preferredXccId
=
-
1
;
p
.
preferredXccId
=
-
1
;
if
(
ev
.
useXccFilter
)
if
(
ev
.
useXccFilter
&&
this
->
exeType
==
EXE_GPU_GFX
)
{
{
if
(
this
->
exeType
==
EXE_GPU_GFX
&&
this
->
numDsts
==
1
&&
IsGpuType
(
this
->
dstType
[
0
]))
std
::
uniform_int_distribution
<
int
>
distribution
(
0
,
ev
.
xccIdsPerDevice
[
this
->
exeIndex
].
size
()
-
1
);
// Use this transfer's executor subIndex if set
if
(
this
->
exeSubIndex
!=
-
1
)
{
p
.
preferredXccId
=
this
->
exeSubIndex
;
}
else
if
(
this
->
numDsts
>=
1
&&
IsGpuType
(
this
->
dstType
[
0
]))
{
{
p
.
preferredXccId
=
ev
.
prefXccTable
[
this
->
exeIndex
][
this
->
dstIndex
[
0
]];
p
.
preferredXccId
=
ev
.
prefXccTable
[
this
->
exeIndex
][
this
->
dstIndex
[
0
]];
}
}
if
(
p
.
preferredXccId
==
-
1
)
{
p
.
preferredXccId
=
distribution
(
*
ev
.
generator
);
}
}
}
if
(
ev
.
enableDebug
)
if
(
ev
.
enableDebug
)
...
...
src/include/EnvVars.hpp
View file @
7a1dbd6a
...
@@ -29,7 +29,7 @@ THE SOFTWARE.
...
@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp"
#include "Compatibility.hpp"
#include "Kernels.hpp"
#include "Kernels.hpp"
#define TB_VERSION "1.3
8
"
#define TB_VERSION "1.3
9
"
extern
char
const
MemTypeStr
[];
extern
char
const
MemTypeStr
[];
extern
char
const
ExeTypeStr
[];
extern
char
const
ExeTypeStr
[];
...
@@ -354,7 +354,7 @@ public:
...
@@ -354,7 +354,7 @@ public:
prefXccTable
.
resize
(
numGpuDevices
);
prefXccTable
.
resize
(
numGpuDevices
);
for
(
int
i
=
0
;
i
<
numGpuDevices
;
i
++
)
for
(
int
i
=
0
;
i
<
numGpuDevices
;
i
++
)
{
{
prefXccTable
[
i
].
resize
(
numGpuDevices
,
0
);
prefXccTable
[
i
].
resize
(
numGpuDevices
,
-
1
);
}
}
char
*
prefXccStr
=
getenv
(
"XCC_PREF_TABLE"
);
char
*
prefXccStr
=
getenv
(
"XCC_PREF_TABLE"
);
...
...
src/include/TransferBench.hpp
View file @
7a1dbd6a
...
@@ -103,6 +103,7 @@ struct Transfer
...
@@ -103,6 +103,7 @@ struct Transfer
int
transferIndex
;
// Transfer identifier (within a Test)
int
transferIndex
;
// Transfer identifier (within a Test)
ExeType
exeType
;
// Transfer executor type
ExeType
exeType
;
// Transfer executor type
int
exeIndex
;
// Executor index (NUMA node for CPU / device ID for GPU)
int
exeIndex
;
// Executor index (NUMA node for CPU / device ID for GPU)
int
exeSubIndex
;
// Executor subindex
int
numSubExecs
;
// Number of subExecutors to use for this Transfer
int
numSubExecs
;
// Number of subExecutors to use for this Transfer
size_t
numBytes
;
// # of bytes requested to Transfer (may be 0 to fallback to default)
size_t
numBytes
;
// # of bytes requested to Transfer (may be 0 to fallback to default)
size_t
numBytesActual
;
// Actual number of bytes to copy
size_t
numBytesActual
;
// Actual number of bytes to copy
...
@@ -171,13 +172,10 @@ void DisplayTopology(bool const outputToCsv);
...
@@ -171,13 +172,10 @@ void DisplayTopology(bool const outputToCsv);
void
PopulateTestSizes
(
size_t
const
numBytesPerTransfer
,
int
const
samplingFactor
,
void
PopulateTestSizes
(
size_t
const
numBytesPerTransfer
,
int
const
samplingFactor
,
std
::
vector
<
size_t
>&
valuesofN
);
std
::
vector
<
size_t
>&
valuesofN
);
void
ParseMemType
(
std
::
string
const
&
token
,
int
const
numCpus
,
int
const
numGpus
,
void
ParseMemType
(
EnvVars
const
&
ev
,
std
::
string
const
&
token
,
std
::
vector
<
MemType
>&
memType
,
std
::
vector
<
int
>&
memIndex
);
std
::
vector
<
MemType
>&
memType
,
std
::
vector
<
int
>&
memIndex
);
void
ParseExeType
(
EnvVars
const
&
ev
,
std
::
string
const
&
token
,
ExeType
&
exeType
,
int
&
exeIndex
,
int
&
exeSubIndex
);
void
ParseExeType
(
std
::
string
const
&
token
,
int
const
numCpus
,
int
const
numGpus
,
ExeType
&
exeType
,
int
&
exeIndex
);
void
ParseTransfers
(
char
*
line
,
int
numCpus
,
int
numGpus
,
void
ParseTransfers
(
EnvVars
const
&
ev
,
char
*
line
,
std
::
vector
<
Transfer
>&
transfers
);
std
::
vector
<
Transfer
>&
transfers
);
void
ExecuteTransfers
(
EnvVars
const
&
ev
,
int
const
testNum
,
size_t
const
N
,
void
ExecuteTransfers
(
EnvVars
const
&
ev
,
int
const
testNum
,
size_t
const
N
,
std
::
vector
<
Transfer
>&
transfers
,
bool
verbose
=
true
,
std
::
vector
<
Transfer
>&
transfers
,
bool
verbose
=
true
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment