Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
one
TransferBench
Commits
989cdd9b
Unverified
Commit
989cdd9b
authored
Apr 05, 2023
by
gilbertlee-amd
Committed by
GitHub
Apr 05, 2023
Browse files
Fixing USE_PREP_KERNEL vs VALIDATE_DIRECT (#21)
parent
633e3b91
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
9 deletions
+17
-9
CHANGELOG.md
CHANGELOG.md
+5
-0
src/TransferBench.cpp
src/TransferBench.cpp
+11
-8
src/include/EnvVars.hpp
src/include/EnvVars.hpp
+1
-1
No files found.
CHANGELOG.md
View file @
989cdd9b
# Changelog for TransferBench
# Changelog for TransferBench
## v1.20
### Fixed
- VALIDATE_DIRECT can now be used with USE_PREP_KERNEL
- Switch to local GPU for validating GPU memory
## v1.19
## v1.19
### Added
### Added
- VALIDATE_DIRECT now also applies to source memory array checking
- VALIDATE_DIRECT now also applies to source memory array checking
...
...
src/TransferBench.cpp
View file @
989cdd9b
...
@@ -214,6 +214,8 @@ void ExecuteTransfers(EnvVars const& ev,
...
@@ -214,6 +214,8 @@ void ExecuteTransfers(EnvVars const& ev,
// Prepare additional requirement for GPU-based executors
// Prepare additional requirement for GPU-based executors
if
(
IsGpuType
(
exeType
))
if
(
IsGpuType
(
exeType
))
{
{
HIP_CALL
(
hipSetDevice
(
exeIndex
));
// Single-stream is only supported for GFX-based executors
// Single-stream is only supported for GFX-based executors
int
const
numStreamsToUse
=
(
exeType
==
EXE_GPU_DMA
||
!
ev
.
useSingleStream
)
?
exeInfo
.
transfers
.
size
()
:
1
;
int
const
numStreamsToUse
=
(
exeType
==
EXE_GPU_DMA
||
!
ev
.
useSingleStream
)
?
exeInfo
.
transfers
.
size
()
:
1
;
exeInfo
.
streams
.
resize
(
numStreamsToUse
);
exeInfo
.
streams
.
resize
(
numStreamsToUse
);
...
@@ -221,7 +223,6 @@ void ExecuteTransfers(EnvVars const& ev,
...
@@ -221,7 +223,6 @@ void ExecuteTransfers(EnvVars const& ev,
exeInfo
.
stopEvents
.
resize
(
numStreamsToUse
);
exeInfo
.
stopEvents
.
resize
(
numStreamsToUse
);
for
(
int
i
=
0
;
i
<
numStreamsToUse
;
++
i
)
for
(
int
i
=
0
;
i
<
numStreamsToUse
;
++
i
)
{
{
HIP_CALL
(
hipSetDevice
(
exeIndex
));
HIP_CALL
(
hipStreamCreate
(
&
exeInfo
.
streams
[
i
]));
HIP_CALL
(
hipStreamCreate
(
&
exeInfo
.
streams
[
i
]));
HIP_CALL
(
hipEventCreate
(
&
exeInfo
.
startEvents
[
i
]));
HIP_CALL
(
hipEventCreate
(
&
exeInfo
.
startEvents
[
i
]));
HIP_CALL
(
hipEventCreate
(
&
exeInfo
.
stopEvents
[
i
]));
HIP_CALL
(
hipEventCreate
(
&
exeInfo
.
stopEvents
[
i
]));
...
@@ -243,7 +244,11 @@ void ExecuteTransfers(EnvVars const& ev,
...
@@ -243,7 +244,11 @@ void ExecuteTransfers(EnvVars const& ev,
bool
isSrcCorrect
=
true
;
bool
isSrcCorrect
=
true
;
for
(
auto
&
exeInfoPair
:
transferMap
)
for
(
auto
&
exeInfoPair
:
transferMap
)
{
{
ExecutorInfo
&
exeInfo
=
exeInfoPair
.
second
;
Executor
const
&
executor
=
exeInfoPair
.
first
;
ExecutorInfo
&
exeInfo
=
exeInfoPair
.
second
;
ExeType
const
exeType
=
executor
.
first
;
int
const
exeIndex
=
RemappedIndex
(
executor
.
second
,
IsCpuType
(
exeType
));
exeInfo
.
totalBytes
=
0
;
exeInfo
.
totalBytes
=
0
;
int
transferOffset
=
0
;
int
transferOffset
=
0
;
...
@@ -259,10 +264,13 @@ void ExecuteTransfers(EnvVars const& ev,
...
@@ -259,10 +264,13 @@ void ExecuteTransfers(EnvVars const& ev,
if
(
transfer
->
exeType
==
EXE_GPU_GFX
)
if
(
transfer
->
exeType
==
EXE_GPU_GFX
)
{
{
exeInfo
.
transfers
[
i
]
->
subExecParamGpuPtr
=
exeInfo
.
subExecParamGpu
+
transferOffset
;
exeInfo
.
transfers
[
i
]
->
subExecParamGpuPtr
=
exeInfo
.
subExecParamGpu
+
transferOffset
;
HIP_CALL
(
hipSetDevice
(
exeIndex
));
HIP_CALL
(
hipMemcpy
(
&
exeInfo
.
subExecParamGpu
[
transferOffset
],
HIP_CALL
(
hipMemcpy
(
&
exeInfo
.
subExecParamGpu
[
transferOffset
],
transfer
->
subExecParam
.
data
(),
transfer
->
subExecParam
.
data
(),
transfer
->
subExecParam
.
size
()
*
sizeof
(
SubExecParam
),
transfer
->
subExecParam
.
size
()
*
sizeof
(
SubExecParam
),
hipMemcpyHostToDevice
));
hipMemcpyHostToDevice
));
HIP_CALL
(
hipDeviceSynchronize
());
transferOffset
+=
transfer
->
subExecParam
.
size
();
transferOffset
+=
transfer
->
subExecParam
.
size
();
}
}
}
}
...
@@ -1451,6 +1459,7 @@ bool Transfer::PrepareSrc(EnvVars const& ev)
...
@@ -1451,6 +1459,7 @@ bool Transfer::PrepareSrc(EnvVars const& ev)
for
(
int
srcIdx
=
0
;
srcIdx
<
this
->
numSrcs
;
++
srcIdx
)
for
(
int
srcIdx
=
0
;
srcIdx
<
this
->
numSrcs
;
++
srcIdx
)
{
{
float
*
srcPtr
=
this
->
srcMem
[
srcIdx
]
+
initOffset
;
float
*
srcPtr
=
this
->
srcMem
[
srcIdx
]
+
initOffset
;
PrepareReference
(
ev
,
reference
,
srcIdx
);
// Initialize source memory array with reference pattern
// Initialize source memory array with reference pattern
if
(
IsGpuType
(
this
->
srcType
[
srcIdx
]))
if
(
IsGpuType
(
this
->
srcType
[
srcIdx
]))
...
@@ -1458,19 +1467,13 @@ bool Transfer::PrepareSrc(EnvVars const& ev)
...
@@ -1458,19 +1467,13 @@ bool Transfer::PrepareSrc(EnvVars const& ev)
int
const
deviceIdx
=
RemappedIndex
(
this
->
srcIndex
[
srcIdx
],
false
);
int
const
deviceIdx
=
RemappedIndex
(
this
->
srcIndex
[
srcIdx
],
false
);
HIP_CALL
(
hipSetDevice
(
deviceIdx
));
HIP_CALL
(
hipSetDevice
(
deviceIdx
));
if
(
ev
.
usePrepSrcKernel
)
if
(
ev
.
usePrepSrcKernel
)
{
PrepSrcDataKernel
<<<
32
,
BLOCKSIZE
>>>
(
srcPtr
,
N
,
srcIdx
);
PrepSrcDataKernel
<<<
32
,
BLOCKSIZE
>>>
(
srcPtr
,
N
,
srcIdx
);
}
else
else
{
PrepareReference
(
ev
,
reference
,
srcIdx
);
HIP_CALL
(
hipMemcpy
(
srcPtr
,
reference
.
data
(),
this
->
numBytesActual
,
hipMemcpyDefault
));
HIP_CALL
(
hipMemcpy
(
srcPtr
,
reference
.
data
(),
this
->
numBytesActual
,
hipMemcpyDefault
));
}
HIP_CALL
(
hipDeviceSynchronize
());
HIP_CALL
(
hipDeviceSynchronize
());
}
}
else
if
(
IsCpuType
(
this
->
srcType
[
srcIdx
]))
else
if
(
IsCpuType
(
this
->
srcType
[
srcIdx
]))
{
{
PrepareReference
(
ev
,
reference
,
srcIdx
);
memcpy
(
srcPtr
,
reference
.
data
(),
this
->
numBytesActual
);
memcpy
(
srcPtr
,
reference
.
data
(),
this
->
numBytesActual
);
}
}
...
...
src/include/EnvVars.hpp
View file @
989cdd9b
...
@@ -29,7 +29,7 @@ THE SOFTWARE.
...
@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp"
#include "Compatibility.hpp"
#include "Kernels.hpp"
#include "Kernels.hpp"
#define TB_VERSION "1.
19
"
#define TB_VERSION "1.
20
"
extern
char
const
MemTypeStr
[];
extern
char
const
MemTypeStr
[];
extern
char
const
ExeTypeStr
[];
extern
char
const
ExeTypeStr
[];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment