Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
one
TransferBench
Commits
f903fda3
Unverified
Commit
f903fda3
authored
Jun 05, 2023
by
gilbertlee-amd
Committed by
GitHub
Jun 05, 2023
Browse files
Changing to wall_clock64() (#35)
parent
b86c5479
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing 3 changed files with 9 additions and 5 deletions
+9
-5
CHANGELOG.md
CHANGELOG.md
+4
-0
src/include/EnvVars.hpp
src/include/EnvVars.hpp
+1
-1
src/include/Kernels.hpp
src/include/Kernels.hpp
+4
-4
No files found.
CHANGELOG.md
View file @
f903fda3
# Changelog for TransferBench
## v1.22
### Modified
- Switching kernel timing function to wall_clock64
## v1.21
### Fixed
- Fixed bug with SAMPLING_FACTOR
...
...
src/include/EnvVars.hpp
View file @
f903fda3
...
...
@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp"
#include "Kernels.hpp"
#define TB_VERSION "1.21"
#define TB_VERSION "1.22"
extern
char
const
MemTypeStr
[];
extern
char
const
ExeTypeStr
[];
...
...
src/include/Kernels.hpp
View file @
f903fda3
...
...
@@ -104,7 +104,7 @@ template <int LOOP1_UNROLL>
__global__
void
__launch_bounds__
(
BLOCKSIZE
)
GpuReduceKernel
(
SubExecParam
*
params
)
{
int64_t
startCycle
=
__builtin_amdgcn_s_memrealtime
();
int64_t
startCycle
=
wall_clock64
();
// Operate on wavefront granularity
SubExecParam
&
p
=
params
[
blockIdx
.
x
];
...
...
@@ -210,7 +210,7 @@ GpuReduceKernel(SubExecParam* params)
if
(
threadIdx
.
x
==
0
)
{
p
.
startCycle
=
startCycle
;
p
.
stopCycle
=
__builtin_amdgcn_s_memrealtime
();
p
.
stopCycle
=
wall_clock64
();
}
}
...
...
@@ -343,7 +343,7 @@ __device__ size_t GpuReduceFunc(SubExecParam const &p, size_t const offset, size
__global__
void
__launch_bounds__
(
BLOCKSIZE
)
GpuReduceKernel2
(
SubExecParam
*
params
)
{
int64_t
startCycle
=
__builtin_amdgcn_s_memrealtime
();
int64_t
startCycle
=
wall_clock64
();
SubExecParam
&
p
=
params
[
blockIdx
.
x
];
size_t
numFloatsLeft
=
GpuReduceFunc
<
float4
>
(
p
,
0
,
p
.
N
,
8
);
...
...
@@ -357,7 +357,7 @@ GpuReduceKernel2(SubExecParam* params)
if
(
threadIdx
.
x
==
0
)
{
p
.
startCycle
=
startCycle
;
p
.
stopCycle
=
__builtin_amdgcn_s_memrealtime
();
p
.
stopCycle
=
wall_clock64
();
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment