Unverified commit f903fda3, authored by gilbertlee-amd, committed by GitHub
Browse files

Changing to wall_clock64() (#35)

parent b86c5479
# Changelog for TransferBench # Changelog for TransferBench
## v1.22
### Modified
- Switched kernel timing function from `__builtin_amdgcn_s_memrealtime()` to `wall_clock64()`
## v1.21 ## v1.21
### Fixed ### Fixed
- Fixed bug with SAMPLING_FACTOR - Fixed bug with SAMPLING_FACTOR
......
...@@ -29,7 +29,7 @@ THE SOFTWARE. ...@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp" #include "Compatibility.hpp"
#include "Kernels.hpp" #include "Kernels.hpp"
#define TB_VERSION "1.21" #define TB_VERSION "1.22"
extern char const MemTypeStr[]; extern char const MemTypeStr[];
extern char const ExeTypeStr[]; extern char const ExeTypeStr[];
......
...@@ -104,7 +104,7 @@ template <int LOOP1_UNROLL> ...@@ -104,7 +104,7 @@ template <int LOOP1_UNROLL>
__global__ void __launch_bounds__(BLOCKSIZE) __global__ void __launch_bounds__(BLOCKSIZE)
GpuReduceKernel(SubExecParam* params) GpuReduceKernel(SubExecParam* params)
{ {
int64_t startCycle = __builtin_amdgcn_s_memrealtime(); int64_t startCycle = wall_clock64();
// Operate on wavefront granularity // Operate on wavefront granularity
SubExecParam& p = params[blockIdx.x]; SubExecParam& p = params[blockIdx.x];
...@@ -210,7 +210,7 @@ GpuReduceKernel(SubExecParam* params) ...@@ -210,7 +210,7 @@ GpuReduceKernel(SubExecParam* params)
if (threadIdx.x == 0) if (threadIdx.x == 0)
{ {
p.startCycle = startCycle; p.startCycle = startCycle;
p.stopCycle = __builtin_amdgcn_s_memrealtime(); p.stopCycle = wall_clock64();
} }
} }
...@@ -343,7 +343,7 @@ __device__ size_t GpuReduceFunc(SubExecParam const &p, size_t const offset, size ...@@ -343,7 +343,7 @@ __device__ size_t GpuReduceFunc(SubExecParam const &p, size_t const offset, size
__global__ void __launch_bounds__(BLOCKSIZE) __global__ void __launch_bounds__(BLOCKSIZE)
GpuReduceKernel2(SubExecParam* params) GpuReduceKernel2(SubExecParam* params)
{ {
int64_t startCycle = __builtin_amdgcn_s_memrealtime(); int64_t startCycle = wall_clock64();
SubExecParam& p = params[blockIdx.x]; SubExecParam& p = params[blockIdx.x];
size_t numFloatsLeft = GpuReduceFunc<float4>(p, 0, p.N, 8); size_t numFloatsLeft = GpuReduceFunc<float4>(p, 0, p.N, 8);
...@@ -357,7 +357,7 @@ GpuReduceKernel2(SubExecParam* params) ...@@ -357,7 +357,7 @@ GpuReduceKernel2(SubExecParam* params)
if (threadIdx.x == 0) if (threadIdx.x == 0)
{ {
p.startCycle = startCycle; p.startCycle = startCycle;
p.stopCycle = __builtin_amdgcn_s_memrealtime(); p.stopCycle = wall_clock64();
} }
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment