Unverified commit f903fda3, authored by gilbertlee-amd and committed by GitHub
Browse files

Changing to wall_clock64() (#35)

parent b86c5479
# Changelog for TransferBench
## v1.22
### Modified
- Switched the kernel timing function from `__builtin_amdgcn_s_memrealtime()` to `wall_clock64()`
## v1.21
### Fixed
- Fixed bug with SAMPLING_FACTOR
......
......@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp"
#include "Kernels.hpp"
#define TB_VERSION "1.21"
#define TB_VERSION "1.22"
extern char const MemTypeStr[];
extern char const ExeTypeStr[];
......
......@@ -104,7 +104,7 @@ template <int LOOP1_UNROLL>
__global__ void __launch_bounds__(BLOCKSIZE)
GpuReduceKernel(SubExecParam* params)
{
int64_t startCycle = __builtin_amdgcn_s_memrealtime();
int64_t startCycle = wall_clock64();
// Operate on wavefront granularity
SubExecParam& p = params[blockIdx.x];
......@@ -210,7 +210,7 @@ GpuReduceKernel(SubExecParam* params)
if (threadIdx.x == 0)
{
p.startCycle = startCycle;
p.stopCycle = __builtin_amdgcn_s_memrealtime();
p.stopCycle = wall_clock64();
}
}
......@@ -343,7 +343,7 @@ __device__ size_t GpuReduceFunc(SubExecParam const &p, size_t const offset, size
__global__ void __launch_bounds__(BLOCKSIZE)
GpuReduceKernel2(SubExecParam* params)
{
int64_t startCycle = __builtin_amdgcn_s_memrealtime();
int64_t startCycle = wall_clock64();
SubExecParam& p = params[blockIdx.x];
size_t numFloatsLeft = GpuReduceFunc<float4>(p, 0, p.N, 8);
......@@ -357,7 +357,7 @@ GpuReduceKernel2(SubExecParam* params)
if (threadIdx.x == 0)
{
p.startCycle = startCycle;
p.stopCycle = __builtin_amdgcn_s_memrealtime();
p.stopCycle = wall_clock64();
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment