Unverified Commit c5197729 authored by gilbertlee-amd's avatar gilbertlee-amd Committed by GitHub
Browse files

v1.38 Adding missing __threadfence_system() (#70)

parent 20ada430
...@@ -3,6 +3,11 @@ ...@@ -3,6 +3,11 @@
Documentation for TransferBench is available at Documentation for TransferBench is available at
[https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench). [https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench).
## v1.38
### Fixes
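* Added the missing `__threadfence_system()` call before the stop timestamp is recorded in GpuReduceKernel; without it, non-fine-grained Transfers could report higher speeds than were actually achieved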
## v1.37 ## v1.37
### Changes ### Changes
......
...@@ -29,7 +29,7 @@ THE SOFTWARE. ...@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp" #include "Compatibility.hpp"
#include "Kernels.hpp" #include "Kernels.hpp"
#define TB_VERSION "1.37" #define TB_VERSION "1.38"
extern char const MemTypeStr[]; extern char const MemTypeStr[];
extern char const ExeTypeStr[]; extern char const ExeTypeStr[];
......
...@@ -247,6 +247,7 @@ GpuReduceKernel(SubExecParam* params) ...@@ -247,6 +247,7 @@ GpuReduceKernel(SubExecParam* params)
__syncthreads(); __syncthreads();
if (threadIdx.x == 0) if (threadIdx.x == 0)
{ {
__threadfence_system();
p.stopCycle = wall_clock64(); p.stopCycle = wall_clock64();
p.startCycle = startCycle; p.startCycle = startCycle;
p.xccId = xccId; p.xccId = xccId;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment