Commit c9c5997b authored by peastman's avatar peastman
Browse files

Fixed bug when running on a CPU device

parent 46479322
...@@ -2271,7 +2271,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo ...@@ -2271,7 +2271,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
defines["USE_CUTOFF"] = "1"; defines["USE_CUTOFF"] = "1";
if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic) if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()); defines["LOCAL_MEMORY_SIZE"] = cu.intToString(max(32, cu.getNonbondedUtilities().getForceThreadBlockSize()));
double cutoff = force.getCutoffDistance(); double cutoff = force.getCutoffDistance();
defines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff); defines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff);
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
......
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct { typedef struct {
real x, y, z; real x, y, z;
real q; real q;
...@@ -19,7 +17,7 @@ extern "C" __global__ void computeInteractionGroups( ...@@ -19,7 +17,7 @@ extern "C" __global__ void computeInteractionGroups(
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = threadIdx.x - tgx; // block warpIndex const unsigned int tbx = threadIdx.x - tgx; // block warpIndex
real energy = 0.0f; real energy = 0.0f;
__shared__ AtomData localData[THREAD_BLOCK_SIZE]; __shared__ AtomData localData[LOCAL_MEMORY_SIZE];
const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps; const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps; const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps;
......
...@@ -2289,7 +2289,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon ...@@ -2289,7 +2289,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
defines["USE_CUTOFF"] = "1"; defines["USE_CUTOFF"] = "1";
if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic) if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
defines["USE_PERIODIC"] = "1"; defines["USE_PERIODIC"] = "1";
defines["THREAD_BLOCK_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize()); defines["LOCAL_MEMORY_SIZE"] = cl.intToString(max(32, cl.getNonbondedUtilities().getForceThreadBlockSize()));
double cutoff = force.getCutoffDistance(); double cutoff = force.getCutoffDistance();
defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff); defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
......
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#endif #endif
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct { typedef struct {
real x, y, z; real x, y, z;
real q; real q;
...@@ -52,7 +50,7 @@ __kernel void computeInteractionGroups( ...@@ -52,7 +50,7 @@ __kernel void computeInteractionGroups(
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp
const unsigned int tbx = get_local_id(0) - tgx; // block warpIndex const unsigned int tbx = get_local_id(0) - tgx; // block warpIndex
real energy = 0.0f; real energy = 0.0f;
__local AtomData localData[THREAD_BLOCK_SIZE]; __local AtomData localData[LOCAL_MEMORY_SIZE];
const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps; const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps; const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment