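Fixed bug when running on a CPU device: the interaction-group kernels now size their local-memory array with LOCAL_MEMORY_SIZE (max(32, force thread block size)) instead of THREAD_BLOCK_SIZE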

c9c5997b · peastman · 46479322 · c9c5997b · c9c5997b · c9c5997b
Commit c9c5997b authored Jul 15, 2014 by peastman
4 changed files
--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -2271,7 +2271,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
        defines["USE_CUTOFF"] = "1";
    if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
        defines["USE_PERIODIC"] = "1";
-    defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
+    defines["LOCAL_MEMORY_SIZE"] = cu.intToString(max(32, cu.getNonbondedUtilities().getForceThreadBlockSize()));
    double cutoff = force.getCutoffDistance();
    defines["CUTOFF_SQUARED"] = cu.doubleToString(cutoff*cutoff);
    defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());

--- a/platforms/cuda/src/kernels/customNonbondedGroups.cu
+++ b/platforms/cuda/src/kernels/customNonbondedGroups.cu
@@ -1,5 +1,3 @@
-#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
-
 typedef struct {
    real x, y, z;
    real q;
@@ -19,7 +17,7 @@ extern "C" __global__ void computeInteractionGroups(
    const unsigned int tgx = threadIdx.x & (TILE_SIZE-1); // index within the warp
    const unsigned int tbx = threadIdx.x - tgx;           // block warpIndex
    real energy = 0.0f;
-    __shared__ AtomData localData[THREAD_BLOCK_SIZE];
+    __shared__ AtomData localData[LOCAL_MEMORY_SIZE];

    const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
    const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps;
@@ -86,4 +84,4 @@ extern "C" __global__ void computeInteractionGroups(
        atomicAdd(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fz*0x100000000)));
    }
    energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
-}
\ No newline at end of file
+}
--- a/platforms/opencl/src/OpenCLKernels.cpp
+++ b/platforms/opencl/src/OpenCLKernels.cpp
@@ -2289,7 +2289,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNon
        defines["USE_CUTOFF"] = "1";
    if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
        defines["USE_PERIODIC"] = "1";
-    defines["THREAD_BLOCK_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize());
+    defines["LOCAL_MEMORY_SIZE"] = cl.intToString(max(32, cl.getNonbondedUtilities().getForceThreadBlockSize()));
    double cutoff = force.getCutoffDistance();
    defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
    defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());

--- a/platforms/opencl/src/kernels/customNonbondedGroups.cl
+++ b/platforms/opencl/src/kernels/customNonbondedGroups.cl
@@ -2,8 +2,6 @@
 #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
 #endif

-#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
-
 typedef struct {
    real x, y, z;
    real q;
@@ -52,7 +50,7 @@ __kernel void computeInteractionGroups(
    const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp
    const unsigned int tbx = get_local_id(0) - tgx;           // block warpIndex
    real energy = 0.0f;
-    __local AtomData localData[THREAD_BLOCK_SIZE];
+    __local AtomData localData[LOCAL_MEMORY_SIZE];

    const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
    const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps;
@@ -127,4 +125,4 @@ __kernel void computeInteractionGroups(
 #endif
    }
    energyBuffer[get_global_id(0)] += energy;
-}
\ No newline at end of file
+}