Commit 938afab0 (unverified), authored by peastman, committed by GitHub
Browse files

Minor optimizations to CUDA kernels (#2861)

parent 6ca68730
......@@ -843,7 +843,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce");
pmeEvalEnergyKernel = cu.getKernel(module, "gridEvaluateEnergy");
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_SHARED);
cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
if (doLJPME) {
pmeDefines["EWALD_ALPHA"] = cu.doubleToString(dispersionAlpha);
......
......@@ -56,8 +56,6 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
#else
const real charge = (CHARGE)*EPSILON_FACTOR;
#endif
if (charge == 0)
continue;
APPLY_PERIODIC_TO_POS(pos)
real3 t = make_real3(pos.x*recipBoxVecX.x+pos.y*recipBoxVecY.x+pos.z*recipBoxVecZ.x,
pos.y*recipBoxVecY.y+pos.z*recipBoxVecZ.y,
......@@ -68,6 +66,8 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
int3 gridIndex = make_int3(((int) t.x) % GRID_SIZE_X,
((int) t.y) % GRID_SIZE_Y,
((int) t.z) % GRID_SIZE_Z);
if (charge == 0)
continue;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment