"docs-source/vscode:/vscode.git/clone" did not exist on "bb8e21290def02cf133134752090b21dd56c8e6b"
Commit e7a00c6a authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed a potential race condition in PME

parent a3db0217
......@@ -205,14 +205,6 @@ void kFindAtomRangeForGrid_kernel()
cSim.pPmeAtomRange[j] = i;
last = gridIndex;
}
// The grid index won't be needed again. Reuse that component to hold the z index, thus saving
// some work in the charge spreading kernel.
float posz = cSim.pPosq[atomData.x].z;
posz -= floorf(posz*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ;
int z = ((int) ((posz*cSim.invPeriodicBoxSizeZ)*cSim.pmeGridSize.z)) % cSim.pmeGridSize.z;
cSim.pPmeAtomGridIndex[i].y = z;
}
// Fill in values beyond the last atom.
......@@ -225,6 +217,33 @@ void kFindAtomRangeForGrid_kernel()
}
}
/**
* The grid index won't be needed again. Reuse that component to hold the z index, thus saving
* some work in the charge spreading kernel.
*/
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kRecordZIndex_kernel()
{
int thread = blockIdx.x*blockDim.x+threadIdx.x;
int start = (cSim.atoms*thread)/(blockDim.x*gridDim.x);
int end = (cSim.atoms*(thread+1))/(blockDim.x*gridDim.x);
for (int i = start; i < end; ++i)
{
int2 atomData = cSim.pPmeAtomGridIndex[i];
float posz = cSim.pPosq[atomData.x].z;
posz -= floorf(posz*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ;
int z = ((int) ((posz*cSim.invPeriodicBoxSizeZ)*cSim.pmeGridSize.z)) % cSim.pmeGridSize.z;
cSim.pPmeAtomGridIndex[i].y = z;
}
}
__global__
void kGridSpreadCharge_kernel()
{
......@@ -392,6 +411,8 @@ void kCalculatePME(gpuContext gpu)
bbSort(gpu->psPmeAtomGridIndex->_pDevData, gpu->natoms);
kFindAtomRangeForGrid_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kFindAtomRangeForGrid");
kRecordZIndex_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kRecordZIndex");
kGridSpreadCharge_kernel<<<16*gpu->sim.blocks, 64>>>();
LAUNCHERROR("kGridSpreadCharge");
cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_FORWARD);
......
......@@ -1263,6 +1263,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl::Program program = cl.createProgram(file, pmeDefines);
pmeUpdateBsplinesKernel = cl::Kernel(program, "updateBsplines");
pmeAtomRangeKernel = cl::Kernel(program, "findAtomRangeForGrid");
pmeZIndexKernel = cl::Kernel(program, "recordZIndex");
pmeSpreadChargeKernel = cl::Kernel(program, "gridSpreadCharge");
pmeConvolutionKernel = cl::Kernel(program, "reciprocalConvolution");
pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
......@@ -1275,6 +1276,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange->getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex->getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange->getDeviceBuffer());
......@@ -1334,9 +1337,13 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms(), PmeOrder*PmeOrder*PmeOrder);
cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
}
else
else {
pmeZIndexKernel.setArg<mm_float4>(2, boxSize);
pmeZIndexKernel.setArg<mm_float4>(3, invBoxSize);
cl.executeKernel(pmeZIndexKernel, cl.getNumAtoms());
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
}
}
fft->execFFT(*pmeGrid, *pmeGrid2, true);
pmeConvolutionKernel.setArg<mm_float4>(5, invBoxSize);
pmeConvolutionKernel.setArg<cl_float>(6, (float) (1.0/(M_PI*boxSize.x*boxSize.y*boxSize.z)));
......
......@@ -553,6 +553,7 @@ private:
cl::Kernel ewaldForcesKernel;
cl::Kernel pmeGridIndexKernel;
cl::Kernel pmeAtomRangeKernel;
cl::Kernel pmeZIndexKernel;
cl::Kernel pmeUpdateBsplinesKernel;
cl::Kernel pmeSpreadChargeKernel;
cl::Kernel pmeFinishSpreadChargeKernel;
......
......@@ -51,14 +51,6 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
pmeAtomRange[j] = i;
last = gridIndex;
}
// The grid index won't be needed again. Reuse that component to hold the z index, thus saving
// some work in the charge spreading kernel.
float posz = posq[pmeAtomGridIndex[i].x].z;
posz -= floor(posz*invPeriodicBoxSize.z)*periodicBoxSize.z;
int z = ((int) ((posz*invPeriodicBoxSize.z)*GRID_SIZE_Z)) % GRID_SIZE_Z;
pmeAtomGridIndex[i].y = z;
}
// Fill in values beyond the last atom.
......@@ -70,6 +62,21 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
}
}
/**
* The grid index won't be needed again. Reuse that component to hold the z index, thus saving
* some work in the charge spreading kernel.
*/
__kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global const float4* restrict posq, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
int start = (NUM_ATOMS*get_global_id(0))/get_global_size(0);
int end = (NUM_ATOMS*(get_global_id(0)+1))/get_global_size(0);
for (int i = start; i < end; ++i) {
float posz = posq[pmeAtomGridIndex[i].x].z;
posz -= floor(posz*invPeriodicBoxSize.z)*periodicBoxSize.z;
int z = ((int) ((posz*invPeriodicBoxSize.z)*GRID_SIZE_Z)) % GRID_SIZE_Z;
pmeAtomGridIndex[i].y = z;
}
}
#ifdef SUPPORTS_64_BIT_ATOMICS
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment