"vscode:/vscode.git/clone" did not exist on "bce0c133e0e2bbc97d6db5040560857f75aa989e"
Commit e7a00c6a authored by Peter Eastman
Browse files

Fixed a potential race condition in PME

parent a3db0217
...@@ -205,14 +205,6 @@ void kFindAtomRangeForGrid_kernel() ...@@ -205,14 +205,6 @@ void kFindAtomRangeForGrid_kernel()
cSim.pPmeAtomRange[j] = i; cSim.pPmeAtomRange[j] = i;
last = gridIndex; last = gridIndex;
} }
// The grid index won't be needed again. Reuse that component to hold the z index, thus saving
// some work in the charge spreading kernel.
float posz = cSim.pPosq[atomData.x].z;
posz -= floorf(posz*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ;
int z = ((int) ((posz*cSim.invPeriodicBoxSizeZ)*cSim.pmeGridSize.z)) % cSim.pmeGridSize.z;
cSim.pPmeAtomGridIndex[i].y = z;
} }
// Fill in values beyond the last atom. // Fill in values beyond the last atom.
...@@ -225,6 +217,33 @@ void kFindAtomRangeForGrid_kernel() ...@@ -225,6 +217,33 @@ void kFindAtomRangeForGrid_kernel()
} }
} }
/**
 * Replace the y component of every cSim.pPmeAtomGridIndex entry with the z grid index of
 * the atom named by that entry's x component.
 *
 * The grid index won't be needed again, so that component is reused to hold the z index,
 * thus saving some work in the charge spreading kernel.
 *
 * Each iteration reads and writes only element i of pPmeAtomGridIndex (plus a read of
 * pPosq), so the entries may be processed in any order by any thread.  The loop therefore
 * strides by the total number of launched threads instead of carving the atom list into
 * per-thread chunks: this avoids forming the product atoms*thread, which can exceed the
 * 32-bit range for large systems, and it is valid for any 1D launch configuration.
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(1024, 1)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__(512, 1)
#else
__launch_bounds__(256, 1)
#endif
void kRecordZIndex_kernel()
{
    const int numAtoms = cSim.atoms;
    const int stride = blockDim.x*gridDim.x;
    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numAtoms; i += stride)
    {
        // x holds the index of the atom this entry refers to.
        int2 atomData = cSim.pPmeAtomGridIndex[i];
        float posz = cSim.pPosq[atomData.x].z;

        // Wrap the z coordinate into the periodic box.
        posz -= floorf(posz*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ;

        // Convert to a grid cell.  The modulo folds the case where rounding leaves the
        // scaled coordinate equal to the grid size back to cell 0.
        int z = ((int) ((posz*cSim.invPeriodicBoxSizeZ)*cSim.pmeGridSize.z)) % cSim.pmeGridSize.z;
        cSim.pPmeAtomGridIndex[i].y = z;
    }
}
__global__ __global__
void kGridSpreadCharge_kernel() void kGridSpreadCharge_kernel()
{ {
...@@ -392,6 +411,8 @@ void kCalculatePME(gpuContext gpu) ...@@ -392,6 +411,8 @@ void kCalculatePME(gpuContext gpu)
bbSort(gpu->psPmeAtomGridIndex->_pDevData, gpu->natoms); bbSort(gpu->psPmeAtomGridIndex->_pDevData, gpu->natoms);
kFindAtomRangeForGrid_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>(); kFindAtomRangeForGrid_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kFindAtomRangeForGrid"); LAUNCHERROR("kFindAtomRangeForGrid");
kRecordZIndex_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kRecordZIndex");
kGridSpreadCharge_kernel<<<16*gpu->sim.blocks, 64>>>(); kGridSpreadCharge_kernel<<<16*gpu->sim.blocks, 64>>>();
LAUNCHERROR("kGridSpreadCharge"); LAUNCHERROR("kGridSpreadCharge");
cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_FORWARD); cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_FORWARD);
......
...@@ -1263,6 +1263,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1263,6 +1263,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl::Program program = cl.createProgram(file, pmeDefines); cl::Program program = cl.createProgram(file, pmeDefines);
pmeUpdateBsplinesKernel = cl::Kernel(program, "updateBsplines"); pmeUpdateBsplinesKernel = cl::Kernel(program, "updateBsplines");
pmeAtomRangeKernel = cl::Kernel(program, "findAtomRangeForGrid"); pmeAtomRangeKernel = cl::Kernel(program, "findAtomRangeForGrid");
pmeZIndexKernel = cl::Kernel(program, "recordZIndex");
pmeSpreadChargeKernel = cl::Kernel(program, "gridSpreadCharge"); pmeSpreadChargeKernel = cl::Kernel(program, "gridSpreadCharge");
pmeConvolutionKernel = cl::Kernel(program, "reciprocalConvolution"); pmeConvolutionKernel = cl::Kernel(program, "reciprocalConvolution");
pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce"); pmeInterpolateForceKernel = cl::Kernel(program, "gridInterpolateForce");
...@@ -1275,6 +1276,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1275,6 +1276,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer()); pmeAtomRangeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange->getDeviceBuffer()); pmeAtomRangeKernel.setArg<cl::Buffer>(1, pmeAtomRange->getDeviceBuffer());
pmeAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer()); pmeAtomRangeKernel.setArg<cl::Buffer>(2, cl.getPosq().getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer());
pmeZIndexKernel.setArg<cl::Buffer>(1, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex->getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomGridIndex->getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange->getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeAtomRange->getDeviceBuffer());
...@@ -1334,8 +1337,12 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1334,8 +1337,12 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms(), PmeOrder*PmeOrder*PmeOrder); cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms(), PmeOrder*PmeOrder*PmeOrder);
cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize()); cl.executeKernel(pmeFinishSpreadChargeKernel, pmeGrid->getSize());
} }
else else {
pmeZIndexKernel.setArg<mm_float4>(2, boxSize);
pmeZIndexKernel.setArg<mm_float4>(3, invBoxSize);
cl.executeKernel(pmeZIndexKernel, cl.getNumAtoms());
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms()); cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
}
} }
fft->execFFT(*pmeGrid, *pmeGrid2, true); fft->execFFT(*pmeGrid, *pmeGrid2, true);
pmeConvolutionKernel.setArg<mm_float4>(5, invBoxSize); pmeConvolutionKernel.setArg<mm_float4>(5, invBoxSize);
......
...@@ -553,6 +553,7 @@ private: ...@@ -553,6 +553,7 @@ private:
cl::Kernel ewaldForcesKernel; cl::Kernel ewaldForcesKernel;
cl::Kernel pmeGridIndexKernel; cl::Kernel pmeGridIndexKernel;
cl::Kernel pmeAtomRangeKernel; cl::Kernel pmeAtomRangeKernel;
cl::Kernel pmeZIndexKernel;
cl::Kernel pmeUpdateBsplinesKernel; cl::Kernel pmeUpdateBsplinesKernel;
cl::Kernel pmeSpreadChargeKernel; cl::Kernel pmeSpreadChargeKernel;
cl::Kernel pmeFinishSpreadChargeKernel; cl::Kernel pmeFinishSpreadChargeKernel;
......
...@@ -51,14 +51,6 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g ...@@ -51,14 +51,6 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
pmeAtomRange[j] = i; pmeAtomRange[j] = i;
last = gridIndex; last = gridIndex;
} }
// The grid index won't be needed again. Reuse that component to hold the z index, thus saving
// some work in the charge spreading kernel.
float posz = posq[pmeAtomGridIndex[i].x].z;
posz -= floor(posz*invPeriodicBoxSize.z)*periodicBoxSize.z;
int z = ((int) ((posz*invPeriodicBoxSize.z)*GRID_SIZE_Z)) % GRID_SIZE_Z;
pmeAtomGridIndex[i].y = z;
} }
// Fill in values beyond the last atom. // Fill in values beyond the last atom.
...@@ -70,6 +62,21 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g ...@@ -70,6 +62,21 @@ __kernel void findAtomRangeForGrid(__global int2* restrict pmeAtomGridIndex, __g
} }
} }
/**
 * Replace the y component of every pmeAtomGridIndex entry with the z grid index of the
 * atom named by that entry's x component.
 *
 * The grid index won't be needed again, so that component is reused to hold the z index,
 * thus saving some work in the charge spreading kernel.
 *
 * Each iteration reads and writes only element i of pmeAtomGridIndex (plus a read of
 * posq), so the entries may be processed in any order by any work-item.  The loop
 * therefore strides by the global size instead of carving the atom list into per-work-item
 * chunks: this avoids forming the product NUM_ATOMS*get_global_id(0), which can wrap on
 * devices where size_t is 32 bits wide.
 *
 * @param pmeAtomGridIndex    per-entry (atom index, grid index) pairs; y is overwritten
 * @param posq                atom data; only the z component is read here
 * @param periodicBoxSize     periodic box dimensions; only z is used
 * @param invPeriodicBoxSize  reciprocal box dimensions; only z is used
 */
__kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global const float4* restrict posq, float4 periodicBoxSize, float4 invPeriodicBoxSize) {
    const int stride = (int) get_global_size(0);
    for (int i = (int) get_global_id(0); i < NUM_ATOMS; i += stride) {
        // Wrap the z coordinate into the periodic box.
        float posz = posq[pmeAtomGridIndex[i].x].z;
        posz -= floor(posz*invPeriodicBoxSize.z)*periodicBoxSize.z;

        // Convert to a grid cell.  The modulo folds the case where rounding leaves the
        // scaled coordinate equal to GRID_SIZE_Z back to cell 0.
        int z = ((int) ((posz*invPeriodicBoxSize.z)*GRID_SIZE_Z)) % GRID_SIZE_Z;
        pmeAtomGridIndex[i].y = z;
    }
}
#ifdef SUPPORTS_64_BIT_ATOMICS #ifdef SUPPORTS_64_BIT_ATOMICS
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment