Commit 066cfd60 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed threading bug in PME

parent d6ad6438
...@@ -1288,6 +1288,7 @@ void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) { ...@@ -1288,6 +1288,7 @@ void OpenCLCalcNonbondedForceKernel::executeForces(ContextImpl& context) {
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta->getDeviceBuffer()); pmeUpdateBsplinesKernel.setArg<cl::Buffer>(1, pmeBsplineTheta->getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(2, pmeBsplineDtheta->getDeviceBuffer()); pmeUpdateBsplinesKernel.setArg<cl::Buffer>(2, pmeBsplineDtheta->getDeviceBuffer());
pmeUpdateBsplinesKernel.setArg(3, 2*OpenCLContext::ThreadBlockSize*PmeOrder*sizeof(mm_float4), NULL); pmeUpdateBsplinesKernel.setArg(3, 2*OpenCLContext::ThreadBlockSize*PmeOrder*sizeof(mm_float4), NULL);
pmeUpdateBsplinesKernel.setArg<cl::Buffer>(4, pmeAtomGridIndex->getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(0, pmeAtomGridIndex->getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomRange->getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(1, pmeAtomRange->getDeviceBuffer());
pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer()); pmeSpreadChargeKernel.setArg<cl::Buffer>(2, pmeGrid->getDeviceBuffer());
......
...@@ -27,11 +27,6 @@ __kernel void findAtomRangeForGrid(__global float4* posq, __global float2* pmeAt ...@@ -27,11 +27,6 @@ __kernel void findAtomRangeForGrid(__global float4* posq, __global float2* pmeAt
pmeAtomRange[j] = i; pmeAtomRange[j] = i;
last = gridIndex; last = gridIndex;
} }
// The grid index won't be needed again. Reuse that component to hold the atom charge, thus saving
// an extra load operation in the charge spreading kernel.
pmeAtomGridIndex[i].y = posq[(int) atomData.x].w;
} }
// Fill in values beyond the last atom. // Fill in values beyond the last atom.
...@@ -43,7 +38,7 @@ __kernel void findAtomRangeForGrid(__global float4* posq, __global float2* pmeAt ...@@ -43,7 +38,7 @@ __kernel void findAtomRangeForGrid(__global float4* posq, __global float2* pmeAt
} }
} }
__kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineTheta, __global float4* pmeBsplineDTheta, __local float4* bsplinesCache) { __kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineTheta, __global float4* pmeBsplineDTheta, __local float4* bsplinesCache, __global float2* pmeAtomGridIndex) {
const float4 scale = 1.0f/(PME_ORDER-1); const float4 scale = 1.0f/(PME_ORDER-1);
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
__local float4* data = &bsplinesCache[get_local_id(0)*PME_ORDER]; __local float4* data = &bsplinesCache[get_local_id(0)*PME_ORDER];
...@@ -79,6 +74,16 @@ __kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineT ...@@ -79,6 +74,16 @@ __kernel void updateBsplines(__global float4* posq, __global float4* pmeBsplineT
pmeBsplineDTheta[i+j*NUM_ATOMS] = ddata[j]; pmeBsplineDTheta[i+j*NUM_ATOMS] = ddata[j];
} }
} }
// The grid index won't be needed again. Reuse that component to hold the atom charge, thus saving
// an extra load operation in the charge spreading kernel.
int start = (NUM_ATOMS*get_global_id(0))/get_global_size(0);
int end = (NUM_ATOMS*(get_global_id(0)+1))/get_global_size(0);
for (int i = start; i < end; ++i) {
float2 atomData = pmeAtomGridIndex[i];
pmeAtomGridIndex[i].y = posq[(int) atomData.x].w;
}
} }
__kernel void gridSpreadCharge(__global float2* pmeAtomGridIndex, __global int* pmeAtomRange, __global float2* pmeGrid, __global float4* pmeBsplineTheta) { __kernel void gridSpreadCharge(__global float2* pmeAtomGridIndex, __global int* pmeAtomRange, __global float2* pmeGrid, __global float4* pmeBsplineTheta) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment