Commit 622e066c authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed a race condition

parent 5c071a07
......@@ -1086,6 +1086,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipolePme, pmeDefines);
pmeUpdateBsplinesKernel = cu.getKernel(module, "updateBsplines");
pmeAtomRangeKernel = cu.getKernel(module, "findAtomRangeForGrid");
pmeZIndexKernel = cu.getKernel(module, "recordZIndex");
pmeSpreadFixedMultipolesKernel = cu.getKernel(module, "gridSpreadFixedMultipoles");
pmeSpreadInducedDipolesKernel = cu.getKernel(module, "gridSpreadInducedDipoles");
pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
......@@ -1358,7 +1359,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
sort->sort(*pmeAtomGridIndex);
void* pmeAtomRangeArgs[] = {&pmeAtomGridIndex->getDevicePointer(), &pmeAtomRange->getDevicePointer(),
&cu.getPosq().getDevicePointer(), cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeAtomRangeKernel, pmeAtomRangeArgs, cu.getNumAtoms(), cu.ThreadBlockSize, cu.ThreadBlockSize*PmeOrder*PmeOrder*elementSize);
cu.executeKernel(pmeAtomRangeKernel, pmeAtomRangeArgs, cu.getNumAtoms());
void* pmeZIndexArgs[] = {&pmeAtomGridIndex->getDevicePointer(), &cu.getPosq().getDevicePointer(), cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeZIndexKernel, pmeZIndexArgs, cu.getNumAtoms());
void* pmeSpreadFixedMultipolesArgs[] = {&cu.getPosq().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), &pmeAtomRange->getDevicePointer(),
&pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(), &pmeTheta3->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
......
......@@ -425,7 +425,7 @@ private:
CudaSort* sort;
cufftHandle fft;
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeInducedPotentialKernel;
CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeZIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeInducedPotentialKernel;
CUfunction pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
static const int PmeOrder = 5;
};
......
......@@ -139,16 +139,6 @@ extern "C" __global__ void findAtomRangeForGrid(int2* __restrict__ pmeAtomGridIn
pmeAtomRange[j] = i;
last = gridIndex;
}
// The grid index won't be needed again. Reuse that component to hold the z index, thus saving
// some work in the charge spreading kernel.
real posz = posq[atomData.x].z;
posz -= floor(posz*invPeriodicBoxSize.z)*periodicBoxSize.z;
real w = posz*invPeriodicBoxSize.z;
real fr = GRID_SIZE_Z*(w-(int)(w+0.5f)+0.5f);
int z = ((int) fr)-PME_ORDER+1;
pmeAtomGridIndex[i].y = z;
}
// Fill in values beyond the last atom.
......@@ -160,6 +150,24 @@ extern "C" __global__ void findAtomRangeForGrid(int2* __restrict__ pmeAtomGridIn
}
}
/**
* The grid index won't be needed again. Reuse that component to hold the z index, thus saving
* some work in the charge spreading kernel.
*/
extern "C" __global__ void recordZIndex(int2* __restrict__ pmeAtomGridIndex, const real4* __restrict__ posq, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
int thread = blockIdx.x*blockDim.x+threadIdx.x;
int start = (NUM_ATOMS*thread)/(blockDim.x*gridDim.x);
int end = (NUM_ATOMS*(thread+1))/(blockDim.x*gridDim.x);
for (int i = start; i < end; ++i) {
real posz = posq[pmeAtomGridIndex[i].x].z;
posz -= floor(posz*invPeriodicBoxSize.z)*periodicBoxSize.z;
real w = posz*invPeriodicBoxSize.z;
real fr = GRID_SIZE_Z*(w-(int)(w+0.5f)+0.5f);
int z = ((int) fr)-PME_ORDER+1;
pmeAtomGridIndex[i].y = z;
}
}
extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, real2* __restrict__ pmeGrid, int2* __restrict__ pmeAtomGridIndex, int* __restrict__ pmeAtomRange,
const real4* __restrict__ theta1, const real4* __restrict__ theta2, const real4* __restrict__ theta3, real4 invPeriodicBoxSize) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment