Commit de42858f authored by peastman's avatar peastman
Browse files

Bug fixes

parent 402e01b2
......@@ -1668,7 +1668,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cosSinSums.initialize(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if ((nonbondedMethod == PME && hasCoulomb) || doLJPME) {
else if (((nonbondedMethod == PME || nonbondedMethod == LJPME) && hasCoulomb) || doLJPME) {
// Compute the PME parameters.
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
......@@ -1934,37 +1934,34 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
charges.initialize(cu, cu.getPaddedNumAtoms(), cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
if (hasCoulomb) {
map<string, string> replacements;
if (usePosqCharges) {
cu.setCharges(chargeVec);
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
}
map<string, string> replacements;
if (usePosqCharges) {
cu.setCharges(chargeVec);
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
}
else {
if (cu.getUseDoublePrecision())
charges.upload(chargeVec);
else {
if (cu.getUseDoublePrecision())
charges.upload(chargeVec);
else {
vector<float> c(charges.getSize());
for (int i = 0; i < c.size(); i++)
c[i] = (float) chargeVec[i];
charges.upload(c);
}
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
vector<float> c(charges.getSize());
for (int i = 0; i < c.size(); i++)
c[i] = (float) chargeVec[i];
charges.upload(c);
}
source = cu.replaceStrings(source, replacements);
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
}
if (hasCoulomb)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDevicePointer()));
if (hasLJ) {
sigmaEpsilon.initialize<float2>(cu, cu.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon.upload(sigmaEpsilonVector);
map<string, string> replacements;
replacements["SIGMA_EPSILON1"] = prefix+"sigmaEpsilon1";
replacements["SIGMA_EPSILON2"] = prefix+"sigmaEpsilon2";
source = cu.replaceStrings(source, replacements);
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon.getDevicePointer()));
}
source = cu.replaceStrings(source, replacements);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
// Initialize the exceptions.
......@@ -2032,65 +2029,67 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
// Execute the reciprocal space kernels.
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
if (hasCoulomb) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(pmeAtomGridIndex);
sort->sort(pmeAtomGridIndex);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&charges.getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&charges.getDevicePointer()};
cu.executeKernel(pmeSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid.getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
}
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid.getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
}
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecD2Z(fftForward, (double*) directPmeGrid.getDevicePointer(), (double2*) reciprocalPmeGrid.getDevicePointer());
else
cufftExecR2C(fftForward, (float*) directPmeGrid.getDevicePointer(), (float2*) reciprocalPmeGrid.getDevicePointer());
}
else {
fft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
}
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecD2Z(fftForward, (double*) directPmeGrid.getDevicePointer(), (double2*) reciprocalPmeGrid.getDevicePointer());
else
cufftExecR2C(fftForward, (float*) directPmeGrid.getDevicePointer(), (float2*) reciprocalPmeGrid.getDevicePointer());
}
else {
fft->execFFT(directPmeGrid, reciprocalPmeGrid, true);
}
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid.getDevicePointer(), usePmeStream ? &pmeEnergyBuffer.getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ);
}
void* convolutionArgs[] = {&reciprocalPmeGrid.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalEnergyKernel, computeEnergyArgs, gridSizeX*gridSizeY*gridSizeZ);
}
cu.executeKernel(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
void* convolutionArgs[] = {&reciprocalPmeGrid.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeBsplineModuliX.getDevicePointer(), &pmeBsplineModuliY.getDevicePointer(), &pmeBsplineModuliZ.getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, convolutionArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid.getDevicePointer(), (double*) directPmeGrid.getDevicePointer());
else
cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid.getDevicePointer(), (float*) directPmeGrid.getDevicePointer());
}
else {
fft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
}
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecZ2D(fftBackward, (double2*) reciprocalPmeGrid.getDevicePointer(), (double*) directPmeGrid.getDevicePointer());
else
cufftExecC2R(fftBackward, (float2*) reciprocalPmeGrid.getDevicePointer(), (float*) directPmeGrid.getDevicePointer());
}
else {
fft->execFFT(reciprocalPmeGrid, directPmeGrid, false);
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&charges.getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex.getDevicePointer(),
&charges.getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// now we assume that LJPME can only be used if electrostatic PME is also active.
if (doLJPME) {
if (doLJPME && hasLJ) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex.getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
......
......@@ -1657,7 +1657,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cosSinSums.initialize(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if ((nonbondedMethod == PME && hasCoulomb) || doLJPME) {
else if (((nonbondedMethod == PME || nonbondedMethod == LJPME) && hasCoulomb) || doLJPME) {
// Compute the PME parameters.
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
......@@ -1865,37 +1865,34 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
string source = cl.replaceStrings(OpenCLKernelSources::coulombLennardJones, defines);
charges.initialize(cl, cl.getPaddedNumAtoms(), cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float), "charges");
if (hasCoulomb) {
map<string, string> replacements;
if (usePosqCharges) {
cl.setCharges(chargeVec);
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
}
map<string, string> replacements;
if (usePosqCharges) {
cl.setCharges(chargeVec);
replacements["CHARGE1"] = "posq1.w";
replacements["CHARGE2"] = "posq2.w";
}
else {
if (cl.getUseDoublePrecision())
charges.upload(chargeVec);
else {
if (cl.getUseDoublePrecision())
charges.upload(chargeVec);
else {
vector<float> c(charges.getSize());
for (int i = 0; i < c.size(); i++)
c[i] = (float) chargeVec[i];
charges.upload(c);
}
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
vector<float> c(charges.getSize());
for (int i = 0; i < c.size(); i++)
c[i] = (float) chargeVec[i];
charges.upload(c);
}
source = cl.replaceStrings(source, replacements);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDeviceBuffer()));
replacements["CHARGE1"] = prefix+"charge1";
replacements["CHARGE2"] = prefix+"charge2";
}
if (hasCoulomb)
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"charge", "real", 1, charges.getElementSize(), charges.getDeviceBuffer()));
if (hasLJ) {
sigmaEpsilon.initialize<mm_float2>(cl, cl.getPaddedNumAtoms(), "sigmaEpsilon");
sigmaEpsilon.upload(sigmaEpsilonVector);
map<string, string> replacements;
replacements["SIGMA_EPSILON1"] = prefix+"sigmaEpsilon1";
replacements["SIGMA_EPSILON2"] = prefix+"sigmaEpsilon2";
source = cl.replaceStrings(source, replacements);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"sigmaEpsilon", "float", 2, sizeof(cl_float2), sigmaEpsilon.getDeviceBuffer()));
}
source = cl.replaceStrings(source, replacements);
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
// Initialize the exceptions.
......@@ -2096,35 +2093,20 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
// Execute the reciprocal space kernels.
setPeriodicBoxArgs(cl, pmeUpdateBsplinesKernel, 4);
if (cl.getUseDoublePrecision()) {
pmeUpdateBsplinesKernel.setArg<mm_double4>(9, recipBoxVectors[0]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(10, recipBoxVectors[1]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(11, recipBoxVectors[2]);
}
else {
pmeUpdateBsplinesKernel.setArg<mm_float4>(9, recipBoxVectorsFloat[0]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[1]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (hasCoulomb) {
setPeriodicBoxArgs(cl, pmeUpdateBsplinesKernel, 4);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(12, recipBoxVectors[2]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(9, recipBoxVectors[0]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(10, recipBoxVectors[1]);
pmeUpdateBsplinesKernel.setArg<mm_double4>(11, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(9, recipBoxVectorsFloat[0]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[1]);
pmeUpdateBsplinesKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
else {
sort->sort(pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
......@@ -2136,59 +2118,76 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeSpreadChargeKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
}
else {
cl.executeKernel(pmeAtomRangeKernel, cl.getNumAtoms());
setPeriodicBoxSizeArg(cl, pmeZIndexKernel, 2);
if (cl.getUseDoublePrecision())
pmeZIndexKernel.setArg<mm_double4>(3, recipBoxVectors[2]);
else
pmeZIndexKernel.setArg<mm_float4>(3, recipBoxVectorsFloat[2]);
cl.executeKernel(pmeZIndexKernel, cl.getNumAtoms());
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
sort->sort(pmeAtomGridIndex);
if (cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxArgs(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(10, recipBoxVectors[0]);
pmeSpreadChargeKernel.setArg<mm_double4>(11, recipBoxVectors[1]);
pmeSpreadChargeKernel.setArg<mm_double4>(12, recipBoxVectors[2]);
}
else {
pmeSpreadChargeKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[0]);
pmeSpreadChargeKernel.setArg<mm_float4>(11, recipBoxVectorsFloat[1]);
pmeSpreadChargeKernel.setArg<mm_float4>(12, recipBoxVectorsFloat[2]);
}
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
cl.executeKernel(pmeFinishSpreadChargeKernel, gridSizeX*gridSizeY*gridSizeZ);
}
else {
cl.executeKernel(pmeAtomRangeKernel, cl.getNumAtoms());
setPeriodicBoxSizeArg(cl, pmeZIndexKernel, 2);
if (cl.getUseDoublePrecision())
pmeZIndexKernel.setArg<mm_double4>(3, recipBoxVectors[2]);
else
pmeZIndexKernel.setArg<mm_float4>(3, recipBoxVectorsFloat[2]);
cl.executeKernel(pmeZIndexKernel, cl.getNumAtoms());
cl.executeKernel(pmeSpreadChargeKernel, cl.getNumAtoms());
}
}
fft->execFFT(pmeGrid, pmeGrid2, true);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) {
pmeConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
pmeConvolutionKernel.setArg<mm_double4>(5, recipBoxVectors[1]);
pmeConvolutionKernel.setArg<mm_double4>(6, recipBoxVectors[2]);
pmeEvalEnergyKernel.setArg<mm_double4>(5, recipBoxVectors[0]);
pmeEvalEnergyKernel.setArg<mm_double4>(6, recipBoxVectors[1]);
pmeEvalEnergyKernel.setArg<mm_double4>(7, recipBoxVectors[2]);
}
else {
pmeConvolutionKernel.setArg<mm_float4>(4, recipBoxVectorsFloat[0]);
pmeConvolutionKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[1]);
pmeConvolutionKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[2]);
pmeEvalEnergyKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[0]);
pmeEvalEnergyKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[1]);
pmeEvalEnergyKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]);
}
if (includeEnergy)
cl.executeKernel(pmeEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(pmeGrid2, pmeGrid, false);
setPeriodicBoxArgs(cl, pmeInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) {
pmeInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
pmeInterpolateForceKernel.setArg<mm_double4>(9, recipBoxVectors[1]);
pmeInterpolateForceKernel.setArg<mm_double4>(10, recipBoxVectors[2]);
}
else {
pmeInterpolateForceKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[0]);
pmeInterpolateForceKernel.setArg<mm_float4>(9, recipBoxVectorsFloat[1]);
pmeInterpolateForceKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[2]);
}
if (deviceIsCpu)
cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
else
cl.executeKernel(pmeInterpolateForceKernel, cl.getNumAtoms());
}
fft->execFFT(pmeGrid, pmeGrid2, true);
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
if (cl.getUseDoublePrecision()) {
pmeConvolutionKernel.setArg<mm_double4>(4, recipBoxVectors[0]);
pmeConvolutionKernel.setArg<mm_double4>(5, recipBoxVectors[1]);
pmeConvolutionKernel.setArg<mm_double4>(6, recipBoxVectors[2]);
pmeEvalEnergyKernel.setArg<mm_double4>(5, recipBoxVectors[0]);
pmeEvalEnergyKernel.setArg<mm_double4>(6, recipBoxVectors[1]);
pmeEvalEnergyKernel.setArg<mm_double4>(7, recipBoxVectors[2]);
}
else {
pmeConvolutionKernel.setArg<mm_float4>(4, recipBoxVectorsFloat[0]);
pmeConvolutionKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[1]);
pmeConvolutionKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[2]);
pmeEvalEnergyKernel.setArg<mm_float4>(5, recipBoxVectorsFloat[0]);
pmeEvalEnergyKernel.setArg<mm_float4>(6, recipBoxVectorsFloat[1]);
pmeEvalEnergyKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]);
}
if (includeEnergy)
cl.executeKernel(pmeEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(pmeGrid2, pmeGrid, false);
setPeriodicBoxArgs(cl, pmeInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) {
pmeInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
pmeInterpolateForceKernel.setArg<mm_double4>(9, recipBoxVectors[1]);
pmeInterpolateForceKernel.setArg<mm_double4>(10, recipBoxVectors[2]);
}
else {
pmeInterpolateForceKernel.setArg<mm_float4>(8, recipBoxVectorsFloat[0]);
pmeInterpolateForceKernel.setArg<mm_float4>(9, recipBoxVectorsFloat[1]);
pmeInterpolateForceKernel.setArg<mm_float4>(10, recipBoxVectorsFloat[2]);
}
if (deviceIsCpu)
cl.executeKernel(pmeInterpolateForceKernel, 2*cl.getDevice().getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>(), 1);
else
cl.executeKernel(pmeInterpolateForceKernel, cl.getNumAtoms());
if (doLJPME) {
if (doLJPME && hasLJ) {
setPeriodicBoxArgs(cl, pmeDispersionUpdateBsplinesKernel, 4);
if (cl.getUseDoublePrecision()) {
pmeDispersionUpdateBsplinesKernel.setArg<mm_double4>(9, recipBoxVectors[0]);
......@@ -2267,7 +2266,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (includeEnergy)
cl.executeKernel(pmeDispersionEvalEnergyKernel, gridSizeX*gridSizeY*gridSizeZ);
cl.executeKernel(pmeDispersionConvolutionKernel, gridSizeX*gridSizeY*gridSizeZ);
fft->execFFT(pmeGrid2, pmeGrid, false);
dispersionFft->execFFT(pmeGrid2, pmeGrid, false);
setPeriodicBoxArgs(cl, pmeDispersionInterpolateForceKernel, 3);
if (cl.getUseDoublePrecision()) {
pmeDispersionInterpolateForceKernel.setArg<mm_double4>(8, recipBoxVectors[0]);
......
......@@ -18,7 +18,7 @@ __kernel void computeBornSum(
#else
__global real* restrict global_bornSum,
#endif
__global const real4* restrict posq, , __global const real* restrict charge, __global const float2* restrict global_params,
__global const real4* restrict posq, __global const real* restrict charge, __global const float2* restrict global_params,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
......
......@@ -1269,7 +1269,7 @@ void testWater2DpmeEnergiesForcesWithExclusions() {
const vector<Vec3>& forces = state.getForces();
ASSERT_EQUAL_TOL(refenergy, energy, 1E-4);
ASSERT_EQUAL_TOL(refenergy, energy, 5E-4);
for (int n = 0; n < numAtoms; ++n)
ASSERT_EQUAL_VEC(refforces[n], forces[n], 5E-4);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment