Commit 9bcba51e authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing OpenCL support for triclinic boxes

parent 195bcecf
...@@ -77,13 +77,17 @@ static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int ...@@ -77,13 +77,17 @@ static void setInvPeriodicBoxSizeArg(OpenCLContext& cl, cl::Kernel& kernel, int
kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize()); kernel.setArg<mm_float4>(index, cl.getInvPeriodicBoxSize());
} }
static void setPeriodicBoxVecArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) { static void setPeriodicBoxArgs(OpenCLContext& cl, cl::Kernel& kernel, int index) {
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxSizeDouble());
kernel.setArg<mm_double4>(index++, cl.getInvPeriodicBoxSizeDouble());
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecXDouble()); kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecXDouble());
kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecYDouble()); kernel.setArg<mm_double4>(index++, cl.getPeriodicBoxVecYDouble());
kernel.setArg<mm_double4>(index, cl.getPeriodicBoxVecZDouble()); kernel.setArg<mm_double4>(index, cl.getPeriodicBoxVecZDouble());
} }
else { else {
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxSize());
kernel.setArg<mm_float4>(index++, cl.getInvPeriodicBoxSize());
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecX()); kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecX());
kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecY()); kernel.setArg<mm_float4>(index++, cl.getPeriodicBoxVecY());
kernel.setArg<mm_float4>(index, cl.getPeriodicBoxVecZ()); kernel.setArg<mm_float4>(index, cl.getPeriodicBoxVecZ());
...@@ -2400,8 +2404,8 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool ...@@ -2400,8 +2404,8 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData->getDeviceBuffer()); interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData->getDeviceBuffer());
setPeriodicBoxSizeArg(cl, interactionGroupKernel, index++); setPeriodicBoxArgs(cl, interactionGroupKernel, index);
setInvPeriodicBoxSizeArg(cl, interactionGroupKernel, index++); index += 5;
for (int i = 0; i < (int) params->getBuffers().size(); i++) for (int i = 0; i < (int) params->getBuffers().size(); i++)
interactionGroupKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory()); interactionGroupKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory());
if (globals != NULL) if (globals != NULL)
...@@ -2573,7 +2577,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2573,7 +2577,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // The periodic box size arguments are set when the kernel is executed. index += 5; // The periodic box size arguments are set when the kernel is executed.
computeBornSumKernel.setArg<cl_uint>(index++, maxTiles); computeBornSumKernel.setArg<cl_uint>(index++, maxTiles);
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
...@@ -2592,7 +2596,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2592,7 +2596,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // The periodic box size arguments are set when the kernel is executed. index += 5; // The periodic box size arguments are set when the kernel is executed.
force1Kernel.setArg<cl_uint>(index++, maxTiles); force1Kernel.setArg<cl_uint>(index++, maxTiles);
force1Kernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
...@@ -2625,18 +2629,16 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2625,18 +2629,16 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain->getDeviceBuffer()); reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain->getDeviceBuffer());
} }
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
setPeriodicBoxSizeArg(cl, computeBornSumKernel, 5); setPeriodicBoxArgs(cl, computeBornSumKernel, 5);
setInvPeriodicBoxSizeArg(cl, computeBornSumKernel, 6); setPeriodicBoxArgs(cl, force1Kernel, 7);
setPeriodicBoxSizeArg(cl, force1Kernel, 7);
setInvPeriodicBoxSizeArg(cl, force1Kernel, 8);
if (maxTiles < nb.getInteractingTiles().getSize()) { if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize(); maxTiles = nb.getInteractingTiles().getSize();
computeBornSumKernel.setArg<cl::Buffer>(3, nb.getInteractingTiles().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(3, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl_uint>(7, maxTiles); computeBornSumKernel.setArg<cl_uint>(10, maxTiles);
computeBornSumKernel.setArg<cl::Buffer>(10, nb.getInteractingAtoms().getDeviceBuffer()); computeBornSumKernel.setArg<cl::Buffer>(13, nb.getInteractingAtoms().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(5, nb.getInteractingTiles().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(5, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl_uint>(9, maxTiles); force1Kernel.setArg<cl_uint>(12, maxTiles);
force1Kernel.setArg<cl::Buffer>(12, nb.getInteractingAtoms().getDeviceBuffer()); force1Kernel.setArg<cl::Buffer>(15, nb.getInteractingAtoms().getDeviceBuffer());
} }
} }
cl.executeKernel(computeBornSumKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize()); cl.executeKernel(computeBornSumKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
...@@ -3450,7 +3452,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -3450,7 +3452,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // Periodic box size arguments are set when the kernel is executed. index += 5; // Periodic box size arguments are set when the kernel is executed.
pairValueKernel.setArg<cl_uint>(index++, maxTiles); pairValueKernel.setArg<cl_uint>(index++, maxTiles);
pairValueKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
...@@ -3493,7 +3495,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -3493,7 +3495,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
index += 2; // Periodic box size arguments are set when the kernel is executed. index += 5; // Periodic box size arguments are set when the kernel is executed.
pairEnergyKernel.setArg<cl_uint>(index++, maxTiles); pairEnergyKernel.setArg<cl_uint>(index++, maxTiles);
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getBlockCenters().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getBlockBoundingBoxes().getDeviceBuffer());
...@@ -3577,18 +3579,16 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include ...@@ -3577,18 +3579,16 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
globals->upload(globalParamValues); globals->upload(globalParamValues);
} }
if (nb.getUseCutoff()) { if (nb.getUseCutoff()) {
setPeriodicBoxSizeArg(cl, pairValueKernel, 8); setPeriodicBoxArgs(cl, pairValueKernel, 8);
setInvPeriodicBoxSizeArg(cl, pairValueKernel, 9); setPeriodicBoxArgs(cl, pairEnergyKernel, 9);
setPeriodicBoxSizeArg(cl, pairEnergyKernel, 9);
setInvPeriodicBoxSizeArg(cl, pairEnergyKernel, 10);
if (maxTiles < nb.getInteractingTiles().getSize()) { if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize(); maxTiles = nb.getInteractingTiles().getSize();
pairValueKernel.setArg<cl::Buffer>(6, nb.getInteractingTiles().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(6, nb.getInteractingTiles().getDeviceBuffer());
pairValueKernel.setArg<cl_uint>(10, maxTiles); pairValueKernel.setArg<cl_uint>(13, maxTiles);
pairValueKernel.setArg<cl::Buffer>(13, nb.getInteractingAtoms().getDeviceBuffer()); pairValueKernel.setArg<cl::Buffer>(16, nb.getInteractingAtoms().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(7, nb.getInteractingTiles().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(7, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl_uint>(11, maxTiles); pairEnergyKernel.setArg<cl_uint>(14, maxTiles);
pairEnergyKernel.setArg<cl::Buffer>(14, nb.getInteractingAtoms().getDeviceBuffer()); pairEnergyKernel.setArg<cl::Buffer>(17, nb.getInteractingAtoms().getDeviceBuffer());
} }
} }
cl.executeKernel(pairValueKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize()); cl.executeKernel(pairValueKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
...@@ -4226,7 +4226,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl ...@@ -4226,7 +4226,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
donorKernel.setArg<cl::Buffer>(index++, acceptors->getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, acceptors->getDeviceBuffer());
donorKernel.setArg<cl::Buffer>(index++, donorBufferIndices->getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, donorBufferIndices->getDeviceBuffer());
donorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL); donorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
index += 2; // Periodic box size arguments are set when the kernel is executed. index += 5; // Periodic box size arguments are set when the kernel is executed.
if (globals != NULL) if (globals != NULL)
donorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); donorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
...@@ -4248,7 +4248,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl ...@@ -4248,7 +4248,7 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
acceptorKernel.setArg<cl::Buffer>(index++, acceptors->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, acceptors->getDeviceBuffer());
acceptorKernel.setArg<cl::Buffer>(index++, acceptorBufferIndices->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, acceptorBufferIndices->getDeviceBuffer());
acceptorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL); acceptorKernel.setArg(index++, 3*OpenCLContext::ThreadBlockSize*sizeof(mm_float4), NULL);
index += 2; // Periodic box size arguments are set when the kernel is executed. index += 5; // Periodic box size arguments are set when the kernel is executed.
if (globals != NULL) if (globals != NULL)
acceptorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) { for (int i = 0; i < (int) donorParams->getBuffers().size(); i++) {
...@@ -4262,11 +4262,9 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl ...@@ -4262,11 +4262,9 @@ double OpenCLCalcCustomHbondForceKernel::execute(ContextImpl& context, bool incl
for (int i = 0; i < (int) tabulatedFunctions.size(); i++) for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
acceptorKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer()); acceptorKernel.setArg<cl::Buffer>(index++, tabulatedFunctions[i]->getDeviceBuffer());
} }
setPeriodicBoxSizeArg(cl, donorKernel, 8); setPeriodicBoxArgs(cl, donorKernel, 8);
setInvPeriodicBoxSizeArg(cl, donorKernel, 9);
cl.executeKernel(donorKernel, max(numDonors, numAcceptors)); cl.executeKernel(donorKernel, max(numDonors, numAcceptors));
setPeriodicBoxSizeArg(cl, acceptorKernel, 8); setPeriodicBoxArgs(cl, acceptorKernel, 8);
setInvPeriodicBoxSizeArg(cl, acceptorKernel, 9);
cl.executeKernel(acceptorKernel, max(numDonors, numAcceptors)); cl.executeKernel(acceptorKernel, max(numDonors, numAcceptors));
return 0.0; return 0.0;
} }
...@@ -4853,7 +4851,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c ...@@ -4853,7 +4851,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
const vector<int>& atoms = iter->second; const vector<int>& atoms = iter->second;
string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]]; string deltaName = atomNames[atoms[0]]+atomNames[atoms[1]];
if (computedDeltas.count(deltaName) == 0) { if (computedDeltas.count(deltaName) == 0) {
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n"; compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName); computedDeltas.insert(deltaName);
} }
compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n"; compute<<"real r_"<<deltaName<<" = sqrt(delta"<<deltaName<<".w);\n";
...@@ -4867,11 +4865,11 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c ...@@ -4867,11 +4865,11 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]]; string deltaName2 = atomNames[atoms[1]]+atomNames[atoms[2]];
string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]; string angleName = "angle_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]];
if (computedDeltas.count(deltaName1) == 0) { if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<", periodicBoxSize, invPeriodicBoxSize);\n"; compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName1); computedDeltas.insert(deltaName1);
} }
if (computedDeltas.count(deltaName2) == 0) { if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<", periodicBoxSize, invPeriodicBoxSize);\n"; compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName2); computedDeltas.insert(deltaName2);
} }
compute<<"real "<<angleName<<" = computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n"; compute<<"real "<<angleName<<" = computeAngle(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
...@@ -4888,15 +4886,15 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c ...@@ -4888,15 +4886,15 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
string crossName2 = "cross_"+deltaName2+"_"+deltaName3; string crossName2 = "cross_"+deltaName2+"_"+deltaName3;
string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]]; string dihedralName = "dihedral_"+atomNames[atoms[0]]+atomNames[atoms[1]]+atomNames[atoms[2]]+atomNames[atoms[3]];
if (computedDeltas.count(deltaName1) == 0) { if (computedDeltas.count(deltaName1) == 0) {
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n"; compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName1); computedDeltas.insert(deltaName1);
} }
if (computedDeltas.count(deltaName2) == 0) { if (computedDeltas.count(deltaName2) == 0) {
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize);\n"; compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName2); computedDeltas.insert(deltaName2);
} }
if (computedDeltas.count(deltaName3) == 0) { if (computedDeltas.count(deltaName3) == 0) {
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<", periodicBoxSize, invPeriodicBoxSize);\n"; compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);\n";
computedDeltas.insert(deltaName3); computedDeltas.insert(deltaName3);
} }
compute<<"real4 "<<crossName1<<" = computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n"; compute<<"real4 "<<crossName1<<" = computeCross(delta"<<deltaName1<<", delta"<<deltaName2<<");\n";
...@@ -5066,7 +5064,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c ...@@ -5066,7 +5064,7 @@ void OpenCLCalcCustomManyParticleForceKernel::initialize(const System& system, c
if (!centralParticleMode) { if (!centralParticleMode) {
for (int i = 1; i < particlesPerSet; i++) { for (int i = 1; i < particlesPerSet; i++) {
for (int j = i+1; j < particlesPerSet; j++) for (int j = i+1; j < particlesPerSet; j++)
verifyCutoff<<"includeInteraction &= (delta(pos"<<(i+1)<<", pos"<<(j+1)<<", periodicBoxSize, invPeriodicBoxSize).w < CUTOFF_SQUARED);\n"; verifyCutoff<<"includeInteraction &= (delta(pos"<<(i+1)<<", pos"<<(j+1)<<", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ).w < CUTOFF_SQUARED);\n";
} }
} }
} }
...@@ -5139,8 +5137,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -5139,8 +5137,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
forceKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
setPeriodicBoxSizeArg(cl, forceKernel, index++); setPeriodicBoxArgs(cl, forceKernel, index);
setInvPeriodicBoxSizeArg(cl, forceKernel, index++); index += 5;
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
forceKernel.setArg<cl::Buffer>(index++, neighbors->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, neighbors->getDeviceBuffer());
forceKernel.setArg<cl::Buffer>(index++, neighborStartIndex->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(index++, neighborStartIndex->getDeviceBuffer());
...@@ -5167,8 +5165,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -5167,8 +5165,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
// Set arguments for the block bounds kernel. // Set arguments for the block bounds kernel.
index = 0; index = 0;
setPeriodicBoxSizeArg(cl, blockBoundsKernel, index++); setPeriodicBoxArgs(cl, blockBoundsKernel, index);
setInvPeriodicBoxSizeArg(cl, blockBoundsKernel, index++); index += 5;
blockBoundsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer());
blockBoundsKernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer()); blockBoundsKernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer());
...@@ -5177,8 +5175,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -5177,8 +5175,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
// Set arguments for the neighbor list kernel. // Set arguments for the neighbor list kernel.
index = 0; index = 0;
setPeriodicBoxSizeArg(cl, neighborsKernel, index++); setPeriodicBoxArgs(cl, neighborsKernel, index);
setInvPeriodicBoxSizeArg(cl, neighborsKernel, index++); index += 5;
neighborsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, blockCenter->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(index++, blockBoundingBox->getDeviceBuffer());
...@@ -5224,7 +5222,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -5224,7 +5222,7 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
int* numPairs = (int*) cl.getPinnedBuffer(); int* numPairs = (int*) cl.getPinnedBuffer();
cl::Event event; cl::Event event;
if (nonbondedMethod != NoCutoff) { if (nonbondedMethod != NoCutoff) {
neighborsKernel.setArg<int>(8, maxNeighborPairs); neighborsKernel.setArg<int>(11, maxNeighborPairs);
startIndicesKernel.setArg<int>(3, maxNeighborPairs); startIndicesKernel.setArg<int>(3, maxNeighborPairs);
copyPairsKernel.setArg<int>(3, maxNeighborPairs); copyPairsKernel.setArg<int>(3, maxNeighborPairs);
cl.executeKernel(blockBoundsKernel, cl.getNumAtomBlocks()); cl.executeKernel(blockBoundsKernel, cl.getNumAtomBlocks());
...@@ -5254,8 +5252,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo ...@@ -5254,8 +5252,8 @@ double OpenCLCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bo
maxNeighborPairs = (int) (1.1*(*numPairs)); maxNeighborPairs = (int) (1.1*(*numPairs));
neighborPairs = OpenCLArray::create<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs"); neighborPairs = OpenCLArray::create<mm_int2>(cl, maxNeighborPairs, "customManyParticleNeighborPairs");
neighbors = OpenCLArray::create<int>(cl, maxNeighborPairs, "customManyParticleNeighbors"); neighbors = OpenCLArray::create<int>(cl, maxNeighborPairs, "customManyParticleNeighbors");
forceKernel.setArg<cl::Buffer>(5, neighbors->getDeviceBuffer()); forceKernel.setArg<cl::Buffer>(8, neighbors->getDeviceBuffer());
neighborsKernel.setArg<cl::Buffer>(5, neighborPairs->getDeviceBuffer()); neighborsKernel.setArg<cl::Buffer>(8, neighborPairs->getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(0, neighborPairs->getDeviceBuffer()); copyPairsKernel.setArg<cl::Buffer>(0, neighborPairs->getDeviceBuffer());
copyPairsKernel.setArg<cl::Buffer>(1, neighbors->getDeviceBuffer()); copyPairsKernel.setArg<cl::Buffer>(1, neighbors->getDeviceBuffer());
continue; continue;
...@@ -6656,9 +6654,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, ...@@ -6656,9 +6654,7 @@ void OpenCLApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context,
kernel.setArg<cl_float>(0, (cl_float) scaleX); kernel.setArg<cl_float>(0, (cl_float) scaleX);
kernel.setArg<cl_float>(1, (cl_float) scaleY); kernel.setArg<cl_float>(1, (cl_float) scaleY);
kernel.setArg<cl_float>(2, (cl_float) scaleZ); kernel.setArg<cl_float>(2, (cl_float) scaleZ);
setPeriodicBoxSizeArg(cl, kernel, 4); setPeriodicBoxArgs(cl, kernel, 4);
setInvPeriodicBoxSizeArg(cl, kernel, 5);
setPeriodicBoxVecArgs(cl, kernel, 6);
cl.executeKernel(kernel, cl.getNumAtoms()); cl.executeKernel(kernel, cl.getNumAtoms());
for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++) for (int i = 0; i < (int) cl.getPosCellOffsets().size(); i++)
cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0); cl.getPosCellOffsets()[i] = mm_int4(0, 0, 0, 0);
......
...@@ -21,7 +21,8 @@ __kernel void computeN2Energy( ...@@ -21,7 +21,8 @@ __kernel void computeN2Energy(
__global const ushort2* exclusionTiles, __global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -60,7 +61,7 @@ __kernel void computeN2Energy( ...@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[atom2]; real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -110,7 +111,7 @@ __kernel void computeN2Energy( ...@@ -110,7 +111,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[atom2]; real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -266,10 +267,8 @@ __kernel void computeN2Energy( ...@@ -266,10 +267,8 @@ __kernel void computeN2Energy(
// box, then skip having to apply periodic boundary conditions later. // box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
posq1.xyz -= floor((posq1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
local_posq[get_local_id(0)].x -= floor((local_posq[get_local_id(0)].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(local_posq[get_local_id(0)], blockCenterX)
local_posq[get_local_id(0)].y -= floor((local_posq[get_local_id(0)].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
local_posq[get_local_id(0)].z -= floor((local_posq[get_local_id(0)].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
SYNC_WARPS; SYNC_WARPS;
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
...@@ -310,7 +309,7 @@ __kernel void computeN2Energy( ...@@ -310,7 +309,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[atom2]; real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -21,7 +21,8 @@ __kernel void computeN2Energy( ...@@ -21,7 +21,8 @@ __kernel void computeN2Energy(
__global const ushort2* exclusionTiles, __global const ushort2* exclusionTiles,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -60,7 +61,7 @@ __kernel void computeN2Energy( ...@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[j]; real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -126,7 +127,7 @@ __kernel void computeN2Energy( ...@@ -126,7 +127,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[j]; real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -272,7 +273,7 @@ __kernel void computeN2Energy( ...@@ -272,7 +273,7 @@ __kernel void computeN2Energy(
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++)
local_posq[tgx].xyz -= floor((local_posq[tgx].xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(local_posq[tgx], blockCenterX)
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) { for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
unsigned int atom1 = x*TILE_SIZE+tgx; unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0; real4 force = 0;
...@@ -332,7 +333,7 @@ __kernel void computeN2Energy( ...@@ -332,7 +333,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[j]; real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -15,7 +15,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -15,7 +15,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
__local real* restrict local_value, __local real* restrict local_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[atom2]; real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -100,7 +101,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -100,7 +101,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[atom2]; real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -239,10 +240,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -239,10 +240,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
// box, then skip having to apply periodic boundary conditions later. // box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
posq1.xyz -= floor((posq1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
local_posq[get_local_id(0)].x -= floor((local_posq[get_local_id(0)].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(local_posq[get_local_id(0)], blockCenterX)
local_posq[get_local_id(0)].y -= floor((local_posq[get_local_id(0)].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
local_posq[get_local_id(0)].z -= floor((local_posq[get_local_id(0)].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
SYNC_WARPS; SYNC_WARPS;
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
...@@ -278,7 +277,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -278,7 +277,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[atom2]; real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -15,7 +15,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -15,7 +15,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
__local real* restrict local_value, __local real* restrict local_value,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#else #else
unsigned int numTiles unsigned int numTiles
#endif #endif
...@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[j]; real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -109,7 +110,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -109,7 +110,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[j]; real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -239,7 +240,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -239,7 +240,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++)
local_posq[tgx].xyz -= floor((local_posq[tgx].xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(local_posq[tgx], blockCenterX)
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) { for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
unsigned int atom1 = x*TILE_SIZE+tgx; unsigned int atom1 = x*TILE_SIZE+tgx;
real value = 0; real value = 0;
...@@ -287,7 +288,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4* ...@@ -287,7 +288,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[j]; real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -11,12 +11,10 @@ real4 delta(real4 vec1, real4 vec2) { ...@@ -11,12 +11,10 @@ real4 delta(real4 vec1, real4 vec2) {
* Compute the difference between two vectors, taking periodic boundary conditions into account * Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude. * and setting the fourth component to the squared magnitude.
*/ */
real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) { real4 deltaPeriodic(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0); real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
result.x -= floor(result.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(result)
result.y -= floor(result.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
result.z -= floor(result.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z; result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result; return result;
...@@ -56,7 +54,8 @@ real4 computeCross(real4 vec1, real4 vec2) { ...@@ -56,7 +54,8 @@ real4 computeCross(real4 vec1, real4 vec2) {
* Compute forces on donors. * Compute forces on donors.
*/ */
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions, __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize __global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
real energy = 0; real energy = 0;
real4 f1 = (real4) 0; real4 f1 = (real4) 0;
...@@ -102,7 +101,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global ...@@ -102,7 +101,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
real4 a1 = posBuffer[3*index]; real4 a1 = posBuffer[3*index];
real4 a2 = posBuffer[3*index+1]; real4 a2 = posBuffer[3*index+1];
real4 a3 = posBuffer[3*index+2]; real4 a3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize); real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) { if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif #endif
...@@ -144,7 +143,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global ...@@ -144,7 +143,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
* Compute forces on acceptors. * Compute forces on acceptors.
*/ */
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions, __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize __global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
real4 f1 = (real4) 0; real4 f1 = (real4) 0;
real4 f2 = (real4) 0; real4 f2 = (real4) 0;
...@@ -189,7 +189,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo ...@@ -189,7 +189,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
real4 d1 = posBuffer[3*index]; real4 d1 = posBuffer[3*index];
real4 d2 = posBuffer[3*index+1]; real4 d2 = posBuffer[3*index+1];
real4 d3 = posBuffer[3*index+2]; real4 d3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize); real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) { if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif #endif
......
...@@ -14,12 +14,10 @@ inline void storeForce(int atom, real4 force, __global long* restrict forceBuffe ...@@ -14,12 +14,10 @@ inline void storeForce(int atom, real4 force, __global long* restrict forceBuffe
* Compute the difference between two vectors, taking periodic boundary conditions into account * Compute the difference between two vectors, taking periodic boundary conditions into account
* and setting the fourth component to the squared magnitude. * and setting the fourth component to the squared magnitude.
*/ */
inline real4 delta(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize) { inline real4 delta(real4 vec1, real4 vec2, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f); real4 result = (real4) (vec1.x-vec2.x, vec1.y-vec2.y, vec1.z-vec2.z, 0.0f);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
result.x -= floor(result.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(result)
result.y -= floor(result.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
result.z -= floor(result.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
result.w = result.x*result.x + result.y*result.y + result.z*result.z; result.w = result.x*result.x + result.y*result.y + result.z*result.z;
return result; return result;
...@@ -75,7 +73,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e ...@@ -75,7 +73,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
*/ */
__kernel void computeInteraction( __kernel void computeInteraction(
__global long* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global long* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq,
real4 periodicBoxSize, real4 invPeriodicBoxSize real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const int* restrict neighbors, __global const int* restrict neighborStartIndex , __global const int* restrict neighbors, __global const int* restrict neighborStartIndex
#endif #endif
...@@ -138,16 +136,14 @@ __kernel void computeInteraction( ...@@ -138,16 +136,14 @@ __kernel void computeInteraction(
/** /**
* Find a bounding box for the atoms in each block. * Find a bounding box for the atoms in each block.
*/ */
__kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const real4* restrict posq, __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict numNeighborPairs) { __global const real4* restrict posq, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict numNeighborPairs) {
int index = get_global_id(0); int index = get_global_id(0);
int base = index*TILE_SIZE; int base = index*TILE_SIZE;
while (base < NUM_ATOMS) { while (base < NUM_ATOMS) {
real4 pos = posq[base]; real4 pos = posq[base];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
pos.x -= floor(pos.x*invPeriodicBoxSize.x)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS(pos)
pos.y -= floor(pos.y*invPeriodicBoxSize.y)*periodicBoxSize.y;
pos.z -= floor(pos.z*invPeriodicBoxSize.z)*periodicBoxSize.z;
#endif #endif
real4 minPos = pos; real4 minPos = pos;
real4 maxPos = pos; real4 maxPos = pos;
...@@ -156,9 +152,7 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _ ...@@ -156,9 +152,7 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
pos = posq[i]; pos = posq[i];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
real4 center = 0.5f*(maxPos+minPos); real4 center = 0.5f*(maxPos+minPos);
pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
minPos = (real4) (min(minPos.x,pos.x), min(minPos.y,pos.y), min(minPos.z,pos.z), 0); minPos = (real4) (min(minPos.x,pos.x), min(minPos.y,pos.y), min(minPos.z,pos.z), 0);
maxPos = (real4) (max(maxPos.x,pos.x), max(maxPos.y,pos.y), max(maxPos.z,pos.z), 0); maxPos = (real4) (max(maxPos.x,pos.x), max(maxPos.y,pos.y), max(maxPos.z,pos.z), 0);
...@@ -176,8 +170,8 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _ ...@@ -176,8 +170,8 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
/** /**
* Find a list of neighbors for each atom. * Find a list of neighbors for each atom.
*/ */
__kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const real4* restrict posq, __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global const real4* restrict blockCenter, __global const real4* restrict blockBoundingBox, __global int2* restrict neighborPairs, __global const real4* restrict posq, __global const real4* restrict blockCenter, __global const real4* restrict blockBoundingBox, __global int2* restrict neighborPairs,
__global int* restrict numNeighborPairs, __global int* restrict numNeighborsForAtom, int maxNeighborPairs __global int* restrict numNeighborPairs, __global int* restrict numNeighborsForAtom, int maxNeighborPairs
#ifdef USE_EXCLUSIONS #ifdef USE_EXCLUSIONS
, __global int* restrict exclusions, __global int* restrict exclusionStartIndex , __global int* restrict exclusions, __global int* restrict exclusionStartIndex
...@@ -212,9 +206,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g ...@@ -212,9 +206,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
real4 blockSize2 = blockBoundingBox[block2]; real4 blockSize2 = blockBoundingBox[block2];
real4 blockDelta = blockCenter1-blockCenter2; real4 blockDelta = blockCenter1-blockCenter2;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
blockDelta.x -= floor(blockDelta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(blockDelta)
blockDelta.y -= floor(blockDelta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
blockDelta.z -= floor(blockDelta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
blockDelta.x = max((real) 0, fabs(blockDelta.x)-blockSize1.x-blockSize2.x); blockDelta.x = max((real) 0, fabs(blockDelta.x)-blockSize1.x-blockSize2.x);
blockDelta.y = max((real) 0, fabs(blockDelta.y)-blockSize1.y-blockSize2.y); blockDelta.y = max((real) 0, fabs(blockDelta.y)-blockSize1.y-blockSize2.y);
...@@ -243,7 +235,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g ...@@ -243,7 +235,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
// Decide whether to include this atom pair in the neighbor list. // Decide whether to include this atom pair in the neighbor list.
real4 atomDelta = delta(pos1, pos2, periodicBoxSize, invPeriodicBoxSize); real4 atomDelta = delta(pos1, pos2, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ);
#ifdef USE_CENTRAL_PARTICLE #ifdef USE_CENTRAL_PARTICLE
bool includeAtom = (atom2 != atom1 && atom2 < NUM_ATOMS && atomDelta.w < CUTOFF_SQUARED); bool includeAtom = (atom2 != atom1 && atom2 < NUM_ATOMS && atomDelta.w < CUTOFF_SQUARED);
#else #else
......
...@@ -42,8 +42,8 @@ __kernel void computeInteractionGroups( ...@@ -42,8 +42,8 @@ __kernel void computeInteractionGroups(
#else #else
__global real4* restrict forceBuffers, __global real4* restrict forceBuffers,
#endif #endif
__global real* restrict energyBuffer, __global const real4* restrict posq, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict groupData,
__global const int4* restrict groupData, real4 periodicBoxSize, real4 invPeriodicBoxSize real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
const unsigned int totalWarps = get_global_size(0)/TILE_SIZE; const unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
const unsigned int warp = get_global_id(0)/TILE_SIZE; // global warpIndex const unsigned int warp = get_global_id(0)/TILE_SIZE; // global warpIndex
...@@ -82,7 +82,7 @@ __kernel void computeInteractionGroups( ...@@ -82,7 +82,7 @@ __kernel void computeInteractionGroups(
posq2 = (real4) (localData[localIndex].x, localData[localIndex].y, localData[localIndex].z, localData[localIndex].q); posq2 = (real4) (localData[localIndex].x, localData[localIndex].y, localData[localIndex].z, localData[localIndex].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -22,7 +22,8 @@ __kernel void computeBornSum( ...@@ -22,7 +22,8 @@ __kernel void computeBornSum(
__global const real4* restrict posq, __global const float2* restrict global_params, __global const real4* restrict posq, __global const float2* restrict global_params,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
#endif #endif
...@@ -58,7 +59,7 @@ __kernel void computeBornSum( ...@@ -58,7 +59,7 @@ __kernel void computeBornSum(
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 delta = (real4) (localData[tbx+j].x-posq1.x, localData[tbx+j].y-posq1.y, localData[tbx+j].z-posq1.z, 0); real4 delta = (real4) (localData[tbx+j].x-posq1.x, localData[tbx+j].y-posq1.y, localData[tbx+j].z-posq1.z, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -105,7 +106,7 @@ __kernel void computeBornSum( ...@@ -105,7 +106,7 @@ __kernel void computeBornSum(
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
real4 delta = (real4) (localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z, 0); real4 delta = (real4) (localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -256,10 +257,8 @@ __kernel void computeBornSum( ...@@ -256,10 +257,8 @@ __kernel void computeBornSum(
// box, then skip having to apply periodic boundary conditions later. // box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
posq1.xyz -= floor((posq1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
localData[get_local_id(0)].x -= floor((localData[get_local_id(0)].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[get_local_id(0)], blockCenterX)
localData[get_local_id(0)].y -= floor((localData[get_local_id(0)].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[get_local_id(0)].z -= floor((localData[get_local_id(0)].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
SYNC_WARPS; SYNC_WARPS;
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
...@@ -307,7 +306,7 @@ __kernel void computeBornSum( ...@@ -307,7 +306,7 @@ __kernel void computeBornSum(
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
real4 delta = (real4) (localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z, 0); real4 delta = (real4) (localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[tbx+tj]; int atom2 = atomIndices[tbx+tj];
...@@ -391,7 +390,8 @@ __kernel void computeGBSAForce1( ...@@ -391,7 +390,8 @@ __kernel void computeGBSAForce1(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else #else
unsigned int numTiles, unsigned int numTiles,
#endif #endif
...@@ -430,7 +430,7 @@ __kernel void computeGBSAForce1( ...@@ -430,7 +430,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q); real4 posq2 = (real4) (localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -485,7 +485,7 @@ __kernel void computeGBSAForce1( ...@@ -485,7 +485,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q); real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -645,10 +645,8 @@ __kernel void computeGBSAForce1( ...@@ -645,10 +645,8 @@ __kernel void computeGBSAForce1(
// box, then skip having to apply periodic boundary conditions later. // box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
posq1.xyz -= floor((posq1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
localData[get_local_id(0)].x -= floor((localData[get_local_id(0)].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[get_local_id(0)], blockCenterX)
localData[get_local_id(0)].y -= floor((localData[get_local_id(0)].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[get_local_id(0)].z -= floor((localData[get_local_id(0)].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
SYNC_WARPS; SYNC_WARPS;
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
...@@ -700,7 +698,7 @@ __kernel void computeGBSAForce1( ...@@ -700,7 +698,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q); real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z; real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2014 Stanford University and the Authors. * * Portions copyright (c) 2014-2015 Stanford University and the Authors. *
* Authors: Peter Eastman * * Authors: Peter Eastman *
* Contributors: * * Contributors: *
* * * *
...@@ -55,7 +55,17 @@ const double TOL = 1e-5; ...@@ -55,7 +55,17 @@ const double TOL = 1e-5;
OpenCLPlatform platform; OpenCLPlatform platform;
void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& positions, const vector<const int*>& expectedSets, double boxSize) { Vec3 computeDelta(const Vec3& pos1, const Vec3& pos2, bool periodic, const Vec3* periodicBoxVectors) {
Vec3 diff = pos1-pos2;
if (periodic) {
diff -= periodicBoxVectors[2]*floor(diff[2]/periodicBoxVectors[2][2]+0.5);
diff -= periodicBoxVectors[1]*floor(diff[1]/periodicBoxVectors[1][1]+0.5);
diff -= periodicBoxVectors[0]*floor(diff[0]/periodicBoxVectors[0][0]+0.5);
}
return diff;
}
void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& positions, const vector<const int*>& expectedSets, double boxSize, bool triclinic) {
// Create a System and Context. // Create a System and Context.
int numParticles = force->getNumParticles(); int numParticles = force->getNumParticles();
...@@ -63,7 +73,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p ...@@ -63,7 +73,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
System system; System system;
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
system.addParticle(1.0); system.addParticle(1.0);
system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize)); Vec3 boxVectors[3];
if (triclinic) {
boxVectors[0] = Vec3(boxSize, 0, 0);
boxVectors[1] = Vec3(0.2*boxSize, boxSize, 0);
boxVectors[2] = Vec3(-0.3*boxSize, -0.1*boxSize, boxSize);
}
else {
boxVectors[0] = Vec3(boxSize, 0, 0);
boxVectors[1] = Vec3(0, boxSize, 0);
boxVectors[2] = Vec3(0, 0, boxSize);
}
system.setDefaultPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
system.addForce(force); system.addForce(force);
VerletIntegrator integrator(0.001); VerletIntegrator integrator(0.001);
Context context(system, integrator, platform); Context context(system, integrator, platform);
...@@ -74,20 +95,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p ...@@ -74,20 +95,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
// See if the energy matches the expected value. // See if the energy matches the expected value.
double expectedEnergy = 0; double expectedEnergy = 0;
bool periodic = (nonbondedMethod == CustomManyParticleForce::CutoffPeriodic);
for (int i = 0; i < (int) expectedSets.size(); i++) { for (int i = 0; i < (int) expectedSets.size(); i++) {
int p1 = expectedSets[i][0]; int p1 = expectedSets[i][0];
int p2 = expectedSets[i][1]; int p2 = expectedSets[i][1];
int p3 = expectedSets[i][2]; int p3 = expectedSets[i][2];
Vec3 d12 = positions[p2]-positions[p1]; Vec3 d12 = computeDelta(positions[p2], positions[p1], periodic, boxVectors);
Vec3 d13 = positions[p3]-positions[p1]; Vec3 d13 = computeDelta(positions[p3], positions[p1], periodic, boxVectors);
Vec3 d23 = positions[p3]-positions[p2]; Vec3 d23 = computeDelta(positions[p3], positions[p2], periodic, boxVectors);
if (nonbondedMethod == CustomManyParticleForce::CutoffPeriodic) {
for (int j = 0; j < 3; j++) {
d12[j] -= floor(d12[j]/boxSize+0.5f)*boxSize;
d13[j] -= floor(d13[j]/boxSize+0.5f)*boxSize;
d23[j] -= floor(d23[j]/boxSize+0.5f)*boxSize;
}
}
double r12 = sqrt(d12.dot(d12)); double r12 = sqrt(d12.dot(d12));
double r13 = sqrt(d13.dot(d13)); double r13 = sqrt(d13.dot(d13));
double r23 = sqrt(d23.dot(d23)); double r23 = sqrt(d23.dot(d23));
...@@ -210,7 +225,7 @@ void testNoCutoff() { ...@@ -210,7 +225,7 @@ void testNoCutoff() {
positions.push_back(Vec3(0.4, 0, -0.8)); positions.push_back(Vec3(0.4, 0, -0.8));
int sets[4][3] = {{0,1,2}, {1,2,3}, {2,3,0}, {3,0,1}}; int sets[4][3] = {{0,1,2}, {1,2,3}, {2,3,0}, {3,0,1}};
vector<const int*> expectedSets(&sets[0], &sets[4]); vector<const int*> expectedSets(&sets[0], &sets[4]);
validateAxilrodTeller(force, positions, expectedSets, 2.0); validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
} }
void testCutoff() { void testCutoff() {
...@@ -235,7 +250,7 @@ void testCutoff() { ...@@ -235,7 +250,7 @@ void testCutoff() {
positions.push_back(Vec3(0.2, 0.5, -0.1)); positions.push_back(Vec3(0.2, 0.5, -0.1));
int sets[7][3] = {{0,1,2}, {0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,2,4}, {1,3,4}}; int sets[7][3] = {{0,1,2}, {0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,2,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[7]); vector<const int*> expectedSets(&sets[0], &sets[7]);
validateAxilrodTeller(force, positions, expectedSets, 2.0); validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
} }
void testPeriodic() { void testPeriodic() {
...@@ -261,7 +276,33 @@ void testPeriodic() { ...@@ -261,7 +276,33 @@ void testPeriodic() {
double boxSize = 2.1; double boxSize = 2.1;
int sets[5][3] = {{0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,3,4}}; int sets[5][3] = {{0,1,3}, {0,1,4}, {0,2,4}, {0,3,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[5]); vector<const int*> expectedSets(&sets[0], &sets[5]);
validateAxilrodTeller(force, positions, expectedSets, boxSize); validateAxilrodTeller(force, positions, expectedSets, boxSize, false);
}
void testTriclinic() {
CustomManyParticleForce* force = new CustomManyParticleForce(3,
"C*(1+3*cos(theta1)*cos(theta2)*cos(theta3))/(r12*r13*r23)^3;"
"theta1=angle(p1,p2,p3); theta2=angle(p2,p3,p1); theta3=angle(p3,p1,p2);"
"r12=distance(p1,p2); r13=distance(p1,p3); r23=distance(p2,p3)");
force->addGlobalParameter("C", 1.5);
force->setNonbondedMethod(CustomManyParticleForce::CutoffPeriodic);
force->setCutoffDistance(1.05);
vector<double> params;
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
force->addParticle(params);
vector<Vec3> positions;
positions.push_back(Vec3(0, 0, 0));
positions.push_back(Vec3(1, 0, 0));
positions.push_back(Vec3(0, 1.1, 0.3));
positions.push_back(Vec3(0.4, 0, -0.8));
positions.push_back(Vec3(0.2, 0.5, -0.1));
double boxSize = 2.1;
int sets[4][3] = {{0,1,3}, {0,1,4}, {0,3,4}, {1,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[4]);
validateAxilrodTeller(force, positions, expectedSets, boxSize, true);
} }
void testExclusions() { void testExclusions() {
...@@ -286,7 +327,7 @@ void testExclusions() { ...@@ -286,7 +327,7 @@ void testExclusions() {
force->addExclusion(0, 3); force->addExclusion(0, 3);
int sets[5][3] = {{0,1,4}, {1,2,3}, {1,2,4}, {1,3,4}, {2,3,4}}; int sets[5][3] = {{0,1,4}, {1,2,3}, {1,2,4}, {1,3,4}, {2,3,4}};
vector<const int*> expectedSets(&sets[0], &sets[5]); vector<const int*> expectedSets(&sets[0], &sets[5]);
validateAxilrodTeller(force, positions, expectedSets, 2.0); validateAxilrodTeller(force, positions, expectedSets, 2.0, false);
} }
void testAllTerms() { void testAllTerms() {
...@@ -672,6 +713,7 @@ int main(int argc, char* argv[]) { ...@@ -672,6 +713,7 @@ int main(int argc, char* argv[]) {
testNoCutoff(); testNoCutoff();
testCutoff(); testCutoff();
testPeriodic(); testPeriodic();
testTriclinic();
testExclusions(); testExclusions();
testAllTerms(); testAllTerms();
testParameters(); testParameters();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment