Commit cd874b2b authored by peastman's avatar peastman
Browse files

Merged changes from main branch

parents a783b996 b84e22ba
......@@ -25,6 +25,7 @@
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
#include <algorithm>
#include <iostream>
using namespace std;
using namespace OpenMM;
......@@ -213,7 +214,6 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
......@@ -263,7 +263,6 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxn(int blockIndex, float* forces
template <int PERIODIC_TYPE>
void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize, const fvec4& blockCenter) {
// Load the positions and parameters of the atoms in the block.
const int* blockAtom = &neighborList->getSortedAtoms()[4*blockIndex];
fvec4 blockAtomPosq[4];
fvec4 blockAtomForceX(0.0f), blockAtomForceY(0.0f), blockAtomForceZ(0.0f);
......@@ -278,9 +277,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 blockAtomCharge = fvec4(ONE_4PI_EPS0)*fvec4(blockAtomPosq[0][3], blockAtomPosq[1][3], blockAtomPosq[2][3], blockAtomPosq[3][3]);
fvec4 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first);
fvec4 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second);
fvec4 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
// Loop over neighbors for this block.
const vector<int>& neighbors = neighborList->getBlockNeighbors(blockIndex);
......@@ -318,7 +318,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec4 sig2 = inverseR*sig;
sig2 *= sig2;
fvec4 sig6 = sig2*sig2*sig2;
fvec4 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6;
fvec4 eps = blockAtomEpsilon*atomEpsilon;
fvec4 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f);
if (useSwitch) {
......@@ -328,6 +329,17 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue;
}
if (ljpme) {
fvec4 C6ij = C6s*C6params[atom];
fvec4 inverseR2 = inverseR*inverseR;
fvec4 mysig2 = sig*sig;
fvec4 mysig6 = mysig2*mysig2*mysig2;
fvec4 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec4 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
}
else {
energy = 0.0f;
......@@ -362,7 +374,7 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
}
// Record the forces on the block atoms.
fvec4 f[4] = {blockAtomForceX, blockAtomForceY, blockAtomForceZ, 0.0f};
transpose(f[0], f[1], f[2], f[3]);
for (int j = 0; j < 4; j++)
......@@ -420,3 +432,30 @@ fvec4 CpuNonbondedForceVec4::ewaldScaleFunction(const fvec4& x) {
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
fvec4 CpuNonbondedForceVec4::exptermsApprox(const fvec4& r) {
fvec4 r1 = r*exptermsDXInv;
ivec4 index = min(floor(r1), NUM_TABLE_POINTS);
fvec4 coeff2 = r1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t1(&exptermsTable[index[0]]);
fvec4 t2(&exptermsTable[index[1]]);
fvec4 t3(&exptermsTable[index[2]]);
fvec4 t4(&exptermsTable[index[3]]);
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
fvec4 CpuNonbondedForceVec4::dExptermsApprox(const fvec4& r) {
fvec4 r1 = r*exptermsDXInv;
ivec4 index = min(floor(r1), NUM_TABLE_POINTS);
fvec4 coeff2 = r1-index;
fvec4 coeff1 = 1.0f-coeff2;
fvec4 t1(&dExptermsTable[index[0]]);
fvec4 t2(&dExptermsTable[index[1]]);
fvec4 t3(&dExptermsTable[index[2]]);
fvec4 t4(&dExptermsTable[index[3]]);
transpose(t1, t2, t3, t4);
return coeff1*t1 + coeff2*t2;
}
......@@ -27,6 +27,7 @@
#include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h"
#include <algorithm>
#include <iostream>
using namespace std;
using namespace OpenMM;
......@@ -80,8 +81,7 @@ CpuNonbondedForceVec8::CpuNonbondedForceVec8() {
enum PeriodicType {NoPeriodic, PeriodicPerAtom, PeriodicPerInteraction, PeriodicTriclinic};
void CpuNonbondedForceVec8::calculateBlockIxn(int blockIndex, float* forces, double* totalEnergy, const fvec4& boxSize, const fvec4& invBoxSize) {
// Determine whether we need to apply periodic boundary conditions.
// Determine whether we need to apply periodic boundary conditions.
PeriodicType periodicType;
fvec4 blockCenter;
if (!periodic) {
......@@ -308,6 +308,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
blockAtomCharge *= ONE_4PI_EPS0;
fvec8 blockAtomSigma(atomParameters[blockAtom[0]].first, atomParameters[blockAtom[1]].first, atomParameters[blockAtom[2]].first, atomParameters[blockAtom[3]].first, atomParameters[blockAtom[4]].first, atomParameters[blockAtom[5]].first, atomParameters[blockAtom[6]].first, atomParameters[blockAtom[7]].first);
fvec8 blockAtomEpsilon(atomParameters[blockAtom[0]].second, atomParameters[blockAtom[1]].second, atomParameters[blockAtom[2]].second, atomParameters[blockAtom[3]].second, atomParameters[blockAtom[4]].second, atomParameters[blockAtom[5]].second, atomParameters[blockAtom[6]].second, atomParameters[blockAtom[7]].second);
fvec8 C6s(C6params[blockAtom[0]], C6params[blockAtom[1]], C6params[blockAtom[2]], C6params[blockAtom[3]], C6params[blockAtom[4]], C6params[blockAtom[5]], C6params[blockAtom[6]], C6params[blockAtom[7]]);
const bool needPeriodic = (PERIODIC_TYPE == PeriodicPerInteraction || PERIODIC_TYPE == PeriodicTriclinic);
const float invSwitchingInterval = 1/(cutoffDistance-switchingDistance);
......@@ -348,7 +349,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
fvec8 sig2 = inverseR*sig;
sig2 *= sig2;
fvec8 sig6 = sig2*sig2*sig2;
fvec8 epsSig6 = blockAtomEpsilon*atomEpsilon*sig6;
fvec8 eps = blockAtomEpsilon*atomEpsilon;
fvec8 epsSig6 = eps*sig6;
dEdR = epsSig6*(12.0f*sig6 - 6.0f);
energy = epsSig6*(sig6-1.0f);
if (useSwitch) {
......@@ -358,6 +360,17 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
dEdR = switchValue*dEdR - energy*switchDeriv*r;
energy *= switchValue;
}
if (ljpme) {
fvec8 C6ij = C6s*C6params[atom];
fvec8 inverseR2 = inverseR*inverseR;
fvec8 mysig2 = sig*sig;
fvec8 mysig6 = mysig2*mysig2*mysig2;
fvec8 emult = C6ij*inverseR2*inverseR2*inverseR2*exptermsApprox(r);
fvec8 potentialShift = eps*(1.0f-mysig6*inverseRcut6)*mysig6*inverseRcut6 - C6ij*inverseRcut6Expterm;
dEdR += 6.0f*C6ij*inverseR2*inverseR2*inverseR2*dExptermsApprox(r);
energy += emult + potentialShift;
}
}
else {
energy = 0.0f;
......@@ -464,4 +477,45 @@ fvec8 CpuNonbondedForceVec8::ewaldScaleFunction(const fvec8& x) {
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
fvec8 CpuNonbondedForceVec8::exptermsApprox(const fvec8& r) {
fvec8 r1 = r*exptermsDXInv;
ivec8 index = min(floor(r1), NUM_TABLE_POINTS);
fvec8 coeff2 = r1-index;
fvec8 coeff1 = 1.0f-coeff2;
ivec4 indexLower = index.lowerVec();
ivec4 indexUpper = index.upperVec();
fvec4 t1(&exptermsTable[indexLower[0]]);
fvec4 t2(&exptermsTable[indexLower[1]]);
fvec4 t3(&exptermsTable[indexLower[2]]);
fvec4 t4(&exptermsTable[indexLower[3]]);
fvec4 t5(&exptermsTable[indexUpper[0]]);
fvec4 t6(&exptermsTable[indexUpper[1]]);
fvec4 t7(&exptermsTable[indexUpper[2]]);
fvec4 t8(&exptermsTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
fvec8 CpuNonbondedForceVec8::dExptermsApprox(const fvec8& r) {
fvec8 r1 = r*exptermsDXInv;
ivec8 index = min(floor(r1), NUM_TABLE_POINTS);
fvec8 coeff2 = r1-index;
fvec8 coeff1 = 1.0f-coeff2;
ivec4 indexLower = index.lowerVec();
ivec4 indexUpper = index.upperVec();
fvec4 t1(&dExptermsTable[indexLower[0]]);
fvec4 t2(&dExptermsTable[indexLower[1]]);
fvec4 t3(&dExptermsTable[indexLower[2]]);
fvec4 t4(&dExptermsTable[indexLower[3]]);
fvec4 t5(&dExptermsTable[indexUpper[0]]);
fvec4 t6(&dExptermsTable[indexUpper[1]]);
fvec4 t7(&dExptermsTable[indexUpper[2]]);
fvec4 t8(&dExptermsTable[indexUpper[3]]);
fvec8 s1, s2, s3, s4;
transpose(t1, t2, t3, t4, t5, t6, t7, t8, s1, s2, s3, s4);
return coeff1*s1 + coeff2*s2;
}
#endif
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013-2015 Stanford University and the Authors. *
* Portions copyright (c) 2013-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -35,52 +35,6 @@
using namespace OpenMM;
using namespace std;
class CpuSETTLE::ApplyToPositionsTask : public ThreadPool::Task {
public:
ApplyToPositionsTask(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3>& atomCoordinatesP, vector<double>& inverseMasses,
double tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) : atomCoordinates(atomCoordinates), atomCoordinatesP(atomCoordinatesP),
inverseMasses(inverseMasses), tolerance(tolerance), threadSettle(threadSettle) {
gmx_atomic_set(&atomicCounter, 0);
}
void execute(ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
}
}
vector<OpenMM::Vec3>& atomCoordinates;
vector<OpenMM::Vec3>& atomCoordinatesP;
vector<double>& inverseMasses;
double tolerance;
vector<ReferenceSETTLEAlgorithm*>& threadSettle;
gmx_atomic_t atomicCounter;
};
class CpuSETTLE::ApplyToVelocitiesTask : public ThreadPool::Task {
public:
ApplyToVelocitiesTask(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3>& velocities, vector<double>& inverseMasses,
double tolerance, vector<ReferenceSETTLEAlgorithm*>& threadSettle) : atomCoordinates(atomCoordinates), velocities(velocities),
inverseMasses(inverseMasses), tolerance(tolerance), threadSettle(threadSettle) {
gmx_atomic_set(&atomicCounter, 0);
}
void execute(ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
}
}
vector<OpenMM::Vec3>& atomCoordinates;
vector<OpenMM::Vec3>& velocities;
vector<double>& inverseMasses;
double tolerance;
vector<ReferenceSETTLEAlgorithm*>& threadSettle;
gmx_atomic_t atomicCounter;
};
CpuSETTLE::CpuSETTLE(const System& system, const ReferenceSETTLEAlgorithm& settle, ThreadPool& threads) : threads(threads) {
int numBlocks = 10*threads.getNumThreads();
int numClusters = settle.getNumClusters();
......@@ -107,13 +61,29 @@ CpuSETTLE::~CpuSETTLE() {
}
void CpuSETTLE::apply(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3>& atomCoordinatesP, vector<double>& inverseMasses, double tolerance) {
ApplyToPositionsTask task(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance, threadSettle);
threads.execute(task);
gmx_atomic_t atomicCounter;
gmx_atomic_set(&atomicCounter, 0);
threads.execute([&] (ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->apply(atomCoordinates, atomCoordinatesP, inverseMasses, tolerance);
}
});
threads.waitForThreads();
}
void CpuSETTLE::applyToVelocities(vector<OpenMM::Vec3>& atomCoordinates, vector<OpenMM::Vec3>& velocities, vector<double>& inverseMasses, double tolerance) {
ApplyToVelocitiesTask task(atomCoordinates, velocities, inverseMasses, tolerance, threadSettle);
threads.execute(task);
gmx_atomic_t atomicCounter;
gmx_atomic_set(&atomicCounter, 0);
threads.execute([&] (ThreadPool& threads, int threadIndex) {
while (true) {
int index = gmx_atomic_fetch_add(&atomicCounter, 1);
if (index >= threadSettle.size())
break;
threadSettle[index]->applyToVelocities(atomCoordinates, velocities, inverseMasses, tolerance);
}
});
threads.waitForThreads();
}
......@@ -16,7 +16,6 @@ ENDFOREACH(file)
ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME}_static ${PTHREADS_LIB_STATIC})
#-DPTW32_STATIC_LIB only works for the windows pthreads.
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB")
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_LINK_FLAGS}" COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CPU_BUILDING_STATIC_LIBRARY")
INSTALL_TARGETS(/lib/plugins RUNTIME_DIRECTORY /lib/plugins ${STATIC_TARGET})
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CpuTests.h"
#include "TestDispersionPME.h"
void runPlatformTests() {
}
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -494,6 +494,10 @@ public:
CudaNonbondedUtilities& getNonbondedUtilities() {
return *nonbonded;
}
/**
* Set the particle charges. These are packed into the fourth element of the posq array.
*/
void setCharges(const std::vector<double>& charges);
/**
* Get the thread used by this context for executing parallel computations.
*/
......@@ -577,6 +581,12 @@ public:
* and order to be revalidated.
*/
void invalidateMolecules();
/**
* Mark that the current molecule definitions from one particular force (and hence the atom order)
* may be invalid. This should be called whenever force field parameters change. It will cause the
* definitions and order to be revalidated.
*/
bool invalidateMolecules(CudaForceInfo* force);
private:
/**
* Compute a sorted list of device indices in decreasing order of desirability
......@@ -626,6 +636,7 @@ private:
CUfunction clearFourBuffersKernel;
CUfunction clearFiveBuffersKernel;
CUfunction clearSixBuffersKernel;
CUfunction setChargesKernel;
std::vector<CudaForceInfo*> forces;
std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups;
......@@ -638,6 +649,7 @@ private:
CudaArray* energyBuffer;
CudaArray* energyParamDerivBuffer;
CudaArray* atomIndexDevice;
CudaArray* chargeBuffer;
std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -198,7 +198,6 @@ public:
*/
void loadCheckpoint(ContextImpl& context, std::istream& stream);
private:
class GetPositionsTask;
CudaContext& cu;
};
......@@ -292,9 +291,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
private:
class ForceInfo;
int numBonds;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params;
};
......@@ -332,9 +333,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
private:
class ForceInfo;
int numBonds;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
......@@ -375,9 +378,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
private:
class ForceInfo;
int numAngles;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params;
};
......@@ -415,9 +420,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
private:
class ForceInfo;
int numAngles;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
......@@ -458,9 +465,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params;
};
......@@ -498,9 +507,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaArray* params1;
CudaArray* params2;
......@@ -539,9 +550,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
std::vector<int2> mapPositionsVec;
CudaArray* coefficients;
......@@ -582,9 +595,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
......@@ -599,7 +614,8 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
CudaCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaContext& cu, const System& system) : CalcNonbondedForceKernel(name, platform),
cu(cu), hasInitializedFFT(false), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), directPmeGrid(NULL), reciprocalPmeGrid(NULL),
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) {
pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeDispersionBsplineModuliX(NULL), pmeDispersionBsplineModuliY(NULL),
pmeDispersionBsplineModuliZ(NULL), pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), dispersionFft(NULL), fft(NULL), pmeio(NULL) {
}
~CudaCalcNonbondedForceKernel();
/**
......@@ -636,6 +652,15 @@ public:
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class SortTrait : public CudaSort::SortTrait {
int getDataSize() const {return 8;}
......@@ -647,12 +672,14 @@ private:
const char* getMaxValue() const {return "make_int2(2147483647, 2147483647)";}
const char* getSortKey() const {return "value.y";}
};
class ForceInfo;
class PmeIO;
class PmePreComputation;
class PmePostComputation;
class SyncStreamPreComputation;
class SyncStreamPostComputation;
CudaContext& cu;
ForceInfo* info;
bool hasInitializedFFT;
CudaArray* sigmaEpsilon;
CudaArray* exceptionParams;
......@@ -662,6 +689,9 @@ private:
CudaArray* pmeBsplineModuliX;
CudaArray* pmeBsplineModuliY;
CudaArray* pmeBsplineModuliZ;
CudaArray* pmeDispersionBsplineModuliX;
CudaArray* pmeDispersionBsplineModuliY;
CudaArray* pmeDispersionBsplineModuliZ;
CudaArray* pmeAtomRange;
CudaArray* pmeAtomGridIndex;
CudaArray* pmeEnergyBuffer;
......@@ -673,20 +703,29 @@ private:
CudaFFT3D* fft;
cufftHandle fftForward;
cufftHandle fftBackward;
CudaFFT3D* dispersionFft;
cufftHandle dispersionFftForward;
cufftHandle dispersionFftBackward;
CUfunction ewaldSumsKernel;
CUfunction ewaldForcesKernel;
CUfunction pmeGridIndexKernel;
CUfunction pmeDispersionGridIndexKernel;
CUfunction pmeSpreadChargeKernel;
CUfunction pmeDispersionSpreadChargeKernel;
CUfunction pmeFinishSpreadChargeKernel;
CUfunction pmeDispersionFinishSpreadChargeKernel;
CUfunction pmeEvalEnergyKernel;
CUfunction pmeEvalDispersionEnergyKernel;
CUfunction pmeConvolutionKernel;
CUfunction pmeDispersionConvolutionKernel;
CUfunction pmeInterpolateForceKernel;
std::map<std::string, std::string> pmeDefines;
CUfunction pmeInterpolateDispersionForceKernel;
std::vector<std::pair<int, int> > exceptionAtoms;
double ewaldSelfEnergy, dispersionCoefficient, alpha;
double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int interpolateForceThreads;
int gridSizeX, gridSizeY, gridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT;
int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeStream, useCudaFFT, doLJPME;
NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5;
};
......@@ -724,8 +763,10 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private:
class ForceInfo;
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes);
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaArray* globals;
CudaArray* interactionGroupData;
......@@ -775,10 +816,12 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private:
class ForceInfo;
double prefactor, surfaceAreaFactor, cutoff;
bool hasCreatedKernels;
int maxTiles;
CudaContext& cu;
ForceInfo* info;
CudaArray* params;
CudaArray* bornSum;
CudaArray* bornRadii;
......@@ -825,10 +868,12 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private:
class ForceInfo;
double cutoff;
bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues;
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaParameterSet* computedValues;
CudaParameterSet* energyDerivs;
......@@ -882,9 +927,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
private:
class ForceInfo;
int numParticles;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
const System& system;
CudaParameterSet* params;
CudaArray* globals;
......@@ -926,9 +973,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private:
class ForceInfo;
int numDonors, numAcceptors;
bool hasInitializedKernel;
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* donorParams;
CudaParameterSet* acceptorParams;
CudaArray* globals;
......@@ -978,9 +1027,11 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force);
private:
class ForceInfo;
int numGroups, numBonds;
bool needEnergyParamDerivs;
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaArray* globals;
CudaArray* groupParticles;
......@@ -1031,8 +1082,10 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force);
private:
class ForceInfo;
int numBonds;
CudaContext& cu;
ForceInfo* info;
CudaParameterSet* params;
CudaArray* globals;
std::vector<std::string> globalParamNames;
......@@ -1077,7 +1130,9 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomManyParticleForce& force);
private:
class ForceInfo;
CudaContext& cu;
ForceInfo* info;
bool hasInitializedKernel;
NonbondedMethod nonbondedMethod;
int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
......@@ -1139,9 +1194,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const GayBerneForce& force);
private:
class ForceInfo;
class ReorderListener;
void sortAtoms();
CudaContext& cu;
ForceInfo* info;
bool hasInitializedKernels;
int numRealParticles, numExceptions, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod;
......
......@@ -439,6 +439,15 @@ public:
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the dispersion parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class Task;
CudaPlatform::PlatformData& data;
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2016 Stanford University and the Authors. *
* Portions copyright (c) 2009-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -108,7 +108,8 @@ static int executeInWindows(const string &command) {
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false), isNvccAvailable(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), energyParamDerivBuffer(NULL), atomIndexDevice(NULL), chargeBuffer(NULL),
integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
// Determine what compiler to use.
this->compiler = "\""+compiler+"\"";
......@@ -291,6 +292,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
clearFourBuffersKernel = getKernel(utilities, "clearFourBuffers");
clearFiveBuffersKernel = getKernel(utilities, "clearFiveBuffers");
clearSixBuffersKernel = getKernel(utilities, "clearSixBuffers");
setChargesKernel = getKernel(utilities, "setCharges");
// Set defines based on the requested precision.
......@@ -407,6 +409,8 @@ CudaContext::~CudaContext() {
delete energyParamDerivBuffer;
if (atomIndexDevice != NULL)
delete atomIndexDevice;
if (chargeBuffer != NULL)
delete chargeBuffer;
if (integration != NULL)
delete integration;
if (expression != NULL)
......@@ -860,6 +864,25 @@ void CudaContext::clearAutoclearBuffers() {
}
}
void CudaContext::setCharges(const vector<double>& charges) {
if (chargeBuffer == NULL)
chargeBuffer = new CudaArray(*this, numAtoms, useDoublePrecision ? sizeof(double) : sizeof(float), "chargeBuffer");
if (getUseDoublePrecision()) {
double* c = (double*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
c[i] = charges[i];
chargeBuffer->upload(c);
}
else {
float* c = (float*) getPinnedBuffer();
for (int i = 0; i < charges.size(); i++)
c[i] = (float) charges[i];
chargeBuffer->upload(c);
}
void* args[] = {&chargeBuffer->getDevicePointer(), &posq->getDevicePointer(), &atomIndexDevice->getDevicePointer(), &numAtoms};
executeKernel(setChargesKernel, args, numAtoms);
}
/**
* This class ensures that atom reordering doesn't break virtual sites.
*/
......@@ -1058,9 +1081,19 @@ void CudaContext::findMoleculeGroups() {
}
void CudaContext::invalidateMolecules() {
for (int i = 0; i < forces.size(); i++)
if (invalidateMolecules(forces[i]))
return;
}
bool CudaContext::invalidateMolecules(CudaForceInfo* force) {
if (numAtoms == 0 || nonbonded == NULL || !nonbonded->getUseCutoff())
return;
return false;
bool valid = true;
int forceIndex = -1;
for (int i = 0; i < forces.size(); i++)
if (forces[i] == force)
forceIndex = i;
for (int group = 0; valid && group < (int) moleculeGroups.size(); group++) {
MoleculeGroup& mol = moleculeGroups[group];
vector<int>& instances = mol.instances;
......@@ -1075,22 +1108,21 @@ void CudaContext::invalidateMolecules() {
Molecule& m2 = molecules[instances[j]];
int offset2 = offsets[j];
for (int i = 0; i < (int) atoms.size() && valid; i++) {
for (int k = 0; k < (int) forces.size(); k++)
if (!forces[k]->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
valid = false;
if (!force->areParticlesIdentical(atoms[i]+offset1, atoms[i]+offset2))
valid = false;
}
// See if the force groups are identical.
for (int i = 0; i < (int) forces.size() && valid; i++) {
for (int k = 0; k < (int) m1.groups[i].size() && valid; k++)
if (!forces[i]->areGroupsIdentical(m1.groups[i][k], m2.groups[i][k]))
if (valid && forceIndex > -1) {
for (int k = 0; k < (int) m1.groups[forceIndex].size() && valid; k++)
if (!force->areGroupsIdentical(m1.groups[forceIndex][k], m2.groups[forceIndex][k]))
valid = false;
}
}
}
if (valid)
return;
return false;
// The list of which molecules are identical is no longer valid. We need to restore the
// atoms to their original order, rebuild the list of identical molecules, and sort them
......@@ -1158,6 +1190,7 @@ void CudaContext::invalidateMolecules() {
for (int i = 0; i < (int) reorderListeners.size(); i++)
reorderListeners[i]->execute();
reorderAtoms();
return true;
}
void CudaContext::reorderAtoms() {
......
......@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -147,11 +147,29 @@ void CudaUpdateStateDataKernel::setTime(ContextImpl& context, double time) {
contexts[i]->setTime(time);
}
class CudaUpdateStateDataKernel::GetPositionsTask : public ThreadPool::Task {
public:
GetPositionsTask(CudaContext& cu, vector<Vec3>& positions, vector<float4>& posCorrection) : cu(cu), positions(positions), posCorrection(posCorrection) {
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
cu.setAsCurrent();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector<float4> posCorrection;
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
else if (cu.getUseMixedPrecision()) {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cu.getPosqCorrection().download(posCorrection);
}
void execute(ThreadPool& threads, int threadIndex) {
else {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
// Filling in the output array is done in parallel for speed.
cu.getPlatformData().threads.execute([&] (ThreadPool& threads, int threadIndex) {
// Compute the position of each particle to return to the user. This is done in parallel for speed.
const vector<int>& order = cu.getAtomIndex();
......@@ -186,36 +204,7 @@ public:
positions[order[i]] = Vec3(pos.x, pos.y, pos.z)-boxVectors[0]*offset.x-boxVectors[1]*offset.y-boxVectors[2]*offset.z;
}
}
}
CudaContext& cu;
vector<Vec3>& positions;
vector<float4>& posCorrection;
};
void CudaUpdateStateDataKernel::getPositions(ContextImpl& context, vector<Vec3>& positions) {
cu.setAsCurrent();
int numParticles = context.getSystem().getNumParticles();
positions.resize(numParticles);
vector<float4> posCorrection;
if (cu.getUseDoublePrecision()) {
double4* posq = (double4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
else if (cu.getUseMixedPrecision()) {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq, false);
posCorrection.resize(numParticles);
cu.getPosqCorrection().download(posCorrection);
}
else {
float4* posq = (float4*) cu.getPinnedBuffer();
cu.getPosq().download(posq);
}
// Filling in the output array is done in parallel for speed.
GetPositionsTask task(cu, positions, posCorrection);
cu.getPlatformData().threads.execute(task);
});
cu.getPlatformData().threads.waitForThreads();
}
......@@ -502,9 +491,9 @@ void CudaVirtualSitesKernel::computePositions(ContextImpl& context) {
cu.getIntegrationUtilities().computeVirtualSites();
}
class CudaHarmonicBondForceInfo : public CudaForceInfo {
class CudaCalcHarmonicBondForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaHarmonicBondForceInfo(const HarmonicBondForce& force) : force(force) {
ForceInfo(const HarmonicBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
......@@ -556,7 +545,8 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicBondForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::bondForce, replacements), force.getForceGroup());
cu.addForce(new CudaHarmonicBondForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......@@ -589,9 +579,9 @@ void CudaCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& conte
cu.invalidateMolecules();
}
class CudaCustomBondForceInfo : public CudaForceInfo {
class CudaCalcCustomBondForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCustomBondForceInfo(const CustomBondForce& force) : force(force) {
ForceInfo(const CustomBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
......@@ -645,7 +635,8 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomBondForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
// Record information for the expressions.
......@@ -743,9 +734,9 @@ void CudaCalcCustomBondForceKernel::copyParametersToContext(ContextImpl& context
cu.invalidateMolecules();
}
class CudaHarmonicAngleForceInfo : public CudaForceInfo {
class CudaCalcHarmonicAngleForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaHarmonicAngleForceInfo(const HarmonicAngleForce& force) : force(force) {
ForceInfo(const HarmonicAngleForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
......@@ -799,7 +790,8 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
replacements["COMPUTE_FORCE"] = CudaKernelSources::harmonicAngleForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::angleForce, replacements), force.getForceGroup());
cu.addForce(new CudaHarmonicAngleForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......@@ -832,9 +824,9 @@ void CudaCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& cont
cu.invalidateMolecules();
}
class CudaCustomAngleForceInfo : public CudaForceInfo {
class CudaCalcCustomAngleForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCustomAngleForceInfo(const CustomAngleForce& force) : force(force) {
ForceInfo(const CustomAngleForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumAngles();
......@@ -889,7 +881,8 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomAngleForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
// Record information for the expressions.
......@@ -987,9 +980,9 @@ void CudaCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl& contex
cu.invalidateMolecules();
}
class CudaPeriodicTorsionForceInfo : public CudaForceInfo {
class CudaCalcPeriodicTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaPeriodicTorsionForceInfo(const PeriodicTorsionForce& force) : force(force) {
ForceInfo(const PeriodicTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
......@@ -1044,7 +1037,8 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
replacements["COMPUTE_FORCE"] = CudaKernelSources::periodicTorsionForce;
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(params->getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
cu.addForce(new CudaPeriodicTorsionForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......@@ -1077,9 +1071,9 @@ void CudaCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& co
cu.invalidateMolecules();
}
class CudaRBTorsionForceInfo : public CudaForceInfo {
class CudaCalcRBTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaRBTorsionForceInfo(const RBTorsionForce& force) : force(force) {
ForceInfo(const RBTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
......@@ -1141,7 +1135,8 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
replacements["PARAMS1"] = cu.getBondedUtilities().addArgument(params1->getDevicePointer(), "float4");
replacements["PARAMS2"] = cu.getBondedUtilities().addArgument(params2->getDevicePointer(), "float2");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::torsionForce, replacements), force.getForceGroup());
cu.addForce(new CudaRBTorsionForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......@@ -1177,9 +1172,9 @@ void CudaCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context,
cu.invalidateMolecules();
}
class CudaCMAPTorsionForceInfo : public CudaForceInfo {
class CudaCalcCMAPTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCMAPTorsionForceInfo(const CMAPTorsionForce& force) : force(force) {
ForceInfo(const CMAPTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
......@@ -1259,7 +1254,8 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
replacements["MAP_POS"] = cu.getBondedUtilities().addArgument(mapPositions->getDevicePointer(), "int2");
replacements["MAPS"] = cu.getBondedUtilities().addArgument(torsionMaps->getDevicePointer(), "int");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::cmapTorsionForce, replacements), force.getForceGroup());
cu.addForce(new CudaCMAPTorsionForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......@@ -1309,9 +1305,9 @@ void CudaCalcCMAPTorsionForceKernel::copyParametersToContext(ContextImpl& contex
torsionMaps->upload(torsionMapsVec);
}
class CudaCustomTorsionForceInfo : public CudaForceInfo {
class CudaCalcCustomTorsionForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCustomTorsionForceInfo(const CustomTorsionForce& force) : force(force) {
ForceInfo(const CustomTorsionForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumTorsions();
......@@ -1366,7 +1362,8 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomTorsionForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
// Record information for the expressions.
......@@ -1464,9 +1461,9 @@ void CudaCalcCustomTorsionForceKernel::copyParametersToContext(ContextImpl& cont
cu.invalidateMolecules();
}
class CudaNonbondedForceInfo : public CudaForceInfo {
class CudaCalcNonbondedForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaNonbondedForceInfo(const NonbondedForce& force) : force(force) {
ForceInfo(const NonbondedForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, sigma1, sigma2, epsilon1, epsilon2;
......@@ -1607,6 +1604,12 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete pmeBsplineModuliY;
if (pmeBsplineModuliZ != NULL)
delete pmeBsplineModuliZ;
if (pmeDispersionBsplineModuliX != NULL)
delete pmeDispersionBsplineModuliX;
if (pmeDispersionBsplineModuliY != NULL)
delete pmeDispersionBsplineModuliY;
if (pmeDispersionBsplineModuliZ != NULL)
delete pmeDispersionBsplineModuliZ;
if (pmeAtomRange != NULL)
delete pmeAtomRange;
if (pmeAtomGridIndex != NULL)
......@@ -1617,12 +1620,18 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete sort;
if (fft != NULL)
delete fft;
if (dispersionFft != NULL)
delete dispersionFft;
if (pmeio != NULL)
delete pmeio;
if (hasInitializedFFT) {
if (useCudaFFT) {
cufftDestroy(fftForward);
cufftDestroy(fftBackward);
if (doLJPME) {
cufftDestroy(dispersionFftForward);
cufftDestroy(dispersionFftBackward);
}
}
if (usePmeStream) {
cuStreamDestroy(pmeStream);
......@@ -1658,6 +1667,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
vector<vector<int> > exclusionList(numParticles);
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
hasCoulomb = false;
hasLJ = false;
for (int i = 0; i < numParticles; i++) {
......@@ -1667,9 +1677,13 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
posqd[i] = make_double4(0, 0, 0, charge);
else
posqf[i] = make_float4(0, 0, 0, (float) charge);
sigmaEpsilonVector[i] = make_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
double sig = 0.5*sigma;
double eps = 2.0*sqrt(epsilon);
sigmaEpsilonVector[i] = make_float2(sig, eps);
exclusionList[i].push_back(i);
sumSquaredCharges += charge*charge;
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
if (charge != 0.0)
hasCoulomb = true;
if (epsilon != 0.0)
......@@ -1684,6 +1698,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
nonbondedMethod = CalcNonbondedForceKernel::NonbondedMethod(force.getNonbondedMethod());
bool useCutoff = (nonbondedMethod != NoCutoff);
bool usePeriodic = (nonbondedMethod != NoCutoff && nonbondedMethod != CutoffNonPeriodic);
doLJPME = (nonbondedMethod == LJPME);
map<string, string> defines;
defines["HAS_COULOMB"] = (hasCoulomb ? "1" : "0");
defines["HAS_LENNARD_JONES"] = (hasLJ ? "1" : "0");
......@@ -1705,7 +1720,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines["LJ_SWITCH_C5"] = cu.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
}
}
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0)
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && !doLJPME)
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(system, force);
else
dispersionCoefficient = 0.0;
......@@ -1740,22 +1755,36 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
}
}
else if (nonbondedMethod == PME) {
else if (nonbondedMethod == PME || nonbondedMethod == LJPME) {
// Compute the PME parameters.
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ);
NonbondedForceImpl::calcPMEParameters(system, force, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
if (doLJPME) {
NonbondedForceImpl::calcPMEParameters(system, force, dispersionAlpha, dispersionGridSizeX,
dispersionGridSizeY, dispersionGridSizeZ, true);
dispersionGridSizeX = CudaFFT3D::findLegalDimension(dispersionGridSizeX);
dispersionGridSizeY = CudaFFT3D::findLegalDimension(dispersionGridSizeY);
dispersionGridSizeZ = CudaFFT3D::findLegalDimension(dispersionGridSizeZ);
}
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1";
defines["DO_LJPME"] = doLJPME ? "1" : "0";
if (doLJPME)
defines["EWALD_DISPERSION_ALPHA"] = cu.doubleToString(dispersionAlpha);
if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
if (doLJPME)
ewaldSelfEnergy += pow(dispersionAlpha, 6)*sumSquaredC6/12.0;
char deviceName[100];
cuDeviceGetName(deviceName, 100, cu.getDevice());
usePmeStream = (!cu.getPlatformData().disablePmeStream && string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
map<string, string> pmeDefines;
pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles);
pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
......@@ -1772,7 +1801,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if (cu.getPlatformData().deterministicForces)
pmeDefines["USE_DETERMINISTIC_FORCES"] = "1";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
if (cu.getPlatformData().useCpuPme) {
if (cu.getPlatformData().useCpuPme && !doLJPME) {
// Create the CPU PME kernel.
try {
......@@ -1796,16 +1825,48 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
if (doLJPME) {
pmeDefines["EWALD_ALPHA"] = cu.doubleToString(dispersionAlpha);
pmeDefines["GRID_SIZE_X"] = cu.intToString(dispersionGridSizeX);
pmeDefines["GRID_SIZE_Y"] = cu.intToString(dispersionGridSizeY);
pmeDefines["GRID_SIZE_Z"] = cu.intToString(dispersionGridSizeZ);
pmeDefines["EPSILON_FACTOR"] = "1";
pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(dispersionAlpha*dispersionAlpha));
pmeDefines["USE_LJPME"] = "1";
double invRCut6 = pow(force.getCutoffDistance(), -6);
double dalphaR = dispersionAlpha * force.getCutoffDistance();
double dar2 = dalphaR*dalphaR;
double dar4 = dar2*dar2;
double multShift6 = -invRCut6*(1.0 - exp(-dar2) * (1.0 + dar2 + 0.5*dar4));
defines["INVCUT6"] = cu.doubleToString(invRCut6);
defines["MULTSHIFT6"] = cu.doubleToString(multShift6);
module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
pmeDispersionFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
pmeDispersionGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeDispersionSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
pmeDispersionConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
pmeEvalDispersionEnergyKernel = cu.getKernel(module, "gridEvaluateEnergy");
pmeInterpolateDispersionForceKernel = cu.getKernel(module, "gridInterpolateForce");
cuFuncSetCacheConfig(pmeDispersionSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
}
// Create required data structures.
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
directPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "reciprocalPmeGrid");
int gridElements = gridSizeX*gridSizeY*gridSizeZ;
if (doLJPME)
gridElements = max(gridElements, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
directPmeGrid = new CudaArray(cu, gridElements, cu.getComputeCapability() >= 2.0 ? 2*elementSize : 2*sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid = new CudaArray(cu, gridElements, 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(*directPmeGrid);
pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
if (doLJPME) {
pmeDispersionBsplineModuliX = new CudaArray(cu, dispersionGridSizeX, elementSize, "pmeDispersionBsplineModuliX");
pmeDispersionBsplineModuliY = new CudaArray(cu, dispersionGridSizeY, elementSize, "pmeDispersionBsplineModuliY");
pmeDispersionBsplineModuliZ = new CudaArray(cu, dispersionGridSizeZ, elementSize, "pmeDispersionBsplineModuliZ");
}
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
int energyElementSize = (cu.getUseDoublePrecision() || cu.getUseMixedPrecision() ? sizeof(double) : sizeof(float));
......@@ -1822,17 +1883,32 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
result = cufftPlan3d(&fftBackward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
if (doLJPME) {
result = cufftPlan3d(&dispersionFftForward, dispersionGridSizeX, dispersionGridSizeY,
dispersionGridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing disperison FFT: "+cu.intToString(result));
result = cufftPlan3d(&dispersionFftBackward, dispersionGridSizeX, dispersionGridSizeY,
dispersionGridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing disperison FFT: "+cu.intToString(result));
}
}
else
else {
fft = new CudaFFT3D(cu, gridSizeX, gridSizeY, gridSizeZ, true);
if (doLJPME)
dispersionFft = new CudaFFT3D(cu, dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ, true);
}
// Prepare for doing PME on its own stream.
if (usePmeStream) {
cuStreamCreate(&pmeStream, CU_STREAM_NON_BLOCKING);
if (useCudaFFT) {
cufftSetStream(fftForward, pmeStream);
cufftSetStream(fftBackward, pmeStream);
cufftSetStream(dispersionFftForward, pmeStream);
cufftSetStream(dispersionFftBackward, pmeStream);
}
CHECK_RESULT(cuEventCreate(&pmeSyncEvent, CU_EVENT_DISABLE_TIMING), "Error creating event for NonbondedForce");
int recipForceGroup = force.getReciprocalSpaceForceGroup();
......@@ -1845,84 +1921,106 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize the b-spline moduli.
int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
vector<double> data(PmeOrder);
vector<double> ddata(PmeOrder);
vector<double> bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
vector<double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = sc*sc+ss*ss;
}
for (int i = 0; i < ndata; i++)
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5;
if (cu.getUseDoublePrecision()) {
if (dim == 0)
pmeBsplineModuliX->upload(moduli);
else if (dim == 1)
pmeBsplineModuliY->upload(moduli);
else
pmeBsplineModuliZ->upload(moduli);
for (int grid = 0; grid < 2; grid++) {
int xsize, ysize, zsize;
CudaArray *xmoduli, *ymoduli, *zmoduli;
if (grid == 0) {
xsize = gridSizeX;
ysize = gridSizeY;
zsize = gridSizeZ;
xmoduli = pmeBsplineModuliX;
ymoduli = pmeBsplineModuliY;
zmoduli = pmeBsplineModuliZ;
}
else {
vector<float> modulif(ndata);
if (!doLJPME)
continue;
xsize = dispersionGridSizeX;
ysize = dispersionGridSizeY;
zsize = dispersionGridSizeZ;
xmoduli = pmeDispersionBsplineModuliX;
ymoduli = pmeDispersionBsplineModuliY;
zmoduli = pmeDispersionBsplineModuliZ;
}
int maxSize = max(max(xsize, ysize), zsize);
vector<double> data(PmeOrder);
vector<double> ddata(PmeOrder);
vector<double> bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? xsize : dim == 1 ? ysize : zsize);
vector<double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = sc*sc+ss*ss;
}
for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i];
if (dim == 0)
pmeBsplineModuliX->upload(modulif);
else if (dim == 1)
pmeBsplineModuliY->upload(modulif);
else
pmeBsplineModuliZ->upload(modulif);
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5;
if (cu.getUseDoublePrecision()) {
if (dim == 0)
xmoduli->upload(moduli);
else if (dim == 1)
ymoduli->upload(moduli);
else
zmoduli->upload(moduli);
}
else {
vector<float> modulif(ndata);
for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i];
if (dim == 0)
xmoduli->upload(modulif);
else if (dim == 1)
ymoduli->upload(modulif);
else
zmoduli->upload(modulif);
}
}
}
}
}
}
// Add the interaction to the default nonbonded kernel.
string source = cu.replaceStrings(CudaKernelSources::coulombLennardJones, defines);
cu.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup(), true);
if (hasLJ)
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2, sizeof(float2), sigmaEpsilon->getDevicePointer()));
cu.getNonbondedUtilities().addParameter(CudaNonbondedUtilities::ParameterInfo("sigmaEpsilon", "float", 2,
sizeof(float2), sigmaEpsilon->getDevicePointer()));
// Initialize the exceptions.
......@@ -1946,7 +2044,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
replacements["PARAMS"] = cu.getBondedUtilities().addArgument(exceptionParams->getDevicePointer(), "float4");
cu.getBondedUtilities().addInteraction(atoms, cu.replaceStrings(CudaKernelSources::nonbondedExceptions, replacements), force.getForceGroup());
}
cu.addForce(new CudaNonbondedForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
......@@ -1959,9 +2058,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
if (directPmeGrid != NULL && includeReciprocal) {
if (usePmeStream)
cu.setCurrentStream(pmeStream);
// Invert the periodic box vectors.
Vec3 boxVectors[3];
cu.getPeriodicBoxVectors(boxVectors[0], boxVectors[1], boxVectors[2]);
double determinant = boxVectors[0][0]*boxVectors[1][1]*boxVectors[2][2];
......@@ -1985,7 +2084,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
recipBoxVectorPointer[1] = &recipBoxVectorsFloat[1];
recipBoxVectorPointer[2] = &recipBoxVectorsFloat[2];
}
// Execute the reciprocal space kernels.
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
......@@ -2002,7 +2101,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, directPmeGrid->getSize(), 256);
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, gridSizeX*gridSizeY*gridSizeZ, 256);
}
if (useCudaFFT) {
......@@ -2041,11 +2140,73 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeInterpolateForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
// As written, we check only the Electrostatic grid pointer to get here. We could separate them out, but for
// now we assume that LJPME can only be used if electrostatic PME is also active.
if (doLJPME) {
void* gridIndexArgs[] = {&cu.getPosq().getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionGridIndexKernel, gridIndexArgs, cu.getNumAtoms());
sort->sort(*pmeAtomGridIndex);
cu.clearBuffer(*directPmeGrid);
void* spreadArgs[] = {&cu.getPosq().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(),
&sigmaEpsilon->getDevicePointer()};
cu.executeKernel(pmeDispersionSpreadChargeKernel, spreadArgs, cu.getNumAtoms(), 128);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0 || cu.getPlatformData().deterministicForces) {
void* finishSpreadArgs[] = {&directPmeGrid->getDevicePointer()};
cu.executeKernel(pmeDispersionFinishSpreadChargeKernel, finishSpreadArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
}
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecD2Z(dispersionFftForward, (double*) directPmeGrid->getDevicePointer(), (double2*) reciprocalPmeGrid->getDevicePointer());
else
cufftExecR2C(dispersionFftForward, (float*) directPmeGrid->getDevicePointer(), (float2*) reciprocalPmeGrid->getDevicePointer());
}
else {
dispersionFft->execFFT(*directPmeGrid, *reciprocalPmeGrid, true);
}
if (includeEnergy) {
void* computeEnergyArgs[] = {&reciprocalPmeGrid->getDevicePointer(), usePmeStream ? &pmeEnergyBuffer->getDevicePointer() : &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX->getDevicePointer(), &pmeDispersionBsplineModuliY->getDevicePointer(), &pmeDispersionBsplineModuliZ->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeEvalDispersionEnergyKernel, computeEnergyArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ);
}
void* convolutionArgs[] = {&reciprocalPmeGrid->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&pmeDispersionBsplineModuliX->getDevicePointer(), &pmeDispersionBsplineModuliY->getDevicePointer(), &pmeDispersionBsplineModuliZ->getDevicePointer(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeDispersionConvolutionKernel, convolutionArgs, dispersionGridSizeX*dispersionGridSizeY*dispersionGridSizeZ, 256);
if (useCudaFFT) {
if (cu.getUseDoublePrecision())
cufftExecZ2D(dispersionFftBackward, (double2*) reciprocalPmeGrid->getDevicePointer(), (double*) directPmeGrid->getDevicePointer());
else
cufftExecC2R(dispersionFftBackward, (float2*) reciprocalPmeGrid->getDevicePointer(), (float*) directPmeGrid->getDevicePointer());
}
else {
dispersionFft->execFFT(*reciprocalPmeGrid, *directPmeGrid, false);
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer(),
&sigmaEpsilon->getDevicePointer()};
cu.executeKernel(pmeInterpolateDispersionForceKernel, interpolateArgs, cu.getNumAtoms(), 128);
}
if (usePmeStream) {
cuEventRecord(pmeSyncEvent, pmeStream);
cu.restoreDefaultStream();
}
}
double energy = (includeReciprocal ? ewaldSelfEnergy : 0.0);
if (dispersionCoefficient != 0.0 && includeDirect) {
double4 boxSize = cu.getPeriodicBoxSize();
......@@ -2087,25 +2248,23 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Record the per-particle parameters.
CudaArray& posq = cu.getPosq();
posq.download(cu.getPinnedBuffer());
float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
vector<double> chargeVector(cu.getNumAtoms());
vector<float2> sigmaEpsilonVector(cu.getPaddedNumAtoms(), make_float2(0, 0));
double sumSquaredCharges = 0.0;
double sumSquaredC6 = 0.0;
const vector<int>& order = cu.getAtomIndex();
for (int i = 0; i < force.getNumParticles(); i++) {
int index = order[i];
double charge, sigma, epsilon;
force.getParticleParameters(index, charge, sigma, epsilon);
if (cu.getUseDoublePrecision())
posqd[i].w = charge;
else
posqf[i].w = (float) charge;
sigmaEpsilonVector[index] = make_float2((float) (0.5*sigma), (float) (2.0*sqrt(epsilon)));
force.getParticleParameters(i, charge, sigma, epsilon);
chargeVector[i] = charge;
double sig = (0.5*sigma);
double eps = (2.0*sqrt(epsilon));
sigmaEpsilonVector[i] = make_float2((float) sig, (float) eps);
double C6 = 8.0*sig*sig*sig*eps;
sumSquaredC6 += C6*C6;
sumSquaredCharges += charge*charge;
}
posq.upload(cu.getPinnedBuffer());
cu.setCharges(chargeVector);
sigmaEpsilon->upload(sigmaEpsilonVector);
// Record the exceptions.
......@@ -2123,8 +2282,10 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
// Compute other values.
if (nonbondedMethod == Ewald || nonbondedMethod == PME)
if (nonbondedMethod == Ewald || nonbondedMethod == PME || nonbondedMethod == LJPME)
ewaldSelfEnergy = (cu.getContextIndex() == 0 ? -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI) : 0.0);
if (nonbondedMethod == LJPME)
ewaldSelfEnergy += (cu.getContextIndex() == 0 ? pow(dispersionAlpha, 6)*sumSquaredC6/12.0 : 0);
if (force.getUseDispersionCorrection() && cu.getContextIndex() == 0 && (nonbondedMethod == CutoffPeriodic || nonbondedMethod == Ewald || nonbondedMethod == PME))
dispersionCoefficient = NonbondedForceImpl::calcDispersionCorrection(context.getSystem(), force);
cu.invalidateMolecules();
......@@ -2143,9 +2304,23 @@ void CudaCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
}
}
class CudaCustomNonbondedForceInfo : public CudaForceInfo {
void CudaCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (!doLJPME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
if (cu.getPlatformData().useCpuPme)
//cpuPme.getAs<CalcPmeReciprocalForceKernel>().getLJPMEParameters(alpha, nx, ny, nz);
throw OpenMMException("getPMEParametersInContext: CPUPME has not been implemented for LJPME yet.");
else {
alpha = this->dispersionAlpha;
nx = dispersionGridSizeX;
ny = dispersionGridSizeY;
nz = dispersionGridSizeZ;
}
}
class CudaCalcCustomNonbondedForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCustomNonbondedForceInfo(const CustomNonbondedForce& force) : force(force) {
ForceInfo(const CustomNonbondedForce& force) : force(force) {
if (force.getNumInteractionGroups() > 0) {
groupsForParticle.resize(force.getNumParticles());
for (int i = 0; i < force.getNumInteractionGroups(); i++) {
......@@ -2324,7 +2499,8 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
cu.getNonbondedUtilities().addArgument(CudaNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(float), globals->getDevicePointer()));
}
}
cu.addForce(new CudaCustomNonbondedForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
// Record information for the long range correction.
......@@ -2660,9 +2836,9 @@ void CudaCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& co
cu.invalidateMolecules();
}
class CudaGBSAOBCForceInfo : public CudaForceInfo {
class CudaCalcGBSAOBCForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaGBSAOBCForceInfo(const GBSAOBCForce& force) : force(force) {
ForceInfo(const GBSAOBCForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
double charge1, charge2, radius1, radius2, scale1, scale2;
......@@ -2733,7 +2909,8 @@ void CudaCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCF
nb.addInteraction(useCutoff, usePeriodic, false, cutoff, vector<vector<int> >(), source, force.getForceGroup());
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("obcParams", "float", 2, sizeof(float2), params->getDevicePointer()));
nb.addParameter(CudaNonbondedUtilities::ParameterInfo("bornForce", "long long", 1, sizeof(long long), bornForce->getDevicePointer()));
cu.addForce(new CudaGBSAOBCForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
}
double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
......@@ -2846,23 +3023,17 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c
// Record the per-particle parameters.
CudaArray& posq = cu.getPosq();
float4* posqf = (float4*) cu.getPinnedBuffer();
double4* posqd = (double4*) cu.getPinnedBuffer();
posq.download(cu.getPinnedBuffer());
vector<double> chargeVector(cu.getNumAtoms());
vector<float2> paramsVector(cu.getPaddedNumAtoms(), make_float2(1, 1));
const double dielectricOffset = 0.009;
for (int i = 0; i < numParticles; i++) {
double charge, radius, scalingFactor;
force.getParticleParameters(i, charge, radius, scalingFactor);
chargeVector[i] = charge;
radius -= dielectricOffset;
paramsVector[i] = make_float2((float) radius, (float) (scalingFactor*radius));
if (cu.getUseDoublePrecision())
posqd[i].w = charge;
else
posqf[i].w = (float) charge;
}
posq.upload(cu.getPinnedBuffer());
cu.setCharges(chargeVector);
params->upload(paramsVector);
// Mark that the current reordering may be invalid.
......@@ -2870,9 +3041,9 @@ void CudaCalcGBSAOBCForceKernel::copyParametersToContext(ContextImpl& context, c
cu.invalidateMolecules();
}
class CudaCustomGBForceInfo : public CudaForceInfo {
class CudaCalcCustomGBForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCustomGBForceInfo(const CustomGBForce& force) : force(force) {
ForceInfo(const CustomGBForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
vector<double> params1;
......@@ -3660,7 +3831,8 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
for (int i = 0; i < (int) arguments.size(); i++)
cu.getNonbondedUtilities().addArgument(arguments[i]);
}
cu.addForce(new CudaCustomGBForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
cu.addAutoclearBuffer(*longEnergyDerivs);
}
......@@ -3876,9 +4048,9 @@ void CudaCalcCustomGBForceKernel::copyParametersToContext(ContextImpl& context,
cu.invalidateMolecules();
}
class CudaCustomExternalForceInfo : public CudaForceInfo {
class CudaCalcCustomExternalForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCustomExternalForceInfo(const CustomExternalForce& force, int numParticles) : force(force), indices(numParticles, -1) {
ForceInfo(const CustomExternalForce& force, int numParticles) : force(force), indices(numParticles, -1) {
vector<double> params;
for (int i = 0; i < force.getNumParticles(); i++) {
int particle;
......@@ -3935,7 +4107,8 @@ void CudaCalcCustomExternalForceKernel::initialize(const System& system, const C
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomExternalForceInfo(force, system.getNumParticles()));
info = new ForceInfo(force, system.getNumParticles());
cu.addForce(info);
// Record information for the expressions.
......@@ -4034,9 +4207,9 @@ void CudaCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& con
cu.invalidateMolecules();
}
class CudaCustomHbondForceInfo : public CudaForceInfo {
class CudaCalcCustomHbondForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCustomHbondForceInfo(const CustomHbondForce& force) : force(force) {
ForceInfo(const CustomHbondForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
return true;
......@@ -4183,7 +4356,8 @@ void CudaCalcCustomHbondForceKernel::initialize(const System& system, const Cust
}
acceptors->upload(acceptorVector);
acceptorParams->setParameterValues(acceptorParamVector);
cu.addForce(new CudaCustomHbondForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
// Record exclusions.
......@@ -4551,9 +4725,9 @@ void CudaCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& contex
cu.invalidateMolecules();
}
class CudaCustomCentroidBondForceInfo : public CudaForceInfo {
class CudaCalcCustomCentroidBondForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCustomCentroidBondForceInfo(const CustomCentroidBondForce& force) : force(force) {
ForceInfo(const CustomCentroidBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
......@@ -4621,7 +4795,8 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
numBonds = force.getNumBonds();
if (numBonds == 0)
return;
cu.addForce(new CudaCustomCentroidBondForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
// Record the groups.
......@@ -5009,9 +5184,9 @@ void CudaCalcCustomCentroidBondForceKernel::copyParametersToContext(ContextImpl&
cu.invalidateMolecules();
}
class CudaCustomCompoundBondForceInfo : public CudaForceInfo {
class CudaCalcCustomCompoundBondForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCustomCompoundBondForceInfo(const CustomCompoundBondForce& force) : force(force) {
ForceInfo(const CustomCompoundBondForce& force) : force(force) {
}
int getNumParticleGroups() {
return force.getNumBonds();
......@@ -5064,7 +5239,8 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomCompoundBondForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
// Record the tabulated functions.
......@@ -5325,9 +5501,9 @@ void CudaCalcCustomCompoundBondForceKernel::copyParametersToContext(ContextImpl&
cu.invalidateMolecules();
}
class CudaCustomManyParticleForceInfo : public CudaForceInfo {
class CudaCalcCustomManyParticleForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaCustomManyParticleForceInfo(const CustomManyParticleForce& force) : force(force) {
ForceInfo(const CustomManyParticleForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
vector<double> params1, params2;
......@@ -5412,7 +5588,8 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
paramVector[i][j] = (float) parameters[j];
}
params->setParameterValues(paramVector);
cu.addForce(new CudaCustomManyParticleForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
// Record the tabulated functions.
......@@ -5992,9 +6169,9 @@ void CudaCalcCustomManyParticleForceKernel::copyParametersToContext(ContextImpl&
cu.invalidateMolecules();
}
class CudaGayBerneForceInfo : public CudaForceInfo {
class CudaCalcGayBerneForceKernel::ForceInfo : public CudaForceInfo {
public:
CudaGayBerneForceInfo(const GayBerneForce& force) : force(force) {
ForceInfo(const GayBerneForce& force) : force(force) {
}
bool areParticlesIdentical(int particle1, int particle2) {
int xparticle1, yparticle1;
......@@ -6205,7 +6382,8 @@ void CudaCalcGayBerneForceKernel::initialize(const System& system, const GayBern
neighborsKernel = cu.getKernel(module, "findNeighbors");
forceKernel = cu.getKernel(module, "computeForce");
torqueKernel = cu.getKernel(module, "applyTorques");
cu.addForce(new CudaGayBerneForceInfo(force));
info = new ForceInfo(force);
cu.addForce(info);
cu.addReorderListener(new ReorderListener(*this));
}
......
......@@ -628,6 +628,10 @@ void CudaParallelCalcNonbondedForceKernel::getPMEParameters(double& alpha, int&
dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getPMEParameters(alpha, nx, ny, nz);
}
void CudaParallelCalcNonbondedForceKernel::getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
dynamic_cast<const CudaCalcNonbondedForceKernel&>(kernels[0].getImpl()).getLJPMEParameters(alpha, nx, ny, nz);
}
class CudaParallelCalcCustomNonbondedForceKernel::Task : public CudaContext::WorkTask {
public:
Task(ContextImpl& context, CudaCalcCustomNonbondedForceKernel& kernel, bool includeForce,
......
......@@ -247,6 +247,10 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
CHECK_RESULT(cuDeviceGetName(name, 1000, contexts[i]->getDevice()), "Error querying device name");
deviceName << name;
}
size_t printfsize;
cuCtxGetLimit(&printfsize, CU_LIMIT_PRINTF_FIFO_SIZE);
cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, 10*printfsize);
useCpuPme = (cpuPmeProperty == "true" && !contexts[0]->getUseDoublePrecision());
disablePmeStream = (pmeStreamProperty == "true");
deterministicForces = (deterministicForcesProperty == "true");
......
......@@ -17,6 +17,26 @@
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
#endif
real tempForce = 0.0f;
#if HAS_LENNARD_JONES
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space. The real terms have an additive contribution
// added in, but for excluded terms the multiplicative term is just subtracted.
// These factors are needed in both clauses of the needCorrection statement, so
// I declare them up here.
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = sigmaEpsilon1*sigmaEpsilon2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
#endif
#endif
if (needCorrection) {
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
......@@ -29,6 +49,13 @@
includeInteraction = false;
tempEnergy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*138.935456f*posq1.w*posq2.w;
}
#if HAS_LENNARD_JONES
#if DO_LJPME
// The multiplicative grid term
tempEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
#endif
}
else {
#if HAS_LENNARD_JONES
......@@ -36,7 +63,8 @@
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(sigmaEpsilon1.y*sigmaEpsilon2.y);
real eps = sigmaEpsilon1.y*sigmaEpsilon2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
......@@ -48,6 +76,22 @@
ljEnergy *= switchValue;
}
#endif
#if DO_LJPME
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += includeInteraction ? ljEnergy + prefactor*erfcAlphaR : 0;
#else
......
......@@ -21,7 +21,11 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
#ifdef USE_LJPME
, const float2* __restrict__ sigmaEpsilon
#endif
) {
real3 data[PME_ORDER];
const real scale = RECIP(PME_ORDER-1);
......@@ -62,7 +66,13 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
data[0] = scale*(make_real3(1)-dr)*data[0];
// Spread the charge from this atom onto each grid point.
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
const real charge = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
const real charge = pos.w;
#endif
for (int ix = 0; ix < PME_ORDER; ix++) {
int xbase = gridIndex.x+ix;
xbase -= (xbase >= GRID_SIZE_X ? GRID_SIZE_X : 0);
......@@ -80,7 +90,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = ybase + zindex;
real add = pos.w*dx*dy*data[iz].z;
real add = charge*dx*dy*data[iz].z;
#ifdef USE_DOUBLE_PRECISION
unsigned long long * ulonglong_p = (unsigned long long *) originalPmeGrid;
atomicAdd(&ulonglong_p[index], static_cast<unsigned long long>((long long) (add*0x100000000)));
......@@ -121,7 +131,15 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
#ifdef USE_LJPME
const real recipScaleFactor = -2*M_PI*SQRT(M_PI)*RECIP(6*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
#endif
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) {
// real indices
......@@ -140,12 +158,23 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict
real bz = pmeBsplineModuliZ[kz];
real2 grid = halfcomplex_pmeGrid[index];
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = ERFC(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
halfcomplex_pmeGrid[index] = make_real2(grid.x*eterm, grid.y*eterm);
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kx != 0 || ky != 0 || kz != 0) {
halfcomplex_pmeGrid[index] = make_real2(grid.x*eterm, grid.y*eterm);
}
#endif
}
}
......@@ -156,8 +185,16 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
real4 periodicBoxSize, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
#ifdef USE_LJPME
const real recipScaleFactor = -2*M_PI*SQRT(M_PI)*RECIP(6*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
real bfac = M_PI / EWALD_ALPHA;
real fac1 = 2*M_PI*M_PI*M_PI*SQRT(M_PI);
real fac2 = EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA;
real fac3 = -2*EWALD_ALPHA*M_PI*M_PI;
#else
const real recipScaleFactor = RECIP(M_PI*periodicBoxSize.x*periodicBoxSize.y*periodicBoxSize.z);
#endif
mixed energy = 0;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < gridSize; index += blockDim.x*gridDim.x) {
// real indices
......@@ -175,8 +212,19 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
#ifdef USE_LJPME
real denom = recipScaleFactor/(bx*by*bz);
real m = SQRT(m2);
real m3 = m*m2;
real b = bfac*m;
real expfac = -b*b;
real expterm = EXP(expfac);
real erfcterm = ERFC(b);
real eterm = (fac1*erfcterm*m3 + expterm*(fac2 + fac3*m2)) * denom;
#else
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
#endif
if (kz >= (GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
......@@ -185,11 +233,12 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
}
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
real2 grid = halfcomplex_pmeGrid[indexInHalfComplexGrid];
if (kx != 0 || ky != 0 || kz != 0) {
#ifndef USE_LJPME
if (kx != 0 || ky != 0 || kz != 0)
#endif
energy += eterm*(grid.x*grid.x + grid.y*grid.y);
}
}
#ifdef USE_PME_STREAM
#if defined(USE_PME_STREAM) && !defined(USE_LJPME)
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] = 0.5f*energy;
#else
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*energy;
......@@ -199,7 +248,11 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
extern "C" __global__
void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, const real* __restrict__ originalPmeGrid,
real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex) {
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ, const int2* __restrict__ pmeAtomGridIndex
#ifdef USE_LJPME
, const float2* __restrict__ sigmaEpsilon
#endif
) {
real3 data[PME_ORDER];
real3 ddata[PME_ORDER];
const real scale = RECIP(PME_ORDER-1);
......@@ -271,7 +324,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
}
#ifdef USE_LJPME
const float2 sigEps = sigmaEpsilon[atom];
real q = 8*sigEps.x*sigEps.x*sigEps.x*sigEps.y;
#else
real q = pos.w*EPSILON_FACTOR;
#endif
real forceX = -q*(force.x*GRID_SIZE_X*recipBoxVecX.x);
real forceY = -q*(force.x*GRID_SIZE_X*recipBoxVecY.x+force.y*GRID_SIZE_Y*recipBoxVecY.y);
real forceZ = -q*(force.x*GRID_SIZE_X*recipBoxVecZ.x+force.y*GRID_SIZE_Y*recipBoxVecZ.y+force.z*GRID_SIZE_Z*recipBoxVecZ.z);
......
......@@ -73,4 +73,11 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
clearSingleBuffer(buffer6, size6);
}
/**
* Record the atomic charges into the posq array.
*/
__global__ void setCharges(real* __restrict__ charges, real4* __restrict__ posq, int* __restrict__ atomOrder, int numAtoms) {
for (int i = blockDim.x*blockIdx.x+threadIdx.x; i < numAtoms; i += blockDim.x*gridDim.x)
posq[i].w = charges[atomOrder[i]];
}
}
\ No newline at end of file
......@@ -14,8 +14,7 @@ SET_SOURCE_FILES_PROPERTIES(${CUDA_KERNELS_CPP} ${CUDA_KERNELS_H} PROPERTIES GEN
ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${API_ABS_INCLUDE_FILES})
TARGET_LINK_LIBRARIES(${STATIC_TARGET} ${OPENMM_LIBRARY_NAME} ${CUDA_CUDA_LIBRARY} ${CUDA_cufft_LIBRARY} ${PTHREADS_LIB_STATIC})
#-DPTW32_STATIC_LIB only works for the windows pthreads.
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CUDA_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB")
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES COMPILE_FLAGS "${EXTRA_COMPILE_FLAGS} -DOPENMM_CUDA_BUILDING_STATIC_LIBRARY")
IF (APPLE)
SET_TARGET_PROPERTIES(${STATIC_TARGET} PROPERTIES LINK_FLAGS "${EXTRA_COMPILE_FLAGS} -F/Library/Frameworks -framework CUDA")
ELSE (APPLE)
......
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CudaTests.h"
#include "TestDispersionPME.h"
void runPlatformTests() {
}
......@@ -609,6 +609,10 @@ public:
OpenCLNonbondedUtilities& getNonbondedUtilities() {
return *nonbonded;
}
/**
* Set the particle charges. These are packed into the fourth element of the posq array.
*/
void setCharges(const std::vector<double>& charges);
/**
* Get the thread used by this context for executing parallel computations.
*/
......@@ -692,6 +696,12 @@ public:
* and order to be revalidated.
*/
void invalidateMolecules();
/**
* Mark that the current molecule definitions from one particular force (and hence the atom order)
* may be invalid. This should be called whenever force field parameters change. It will cause the
* definitions and order to be revalidated.
*/
bool invalidateMolecules(OpenCLForceInfo* force);
private:
struct Molecule;
struct MoleculeGroup;
......@@ -739,6 +749,7 @@ private:
cl::Kernel clearSixBuffersKernel;
cl::Kernel reduceReal4Kernel;
cl::Kernel reduceForcesKernel;
cl::Kernel setChargesKernel;
std::vector<OpenCLForceInfo*> forces;
std::vector<Molecule> molecules;
std::vector<MoleculeGroup> moleculeGroups;
......@@ -754,6 +765,7 @@ private:
OpenCLArray* energyBuffer;
OpenCLArray* energyParamDerivBuffer;
OpenCLArray* atomIndexDevice;
OpenCLArray* chargeBuffer;
std::vector<std::string> energyParamDerivNames;
std::map<std::string, double> energyParamDerivWorkspace;
std::vector<int> atomIndex;
......
......@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2016 Stanford University and the Authors. *
* Portions copyright (c) 2008-2017 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
......@@ -176,7 +176,6 @@ public:
*/
void loadCheckpoint(ContextImpl& context, std::istream& stream);
private:
class GetPositionsTask;
OpenCLContext& cl;
};
......@@ -270,9 +269,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
private:
class ForceInfo;
int numBonds;
bool hasInitializedKernel;
OpenCLContext& cl;
ForceInfo* info;
const System& system;
OpenCLArray* params;
};
......@@ -310,9 +311,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
private:
class ForceInfo;
int numBonds;
bool hasInitializedKernel;
OpenCLContext& cl;
ForceInfo* info;
const System& system;
OpenCLParameterSet* params;
OpenCLArray* globals;
......@@ -353,9 +356,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
private:
class ForceInfo;
int numAngles;
bool hasInitializedKernel;
OpenCLContext& cl;
ForceInfo* info;
const System& system;
OpenCLArray* params;
};
......@@ -393,9 +398,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
private:
class ForceInfo;
int numAngles;
bool hasInitializedKernel;
OpenCLContext& cl;
ForceInfo* info;
const System& system;
OpenCLParameterSet* params;
OpenCLArray* globals;
......@@ -436,9 +443,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
OpenCLContext& cl;
ForceInfo* info;
const System& system;
OpenCLArray* params;
};
......@@ -476,9 +485,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
OpenCLContext& cl;
ForceInfo* info;
const System& system;
OpenCLArray* params;
};
......@@ -516,9 +527,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CMAPTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
OpenCLContext& cl;
ForceInfo* info;
const System& system;
std::vector<mm_int2> mapPositionsVec;
OpenCLArray* coefficients;
......@@ -559,9 +572,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
private:
class ForceInfo;
int numTorsions;
bool hasInitializedKernel;
OpenCLContext& cl;
ForceInfo* info;
const System& system;
OpenCLParameterSet* params;
OpenCLArray* globals;
......@@ -576,8 +591,9 @@ class OpenCLCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
OpenCLCalcNonbondedForceKernel(std::string name, const Platform& platform, OpenCLContext& cl, const System& system) : CalcNonbondedForceKernel(name, platform),
hasInitializedKernel(false), cl(cl), sigmaEpsilon(NULL), exceptionParams(NULL), cosSinSums(NULL), pmeGrid(NULL),
pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeBsplineTheta(NULL),
pmeAtomRange(NULL), pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), pmeio(NULL) {
pmeGrid2(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeDispersionBsplineModuliX(NULL),
pmeDispersionBsplineModuliY(NULL), pmeDispersionBsplineModuliZ(NULL), pmeBsplineTheta(NULL), pmeAtomRange(NULL),
pmeAtomGridIndex(NULL), pmeEnergyBuffer(NULL), sort(NULL), fft(NULL), dispersionFft(NULL), pmeio(NULL) {
}
~OpenCLCalcNonbondedForceKernel();
/**
......@@ -607,13 +623,22 @@ public:
void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
/**
* Get the parameters being used for PME.
*
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class SortTrait : public OpenCLSort::SortTrait {
int getDataSize() const {return 8;}
......@@ -625,12 +650,14 @@ private:
const char* getMaxValue() const {return "(int2) (INT_MAX, INT_MAX)";}
const char* getSortKey() const {return "value.y";}
};
class ForceInfo;
class PmeIO;
class PmePreComputation;
class PmePostComputation;
class SyncQueuePreComputation;
class SyncQueuePostComputation;
OpenCLContext& cl;
ForceInfo* info;
bool hasInitializedKernel;
OpenCLArray* sigmaEpsilon;
OpenCLArray* exceptionParams;
......@@ -640,6 +667,9 @@ private:
OpenCLArray* pmeBsplineModuliX;
OpenCLArray* pmeBsplineModuliY;
OpenCLArray* pmeBsplineModuliZ;
OpenCLArray* pmeDispersionBsplineModuliX;
OpenCLArray* pmeDispersionBsplineModuliY;
OpenCLArray* pmeDispersionBsplineModuliZ;
OpenCLArray* pmeBsplineTheta;
OpenCLArray* pmeAtomRange;
OpenCLArray* pmeAtomGridIndex;
......@@ -648,25 +678,34 @@ private:
cl::CommandQueue pmeQueue;
cl::Event pmeSyncEvent;
OpenCLFFT3D* fft;
OpenCLFFT3D* dispersionFft;
Kernel cpuPme;
PmeIO* pmeio;
SyncQueuePostComputation* syncQueue;
cl::Kernel ewaldSumsKernel;
cl::Kernel ewaldForcesKernel;
cl::Kernel pmeGridIndexKernel;
cl::Kernel pmeAtomRangeKernel;
cl::Kernel pmeDispersionAtomRangeKernel;
cl::Kernel pmeZIndexKernel;
cl::Kernel pmeDispersionZIndexKernel;
cl::Kernel pmeUpdateBsplinesKernel;
cl::Kernel pmeDispersionUpdateBsplinesKernel;
cl::Kernel pmeSpreadChargeKernel;
cl::Kernel pmeDispersionSpreadChargeKernel;
cl::Kernel pmeFinishSpreadChargeKernel;
cl::Kernel pmeDispersionFinishSpreadChargeKernel;
cl::Kernel pmeConvolutionKernel;
cl::Kernel pmeDispersionConvolutionKernel;
cl::Kernel pmeEvalEnergyKernel;
cl::Kernel pmeDispersionEvalEnergyKernel;
cl::Kernel pmeInterpolateForceKernel;
cl::Kernel pmeDispersionInterpolateForceKernel;
std::map<std::string, std::string> pmeDefines;
std::vector<std::pair<int, int> > exceptionAtoms;
double ewaldSelfEnergy, dispersionCoefficient, alpha;
double ewaldSelfEnergy, dispersionCoefficient, alpha, dispersionAlpha;
int gridSizeX, gridSizeY, gridSizeZ;
bool hasCoulomb, hasLJ, usePmeQueue;
int dispersionGridSizeX, dispersionGridSizeY, dispersionGridSizeZ;
bool hasCoulomb, hasLJ, usePmeQueue, doLJPME;
NonbondedMethod nonbondedMethod;
static const int PmeOrder = 5;
};
......@@ -704,8 +743,10 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private:
class ForceInfo;
void initInteractionGroups(const CustomNonbondedForce& force, const std::string& interactionSource, const std::vector<std::string>& tableTypes);
OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* params;
OpenCLArray* globals;
OpenCLArray* interactionGroupData;
......@@ -756,10 +797,12 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const GBSAOBCForce& force);
private:
class ForceInfo;
double prefactor, surfaceAreaFactor, cutoff;
bool hasCreatedKernels;
int maxTiles;
OpenCLContext& cl;
ForceInfo* info;
OpenCLArray* params;
OpenCLArray* bornSum;
OpenCLArray* longBornSum;
......@@ -807,10 +850,12 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomGBForce& force);
private:
class ForceInfo;
double cutoff;
bool hasInitializedKernels, needParameterGradient, needEnergyParamDerivs;
int maxTiles, numComputedValues;
OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* params;
OpenCLParameterSet* computedValues;
OpenCLParameterSet* energyDerivs;
......@@ -864,9 +909,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
private:
class ForceInfo;
int numParticles;
bool hasInitializedKernel;
OpenCLContext& cl;
ForceInfo* info;
const System& system;
OpenCLParameterSet* params;
OpenCLArray* globals;
......@@ -908,9 +955,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private:
class ForceInfo;
int numDonors, numAcceptors;
bool hasInitializedKernel;
OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* donorParams;
OpenCLParameterSet* acceptorParams;
OpenCLArray* globals;
......@@ -961,9 +1010,11 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCentroidBondForce& force);
private:
class ForceInfo;
int numGroups, numBonds;
bool needEnergyParamDerivs;
OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* params;
OpenCLArray* globals;
OpenCLArray* groupParticles;
......@@ -1013,8 +1064,10 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force);
private:
class ForceInfo;
int numBonds;
OpenCLContext& cl;
ForceInfo* info;
OpenCLParameterSet* params;
OpenCLArray* globals;
std::vector<std::string> globalParamNames;
......@@ -1059,7 +1112,9 @@ public:
void copyParametersToContext(ContextImpl& context, const CustomManyParticleForce& force);
private:
class ForceInfo;
OpenCLContext& cl;
ForceInfo* info;
bool hasInitializedKernel;
NonbondedMethod nonbondedMethod;
int maxNeighborPairs, forceWorkgroupSize, findNeighborsWorkgroupSize;
......@@ -1119,9 +1174,11 @@ public:
*/
void copyParametersToContext(ContextImpl& context, const GayBerneForce& force);
private:
class ForceInfo;
class ReorderListener;
void sortAtoms();
OpenCLContext& cl;
ForceInfo* info;
bool hasInitializedKernels;
int numRealParticles, maxNeighborBlocks;
GayBerneForce::NonbondedMethod nonbondedMethod;
......
......@@ -431,13 +431,22 @@ public:
void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
/**
* Get the parameters being used for PME.
*
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
/**
* Get the parameters being used for the dispersion term in LJPME.
*
* @param alpha the separation parameter
* @param nx the number of grid points along the X axis
* @param ny the number of grid points along the Y axis
* @param nz the number of grid points along the Z axis
*/
void getLJPMEParameters(double& alpha, int& nx, int& ny, int& nz) const;
private:
class Task;
OpenCLPlatform::PlatformData& data;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment