Commit 505c2afe authored by Peter Eastman's avatar Peter Eastman
Browse files

Converted WcaDispersionForce to new CUDA platform (not yet fully debugged)

parent 1ec69c95
...@@ -41,10 +41,8 @@ extern "C" OPENMM_EXPORT void registerKernelFactories() { ...@@ -41,10 +41,8 @@ extern "C" OPENMM_EXPORT void registerKernelFactories() {
Platform& platform = Platform::getPlatformByName("CUDA"); Platform& platform = Platform::getPlatformByName("CUDA");
AmoebaCudaKernelFactory* factory = new AmoebaCudaKernelFactory(); AmoebaCudaKernelFactory* factory = new AmoebaCudaKernelFactory();
platform.registerKernelFactory(CalcAmoebaHarmonicBondForceKernel::Name(), factory); platform.registerKernelFactory(CalcAmoebaHarmonicBondForceKernel::Name(), factory);
platform.registerKernelFactory(CalcAmoebaUreyBradleyForceKernel::Name(), factory);
platform.registerKernelFactory(CalcAmoebaHarmonicAngleForceKernel::Name(), factory); platform.registerKernelFactory(CalcAmoebaHarmonicAngleForceKernel::Name(), factory);
platform.registerKernelFactory(CalcAmoebaHarmonicInPlaneAngleForceKernel::Name(), factory); platform.registerKernelFactory(CalcAmoebaHarmonicInPlaneAngleForceKernel::Name(), factory);
platform.registerKernelFactory(CalcAmoebaTorsionForceKernel::Name(), factory);
platform.registerKernelFactory(CalcAmoebaPiTorsionForceKernel::Name(), factory); platform.registerKernelFactory(CalcAmoebaPiTorsionForceKernel::Name(), factory);
platform.registerKernelFactory(CalcAmoebaStretchBendForceKernel::Name(), factory); platform.registerKernelFactory(CalcAmoebaStretchBendForceKernel::Name(), factory);
platform.registerKernelFactory(CalcAmoebaOutOfPlaneBendForceKernel::Name(), factory); platform.registerKernelFactory(CalcAmoebaOutOfPlaneBendForceKernel::Name(), factory);
...@@ -82,9 +80,6 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl ...@@ -82,9 +80,6 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
if (name == CalcAmoebaHarmonicInPlaneAngleForceKernel::Name()) if (name == CalcAmoebaHarmonicInPlaneAngleForceKernel::Name())
return new CudaCalcAmoebaHarmonicInPlaneAngleForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcAmoebaHarmonicInPlaneAngleForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcAmoebaTorsionForceKernel::Name())
// return new CudaCalcAmoebaTorsionForceKernel(name, platform, cu, context.getSystem());
if (name == CalcAmoebaPiTorsionForceKernel::Name()) if (name == CalcAmoebaPiTorsionForceKernel::Name())
return new CudaCalcAmoebaPiTorsionForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcAmoebaPiTorsionForceKernel(name, platform, cu, context.getSystem());
...@@ -106,11 +101,8 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl ...@@ -106,11 +101,8 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
if (name == CalcAmoebaVdwForceKernel::Name()) if (name == CalcAmoebaVdwForceKernel::Name())
return new CudaCalcAmoebaVdwForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcAmoebaVdwForceKernel(name, platform, cu, context.getSystem());
// if (name == CalcAmoebaWcaDispersionForceKernel::Name()) if (name == CalcAmoebaWcaDispersionForceKernel::Name())
// return new CudaCalcAmoebaWcaDispersionForceKernel(name, platform, cu, context.getSystem()); return new CudaCalcAmoebaWcaDispersionForceKernel(name, platform, cu, context.getSystem());
//
// if (name == CalcAmoebaUreyBradleyForceKernel::Name())
// return new CudaCalcAmoebaUreyBradleyForceKernel(name, platform, cu, context.getSystem());
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str()); throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
} }
...@@ -1867,75 +1867,82 @@ double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeF ...@@ -1867,75 +1867,82 @@ double CudaCalcAmoebaVdwForceKernel::execute(ContextImpl& context, bool includeF
return dispersionCoefficient/(box.x*box.y*box.z); return dispersionCoefficient/(box.x*box.y*box.z);
} }
///* -------------------------------------------------------------------------- * /* -------------------------------------------------------------------------- *
// * AmoebaWcaDispersion * * AmoebaWcaDispersion *
// * -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
//
//static void computeAmoebaWcaDispersionForce( CudaContext& cu ) { class CudaCalcAmoebaWcaDispersionForceKernel::ForceInfo : public CudaForceInfo {
// public:
// data.initializeGpu(); ForceInfo(const AmoebaWcaDispersionForce& force) : force(force) {
// if( 0 && data.getLog() ){ }
// (void) fprintf( data.getLog(), "Calling computeAmoebaWcaDispersionForce " ); (void) fflush( data.getLog() ); bool areParticlesIdentical(int particle1, int particle2) {
// } double radius1, radius2, epsilon1, epsilon2;
// force.getParticleParameters(particle1, radius1, epsilon1);
// kCalculateAmoebaWcaDispersionForces( data.getAmoebaGpu() ); force.getParticleParameters(particle2, radius2, epsilon2);
// return (radius1 == radius2 && epsilon1 == epsilon2);
// if( 0 && data.getLog() ){ }
// (void) fprintf( data.getLog(), " -- completed\n" ); (void) fflush( data.getLog() ); private:
// } const AmoebaWcaDispersionForce& force;
//} };
//
//class CudaCalcAmoebaWcaDispersionForceKernel::ForceInfo : public CudaForceInfo { CudaCalcAmoebaWcaDispersionForceKernel::CudaCalcAmoebaWcaDispersionForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) :
//public: CalcAmoebaWcaDispersionForceKernel(name, platform), cu(cu), system(system), radiusEpsilon(NULL) {
// ForceInfo(const AmoebaWcaDispersionForce& force) : force(force) { }
// }
// bool areParticlesIdentical(int particle1, int particle2) { CudaCalcAmoebaWcaDispersionForceKernel::~CudaCalcAmoebaWcaDispersionForceKernel() {
// double radius1, radius2, epsilon1, epsilon2; if (radiusEpsilon != NULL)
// force.getParticleParameters(particle1, radius1, epsilon1); delete radiusEpsilon;
// force.getParticleParameters(particle2, radius2, epsilon2); }
// return (radius1 == radius2 && epsilon1 == epsilon2);
// } void CudaCalcAmoebaWcaDispersionForceKernel::initialize(const System& system, const AmoebaWcaDispersionForce& force) {
//private: int numParticles = system.getNumParticles();
// const AmoebaWcaDispersionForce& force; int paddedNumAtoms = cu.getPaddedNumAtoms();
//};
// // Record parameters.
//CudaCalcAmoebaWcaDispersionForceKernel::CudaCalcAmoebaWcaDispersionForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) :
// CalcAmoebaWcaDispersionForceKernel(name, platform), cu(cu), system(system) { vector<float2> radiusEpsilonVec(paddedNumAtoms, make_float2(0, 0));
// data.incrementKernelCount(); for (int i = 0; i < numParticles; i++) {
//} double radius, epsilon;
// force.getParticleParameters(i, radius, epsilon);
//CudaCalcAmoebaWcaDispersionForceKernel::~CudaCalcAmoebaWcaDispersionForceKernel() { radiusEpsilonVec[i] = make_float2((float) radius, (float) epsilon);
// data.decrementKernelCount(); }
//} radiusEpsilon = CudaArray::create<float2>(cu, paddedNumAtoms, "radiusEpsilon");
// radiusEpsilon->upload(radiusEpsilonVec);
//void CudaCalcAmoebaWcaDispersionForceKernel::initialize(const System& system, const AmoebaWcaDispersionForce& force) {
// // Create the kernel.
// // per-particle parameters
// map<string, string> defines;
// int numParticles = system.getNumParticles(); defines["NUM_ATOMS"] = cu.intToString(numParticles);
// std::vector<float> radii(numParticles); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
// std::vector<float> epsilons(numParticles); defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
// for( int ii = 0; ii < numParticles; ii++ ){ defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
// defines["EPSO"] = cu.doubleToString(force.getEpso());
// double radius, epsilon; defines["EPSH"] = cu.doubleToString(force.getEpsh());
// force.getParticleParameters( ii, radius, epsilon ); defines["RMINO"] = cu.doubleToString(force.getRmino());
// defines["RMINH"] = cu.doubleToString(force.getRminh());
// radii[ii] = static_cast<float>( radius ); defines["AWATER"] = cu.doubleToString(force.getAwater());
// epsilons[ii] = static_cast<float>( epsilon ); defines["SHCTD"] = cu.doubleToString(force.getEpso());
// } defines["EPSO"] = cu.doubleToString(force.getShctd());
// float totalMaximumDispersionEnergy = static_cast<float>( AmoebaWcaDispersionForceImpl::getTotalMaximumDispersionEnergy( force ) ); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::amoebaWcaForce, defines);
// gpuSetAmoebaWcaDispersionParameters( data.getAmoebaGpu(), radii, epsilons, totalMaximumDispersionEnergy, forceKernel = cu.getKernel(module, "computeWCAForce");
// static_cast<float>( force.getEpso( )), totalMaximumDispersionEnergy = AmoebaWcaDispersionForceImpl::getTotalMaximumDispersionEnergy(force);
// static_cast<float>( force.getEpsh( )),
// static_cast<float>( force.getRmino( )), // Add an interaction to the default nonbonded kernel. This doesn't actually do any calculations. It's
// static_cast<float>( force.getRminh( )), // just so that CudaNonbondedUtilities will keep track of the tiles.
// static_cast<float>( force.getAwater( )),
// static_cast<float>( force.getShctd( )), vector<vector<int> > exclusions;
// static_cast<float>( force.getDispoff( ) ) ); cu.getNonbondedUtilities().addInteraction(false, false, false, 0, exclusions, "", force.getForceGroup());
// data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force)); cu.addForce(new ForceInfo(force));
//} }
//
//double CudaCalcAmoebaWcaDispersionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) { double CudaCalcAmoebaWcaDispersionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
// computeAmoebaWcaDispersionForce( data ); CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
// return 0.0; int startTileIndex = nb.getStartTileIndex();
//} int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int forceThreadBlockSize = nb.getForceThreadBlockSize();
void* forceArgs[] = {&cu.getForce().getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &startTileIndex, &numTileIndices, &radiusEpsilon->getDevicePointer()};
cu.executeKernel(forceKernel, forceArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
return totalMaximumDispersionEnergy;
}
...@@ -524,6 +524,9 @@ private: ...@@ -524,6 +524,9 @@ private:
class ForceInfo; class ForceInfo;
CudaContext& cu; CudaContext& cu;
System& system; System& system;
double totalMaximumDispersionEnergy;
CudaArray* radiusEpsilon;
CUfunction forceKernel;
}; };
} // namespace OpenMM } // namespace OpenMM
......
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct {
real3 pos, force;
float radius, epsilon, padding;
} AtomData;
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const float2* __restrict__ radiusEpsilon) {
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
float2 temp = radiusEpsilon[atom];
data.radius = temp.x;
data.epsilon = temp.y;
}
__device__ void initParticleParameters(float radius, float epsilon, real& rmixo, real& rmixh, real& emixo, real& emixh) {
real sqrtEps = SQRT(epsilon);
real denominator = SQRT(EPSO) + sqrtEps;
emixo = 4*EPSO*epsilon / (denominator*denominator);
denominator = SQRT(EPSH) + sqrtEps;
emixh = 4*EPSH*epsilon / (denominator*denominator);
real radius2 = radius*radius;
real rmino2 = RMINO*RMINO;
rmixo = 2*(rmino2*RMINO + radius2*radius) / (rmino2 + radius2);
real rminh2 = RMINH*RMINH;
rmixh = 2*(rminh2*RMINH + radius2*radius) / (rminh2+radius2);
}
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real rmixo, real rmixh, real emixo, real emixh, real3& force, real& energy) {
// get deltaR and r between 2 atoms
force = atom2.pos - atom1.pos;
real r2 = dot(force, force);
if (r2 <= 0) {
force = make_real3(0);
energy = 0;
return;
}
real rI = RSQRT(r2);
real r = RECIP(rI);
real sk = atom2.radius*SHCTD;
real sk2 = sk*sk;
if (atom1.radius >= (r+sk)) {
force = make_real3(0);
energy = 0;
return;
}
real rmax = atom1.radius > (r - sk) ? atom1.radius : (r - sk);
real lik = rmax;
real lik2 = lik*lik;
real lik3 = lik2*lik;
real lik4 = lik2*lik2;
real uik = (r+sk) < rmixo ? (r+sk) : rmixo;
real uik2 = uik*uik;
real uik3 = uik2*uik;
real uik4 = uik2*uik2;
real term = 4*M_PI/(48*r)*(3*(lik4-uik4) - 8*r*(lik3-uik3) + 6*(r2-sk2)*(lik2-uik2));
real r3 = r2*r;
real dl1 = lik2*(-lik2 + 2*(r2 + sk2));
real dl2 = lik*(-lik3 + 4*lik2*r - 6*lik*r2 + 2*lik*sk2 + 4*r3 - 4*r*sk2);
real dl = atom1.radius > (r-sk)? dl1 : dl2;
real du1 = uik2*(-uik2 + 2*(r2 + sk2));
real du2 = uik*(-uik3 + 4*uik2*r - 2*uik*(3*r2 - sk2) + 4*r*(r2 - sk2));
real du = (r+sk) > rmixo ? -du1 : -du2;
real mask2 = lik < rmixo ? 1 : 0;
real sum = -mask2*(emixo*term);
real de = -mask2*emixo*M_PI*(dl+du)/(4*r2);
uik = (r+sk) < rmixh ? (r+sk) : rmixh;
uik2 = uik*uik;
uik3 = uik2*uik;
uik4 = uik2*uik2;
term = (M_PI)/ (12*r) * (3*(lik4-uik4) - 8*r*(lik3-uik3) + 6*(r2-sk2)*(lik2-uik2));
dl1 = lik2*(-lik2 + 2*r2 + 2*sk2);
dl2 = lik*(-lik3 + 4*lik2*r - 6*lik*r2 + 2*lik*sk2 + 4*r3 - 4*r*sk2);
dl = atom1.radius > (r-sk) ? dl1 : dl2;
du1 = -uik2*(-uik2 + 2*r2 + 2*sk2);
du2 = -uik*(-uik3 + 4*uik2*r - 6*uik*r2 + 2*uik*sk2 + 4*r3 - 4*r*sk2);
du = (r+sk) > rmixh ? du1 : du2;
mask2 = lik < rmixh ? 1 : 0;
sum -= mask2*(2*emixh*term);
de -= mask2*(2*emixh*M_PI*(dl+du)/(4*r2));
uik = r + sk;
uik2 = uik*uik;
uik3 = uik2*uik;
uik4 = uik2*uik2;
real uik5 = uik4*uik;
real uik6 = uik3*uik3;
real uik10 = uik5*uik5;
real uik11 = uik10*uik;
real uik12 = uik6*uik6;
real uik13 = uik12*uik;
lik = rmax > rmixo ? rmax : rmixo;
lik2 = lik*lik;
lik3 = lik2*lik;
lik4 = lik2*lik2;
real lik5 = lik4*lik;
real lik6 = lik3*lik3;
real lik10 = lik5*lik5;
real lik11 = lik10*lik;
real lik12 = lik6*lik6;
real lik13 = lik12*lik;
term = 4*M_PI/(120*r*lik5*uik5)*(15*uik*lik*r*(uik4-lik4) - 10*uik2*lik2*(uik3-lik3) + 6*(sk2-r2)*(uik5-lik5));
dl1 = (-5*lik2 + 3*r2 + 3*sk2)/lik5;
dl2 = (5*lik3 - 33*lik*r2 - 3*lik*sk2 + 15*(lik2*r+r3-r*sk2))/lik6;
dl = (atom1.radius > (r-sk)) || (rmax < rmixo) ? -dl1 : dl2;
du = (-5*uik3 + 33*uik*r2 + 3*uik*sk2 - 15*(uik2*r+r3-r*sk2))/uik6;
real rmixo7 = rmixo*rmixo*rmixo;
rmixo7 = rmixo7*rmixo7*rmixo;
real ao = emixo*rmixo7;
real idisp = -2*ao*term;
mask2 = uik > rmixo ? 1 : 0;
de -= mask2*(2*ao*M_PI*(dl + du)/(15*r2));
term = 4*M_PI/(2640*r*lik12*uik12) * (120*uik*lik*r*(uik11-lik11) - 66*uik2*lik2*(uik10-lik10) + 55*(sk2-r2)*(uik12-lik12));
dl1 = (6*lik2 - 5*r2 - 5*sk2)/lik12;
dl2 = (6*lik3 - 125*lik*r2 - 5*lik*sk2 + 60*(lik2*r+r3-r*sk2))/lik13;
dl = (atom1.radius > (r-sk)) || (rmax < rmixo) ? dl1 : dl2;
du = (-6*uik3 + 125*uik*r2 + 5*uik*sk2 - 60*(uik2*r+r3-r*sk2))/uik13;
de += mask2*(ao*rmixo7*M_PI*(dl + du)/(60*r2));
real irep = ao*rmixo7*term;
sum += mask2*(irep + idisp);
lik = rmax > rmixh ? rmax : rmixh;
lik2 = lik*lik;
lik3 = lik2*lik;
lik4 = lik2*lik2;
lik5 = lik4*lik;
lik6 = lik3*lik3;
lik10 = lik5*lik5;
lik11 = lik10*lik;
lik12 = lik6*lik6;
lik13 = lik12*lik;
term = 4*M_PI / (120*r*lik5*uik5) * (15*uik*lik*r*(uik4-lik4) - 10*uik2*lik2*(uik3-lik3) + 6*(sk2-r2)*(uik5-lik5));
dl1 = (-5*lik2 + 3*r2 + 3*sk2)/lik5;
dl2 = (5*lik3 - 33*lik*r2 - 3*lik*sk2+ 15*(lik2*r+r3-r*sk2))/lik6;
dl = (atom1.radius > (r-sk)) || (rmax < rmixh) ? -dl1 : dl2;
du = -(5*uik3 - 33*uik*r2 - 3*uik*sk2 + 15*(uik2*r+r3-r*sk2))/uik6;
real rmixh7 = rmixh*rmixh*rmixh;
rmixh7 = rmixh7*rmixh7*rmixh;
real ah = emixh*rmixh7;
idisp = -4*ah*term;
mask2 = uik > rmixh ? 1 : 0;
de -= mask2*(4*ah*M_PI*(dl + du)/(15*r2));
term = 4*M_PI / (2640*r*lik12*uik12) * (120*uik*lik*r*(uik11-lik11) - 66*uik2*lik2*(uik10-lik10) + 55*(sk2-r2)*(uik12-lik12));
dl1 = -(-6*lik2 + 5*r2 + 5*sk2)/lik12;
dl2 = (6*lik3 - 125*lik*r2 - 5*lik*sk2 + 60*(lik2*r+r3-r*sk2))/lik13;
dl = ((atom1.radius > (r-sk)) || (rmax < rmixh)) ? dl1 : dl2;
du = -(6*uik3 - 125*uik*r2 -5*uik*sk2 + 60*(uik2*r+r3-r*sk2))/uik13;
irep = 2*ah*rmixh7*term;
de += mask2*(ah*rmixh7*M_PI*(dl+du)/(30*r2));
sum += mask2*(irep+idisp);
energy = sum;
de *= -AWATER*rI;
force *= de;
}
/**
* Compute WCA interaction.
*/
extern "C" __global__ void computeWCAForce(unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const float2* __restrict__ radiusEpsilon) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int numTiles = numTileIndices;
unsigned int pos = startTileIndex+warp*numTiles/totalWarps;
unsigned int end = startTileIndex+(warp+1)*numTiles/totalWarps;
real energy = 0;
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
const unsigned int localGroupIndex = threadIdx.x/TILE_SIZE;
unsigned int x, y;
AtomData data;
if (pos < end) {
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData(data, atom1, posq, radiusEpsilon);
loadAtomData(localData[threadIdx.x], y*TILE_SIZE+tgx, posq, radiusEpsilon);
real emixo, emixh, rmixo, rmixh;
initParticleParameters(data.radius, data.epsilon, rmixo, rmixh, emixo, emixh);
data.force = make_real3(0);
localData[threadIdx.x].force = make_real3(0);
// Compute forces.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
if (atom2 != 0)
continue;
computeOneInteraction(data, localData[tbx+j], rmixo, rmixh, emixo, emixh, tempForce, tempEnergy);
data.force += tempForce;
localData[tbx+j].force -= tempForce;
energy += (x == y ? 0.5f*tempEnergy : tempEnergy);
real emjxo, emjxh, rmjxo, rmjxh;
initParticleParameters(localData[tbx+j].radius, localData[tbx+j].epsilon, rmjxo, rmjxh, emjxo, emjxh);
computeOneInteraction(localData[tbx+j], data, rmjxo, rmjxh, emjxo, emjxh, tempForce, tempEnergy);
data.force -= tempForce;
localData[tbx+j].force += tempForce;
energy += (x == y ? 0.5f*tempEnergy : tempEnergy);
}
}
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
pos++;
} while (pos < end);
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Mark Friedrichs *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of CudaAmoebaWcaDispersionForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "OpenMMAmoeba.h"
#include "AmoebaTinkerParameterFile.h"
#include "openmm/System.h"
#include "openmm/AmoebaWcaDispersionForce.h"
#include "openmm/LangevinIntegrator.h"
#include <iostream>
#include <vector>
#include <stdlib.h>
#include <stdio.h>
#define ASSERT_EQUAL_TOL_MOD(expected, found, tol, testname) {double _scale_ = std::abs(expected) > 1.0 ? std::abs(expected) : 1.0; if (!(std::abs((expected)-(found))/_scale_ <= (tol))) {std::stringstream details; details << testname << " Expected "<<(expected)<<", found "<<(found); throwException(__FILE__, __LINE__, details.str());}};
#define ASSERT_EQUAL_VEC_MOD(expected, found, tol,testname) {ASSERT_EQUAL_TOL_MOD((expected)[0], (found)[0], (tol),(testname)); ASSERT_EQUAL_TOL_MOD((expected)[1], (found)[1], (tol),(testname)); ASSERT_EQUAL_TOL_MOD((expected)[2], (found)[2], (tol),(testname));};
using namespace OpenMM;
const double TOL = 1e-4;
void setupAndGetForcesEnergyWcaDispersionAmmonia( std::vector<Vec3>& forces, double& energy, FILE* log ){
// beginning of WcaDispersion setup
System system;
AmoebaWcaDispersionForce* amoebaWcaDispersionForce = new AmoebaWcaDispersionForce();;
int numberOfParticles = 8;
amoebaWcaDispersionForce->setEpso( 4.6024000e-01 );
amoebaWcaDispersionForce->setEpsh( 5.6484000e-02 );
amoebaWcaDispersionForce->setRmino( 1.7025000e-01 );
amoebaWcaDispersionForce->setRminh( 1.3275000e-01 );
amoebaWcaDispersionForce->setDispoff( 2.6000000e-02 );
amoebaWcaDispersionForce->setAwater( 3.3428000e+01 );
amoebaWcaDispersionForce->setSlevy( 1.0000000e+00 );
amoebaWcaDispersionForce->setShctd( 8.1000000e-01 );
// addParticle: radius, epsilon
for( unsigned int ii = 0; ii < 2; ii++ ){
system.addParticle( 1.4007000e+01 );
amoebaWcaDispersionForce->addParticle( 1.8550000e-01, 4.3932000e-01 );
system.addParticle( 1.0080000e+00 );
amoebaWcaDispersionForce->addParticle( 1.3500000e-01, 8.3680000e-02 );
system.addParticle( 1.0080000e+00 );
amoebaWcaDispersionForce->addParticle( 1.3500000e-01, 8.3680000e-02 );
system.addParticle( 1.0080000e+00 );
amoebaWcaDispersionForce->addParticle( 1.3500000e-01, 8.3680000e-02 );
}
std::vector<Vec3> positions(numberOfParticles);
positions[0] = Vec3( 1.5927280e-01, 1.7000000e-06, 1.6491000e-03 );
positions[1] = Vec3( 2.0805540e-01, -8.1258800e-02, 3.7282500e-02 );
positions[2] = Vec3( 2.0843610e-01, 8.0953200e-02, 3.7462200e-02 );
positions[3] = Vec3( 1.7280780e-01, 2.0730000e-04, -9.8741700e-02 );
positions[4] = Vec3( -1.6743680e-01, 1.5900000e-05, -6.6149000e-03 );
positions[5] = Vec3( -2.0428260e-01, 8.1071500e-02, 4.1343900e-02 );
positions[6] = Vec3( -6.7308300e-02, 1.2800000e-05, 1.0623300e-02 );
positions[7] = Vec3( -2.0426290e-01, -8.1231400e-02, 4.1033500e-02 );
system.addForce(amoebaWcaDispersionForce);
std::string platformName;
platformName = "CUDA";
LangevinIntegrator integrator(0.0, 0.1, 0.01);
Context context(system, integrator, Platform::getPlatformByName( platformName ) );
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
forces = state.getForces();
energy = state.getPotentialEnergy();
}
void compareForcesEnergy( std::string& testName, double expectedEnergy, double energy,
std::vector<Vec3>& expectedForces,
std::vector<Vec3>& forces, double tolerance, FILE* log ) {
#ifdef AMOEBA_DEBUG
if( log ){
(void) fprintf( log, "%s: expected energy=%14.7e %14.7e\n", testName.c_str(), expectedEnergy, energy );
for( unsigned int ii = 0; ii < forces.size(); ii++ ){
(void) fprintf( log, "%6u [%14.7e %14.7e %14.7e] [%14.7e %14.7e %14.7e]\n", ii,
expectedForces[ii][0], expectedForces[ii][1], expectedForces[ii][2], forces[ii][0], forces[ii][1], forces[ii][2] );
}
(void) fflush( log );
}
#endif
for( unsigned int ii = 0; ii < forces.size(); ii++ ){
ASSERT_EQUAL_VEC_MOD( expectedForces[ii], forces[ii], tolerance, testName );
}
ASSERT_EQUAL_TOL_MOD( expectedEnergy, energy, tolerance, testName );
}
// test Wca dispersion
void testWcaDispersionAmmonia( FILE* log ) {
std::string testName = "testWcaDispersionAmmonia";
int numberOfParticles = 8;
std::vector<Vec3> forces;
double energy;
setupAndGetForcesEnergyWcaDispersionAmmonia( forces, energy, log );
std::vector<Vec3> expectedForces(numberOfParticles);
// TINKER-computed values
double expectedEnergy = -2.6981209e+01;
expectedForces[0] = Vec3( 4.7839388e+00, -7.3510133e-04, -5.0382764e-01 );
expectedForces[1] = Vec3( 1.4657758e+00, 1.2431003e+00, -6.7075886e-01 );
expectedForces[2] = Vec3( 1.4563936e+00, -1.2399917e+00, -6.7443841e-01 );
expectedForces[3] = Vec3( 2.1116744e+00, -2.7407512e-03, 1.3271245e+00 );
expectedForces[4] = Vec3( -4.7528440e+00, -1.5148066e-03, 1.2653813e+00 );
expectedForces[5] = Vec3( -1.1875619e+00, -1.2866678e+00, -3.9109060e-01 );
expectedForces[6] = Vec3( -2.6885679e+00, -4.3038639e-04, 3.3763583e-02 );
expectedForces[7] = Vec3( -1.1888087e+00, 1.2889802e+00, -3.8615387e-01 );
double tolerance = 1.0e-04;
compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
}
int main( int numberOfArguments, char* argv[] ) {
try {
std::cout << "TestCudaAmoebaWcaDispersionForce running test..." << std::endl;
registerAmoebaCudaKernelFactories();
FILE* log = NULL;
// test Wca dispersion force using two ammonia molecules
testWcaDispersionAmmonia( log );
} catch(const std::exception& e) {
std::cout << "exception: " << e.what() << std::endl;
std::cout << "FAIL - ERROR. Test failed." << std::endl;
return 1;
}
std::cout << "Done" << std::endl;
return 0;
}
/* -------------------------------------------------------------------------- *
* OpenMMAmoeba *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Mark Friedrichs *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the Reference implementation of CudaAmoebaWcaDispersionForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "OpenMMAmoeba.h"
#include "openmm/System.h"
#include "openmm/AmoebaWcaDispersionForce.h"
#include "openmm/LangevinIntegrator.h"
#include <iostream>
#include <vector>
#include <stdlib.h>
#include <stdio.h>
#define ASSERT_EQUAL_TOL_MOD(expected, found, tol, testname) {double _scale_ = std::abs(expected) > 1.0 ? std::abs(expected) : 1.0; if (!(std::abs((expected)-(found))/_scale_ <= (tol))) {std::stringstream details; details << testname << " Expected "<<(expected)<<", found "<<(found); throwException(__FILE__, __LINE__, details.str());}};
#define ASSERT_EQUAL_VEC_MOD(expected, found, tol,testname) {ASSERT_EQUAL_TOL_MOD((expected)[0], (found)[0], (tol),(testname)); ASSERT_EQUAL_TOL_MOD((expected)[1], (found)[1], (tol),(testname)); ASSERT_EQUAL_TOL_MOD((expected)[2], (found)[2], (tol),(testname));};
using namespace OpenMM;
const double TOL = 1e-4;
void setupAndGetForcesEnergyWcaDispersionAmmonia( std::vector<Vec3>& forces, double& energy, FILE* log ){
// beginning of WcaDispersion setup
System system;
AmoebaWcaDispersionForce* amoebaWcaDispersionForce = new AmoebaWcaDispersionForce();;
int numberOfParticles = 8;
amoebaWcaDispersionForce->setEpso( 4.6024000e-01 );
amoebaWcaDispersionForce->setEpsh( 5.6484000e-02 );
amoebaWcaDispersionForce->setRmino( 1.7025000e-01 );
amoebaWcaDispersionForce->setRminh( 1.3275000e-01 );
amoebaWcaDispersionForce->setDispoff( 2.6000000e-02 );
amoebaWcaDispersionForce->setAwater( 3.3428000e+01 );
amoebaWcaDispersionForce->setSlevy( 1.0000000e+00 );
amoebaWcaDispersionForce->setShctd( 8.1000000e-01 );
// addParticle: radius, epsilon
for( unsigned int ii = 0; ii < 2; ii++ ){
system.addParticle( 1.4007000e+01 );
amoebaWcaDispersionForce->addParticle( 1.8550000e-01, 4.3932000e-01 );
system.addParticle( 1.0080000e+00 );
amoebaWcaDispersionForce->addParticle( 1.3500000e-01, 8.3680000e-02 );
system.addParticle( 1.0080000e+00 );
amoebaWcaDispersionForce->addParticle( 1.3500000e-01, 8.3680000e-02 );
system.addParticle( 1.0080000e+00 );
amoebaWcaDispersionForce->addParticle( 1.3500000e-01, 8.3680000e-02 );
}
std::vector<Vec3> positions(numberOfParticles);
positions[0] = Vec3( 1.5927280e-01, 1.7000000e-06, 1.6491000e-03 );
positions[1] = Vec3( 2.0805540e-01, -8.1258800e-02, 3.7282500e-02 );
positions[2] = Vec3( 2.0843610e-01, 8.0953200e-02, 3.7462200e-02 );
positions[3] = Vec3( 1.7280780e-01, 2.0730000e-04, -9.8741700e-02 );
positions[4] = Vec3( -1.6743680e-01, 1.5900000e-05, -6.6149000e-03 );
positions[5] = Vec3( -2.0428260e-01, 8.1071500e-02, 4.1343900e-02 );
positions[6] = Vec3( -6.7308300e-02, 1.2800000e-05, 1.0623300e-02 );
positions[7] = Vec3( -2.0426290e-01, -8.1231400e-02, 4.1033500e-02 );
system.addForce(amoebaWcaDispersionForce);
std::string platformName;
platformName = "Reference";
LangevinIntegrator integrator(0.0, 0.1, 0.01);
Context context(system, integrator, Platform::getPlatformByName( platformName ) );
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
forces = state.getForces();
energy = state.getPotentialEnergy();
}
void compareForcesEnergy( std::string& testName, double expectedEnergy, double energy,
std::vector<Vec3>& expectedForces,
std::vector<Vec3>& forces, double tolerance, FILE* log ) {
#ifdef AMOEBA_DEBUG
if( log ){
(void) fprintf( log, "%s: expected energy=%14.7e %14.7e\n", testName.c_str(), expectedEnergy, energy );
for( unsigned int ii = 0; ii < forces.size(); ii++ ){
(void) fprintf( log, "%6u [%14.7e %14.7e %14.7e] [%14.7e %14.7e %14.7e]\n", ii,
expectedForces[ii][0], expectedForces[ii][1], expectedForces[ii][2], forces[ii][0], forces[ii][1], forces[ii][2] );
}
(void) fflush( log );
}
#endif
for( unsigned int ii = 0; ii < forces.size(); ii++ ){
ASSERT_EQUAL_VEC_MOD( expectedForces[ii], forces[ii], tolerance, testName );
}
ASSERT_EQUAL_TOL_MOD( expectedEnergy, energy, tolerance, testName );
}
// test Wca dispersion
void testWcaDispersionAmmonia( FILE* log ) {
std::string testName = "testWcaDispersionAmmonia";
int numberOfParticles = 8;
std::vector<Vec3> forces;
double energy;
setupAndGetForcesEnergyWcaDispersionAmmonia( forces, energy, log );
std::vector<Vec3> expectedForces(numberOfParticles);
// TINKER-computed values
double expectedEnergy = -2.6981209e+01;
expectedForces[0] = Vec3( 4.7839388e+00, -7.3510133e-04, -5.0382764e-01 );
expectedForces[1] = Vec3( 1.4657758e+00, 1.2431003e+00, -6.7075886e-01 );
expectedForces[2] = Vec3( 1.4563936e+00, -1.2399917e+00, -6.7443841e-01 );
expectedForces[3] = Vec3( 2.1116744e+00, -2.7407512e-03, 1.3271245e+00 );
expectedForces[4] = Vec3( -4.7528440e+00, -1.5148066e-03, 1.2653813e+00 );
expectedForces[5] = Vec3( -1.1875619e+00, -1.2866678e+00, -3.9109060e-01 );
expectedForces[6] = Vec3( -2.6885679e+00, -4.3038639e-04, 3.3763583e-02 );
expectedForces[7] = Vec3( -1.1888087e+00, 1.2889802e+00, -3.8615387e-01 );
double tolerance = 1.0e-04;
compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
}
int main( int numberOfArguments, char* argv[] ) {
try {
std::cout << "TestCudaAmoebaWcaDispersionForce running test..." << std::endl;
FILE* log = NULL;
// test Wca dispersion force using two ammonia molecules
testWcaDispersionAmmonia( log );
} catch(const std::exception& e) {
std::cout << "exception: " << e.what() << std::endl;
std::cout << "FAIL - ERROR. Test failed." << std::endl;
return 1;
}
std::cout << "Done" << std::endl;
return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment