Commit 776f6f22 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to convert AmoebaMultipoleForce

parent b186314c
...@@ -865,7 +865,7 @@ private: ...@@ -865,7 +865,7 @@ private:
CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) :
CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system), hasInitializedScaleFactors(false), CalcAmoebaMultipoleForceKernel(name, platform), cu(cu), system(system), hasInitializedScaleFactors(false),
multipoleParticles(NULL), torqueBufferIndices(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL),
labFrameDipoles(NULL), labFrameQuadrupoles(NULL), field(NULL), fieldPolar(NULL), dampingAndThole(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL), field(NULL), fieldPolar(NULL), dampingAndThole(NULL),
inducedDipole(NULL), inducedDipolePolar(NULL), currentEpsilon(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL), inducedDipole(NULL), inducedDipolePolar(NULL), currentEpsilon(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL) { pmeGrid(NULL) {
...@@ -875,8 +875,6 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() { ...@@ -875,8 +875,6 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
cu.setAsCurrent(); cu.setAsCurrent();
if (multipoleParticles != NULL) if (multipoleParticles != NULL)
delete multipoleParticles; delete multipoleParticles;
if (torqueBufferIndices != NULL)
delete torqueBufferIndices;
if (molecularDipoles != NULL) if (molecularDipoles != NULL)
delete molecularDipoles; delete molecularDipoles;
if (molecularQuadrupoles != NULL) if (molecularQuadrupoles != NULL)
...@@ -933,8 +931,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -933,8 +931,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
multipoleParticlesVec.push_back(make_int4(atomX, atomY, atomZ, axisType)); multipoleParticlesVec.push_back(make_int4(atomX, atomY, atomZ, axisType));
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
molecularDipolesVec.push_back((float) dipole[j]); molecularDipolesVec.push_back((float) dipole[j]);
for (int j = 0; j < 9; j++) molecularQuadrupolesVec.push_back((float) quadrupole[0]);
molecularQuadrupolesVec.push_back((float) quadrupole[j]); molecularQuadrupolesVec.push_back((float) quadrupole[1]);
molecularQuadrupolesVec.push_back((float) quadrupole[2]);
molecularQuadrupolesVec.push_back((float) quadrupole[4]);
molecularQuadrupolesVec.push_back((float) quadrupole[5]);
} }
int paddedNumAtoms = cu.getPaddedNumAtoms(); int paddedNumAtoms = cu.getPaddedNumAtoms();
for (int i = numMultipoles; i < paddedNumAtoms; i++) { for (int i = numMultipoles; i < paddedNumAtoms; i++) {
...@@ -943,14 +944,14 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -943,14 +944,14 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
multipoleParticlesVec.push_back(make_int4(0, 0, 0, 0)); multipoleParticlesVec.push_back(make_int4(0, 0, 0, 0));
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
molecularDipolesVec.push_back(0); molecularDipolesVec.push_back(0);
for (int j = 0; j < 9; j++) for (int j = 0; j < 5; j++)
molecularQuadrupolesVec.push_back(0); molecularQuadrupolesVec.push_back(0);
} }
dampingAndThole = CudaArray::create<float2>(cu, paddedNumAtoms, "dampingAndThole"); dampingAndThole = CudaArray::create<float2>(cu, paddedNumAtoms, "dampingAndThole");
polarizability = CudaArray::create<float>(cu, paddedNumAtoms, "polarizability"); polarizability = CudaArray::create<float>(cu, paddedNumAtoms, "polarizability");
multipoleParticles = CudaArray::create<int4>(cu, paddedNumAtoms, "multipoleParticles"); multipoleParticles = CudaArray::create<int4>(cu, paddedNumAtoms, "multipoleParticles");
molecularDipoles = CudaArray::create<float>(cu, 3*paddedNumAtoms, "molecularDipoles"); molecularDipoles = CudaArray::create<float>(cu, 3*paddedNumAtoms, "molecularDipoles");
molecularQuadrupoles = CudaArray::create<float>(cu, 9*paddedNumAtoms, "molecularQuadrupoles"); molecularQuadrupoles = CudaArray::create<float>(cu, 5*paddedNumAtoms, "molecularQuadrupoles");
dampingAndThole->upload(dampingAndTholeVec); dampingAndThole->upload(dampingAndTholeVec);
polarizability->upload(polarizabilityVec); polarizability->upload(polarizabilityVec);
multipoleParticles->upload(multipoleParticlesVec); multipoleParticles->upload(multipoleParticlesVec);
...@@ -962,7 +963,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -962,7 +963,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
labFrameDipoles = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles"); labFrameDipoles = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles");
labFrameQuadrupoles = new CudaArray(cu, 9*paddedNumAtoms, elementSize, "labFrameQuadrupoles"); labFrameQuadrupoles = new CudaArray(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles");
field = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "field"); field = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "field");
fieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "fieldPolar"); fieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "fieldPolar");
inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole"); inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
...@@ -999,6 +1000,15 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -999,6 +1000,15 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
polarizationFlagValues.push_back(make_int2(i, atoms[j])); polarizationFlagValues.push_back(make_int2(i, atoms[j]));
} }
// Record other options.
if (force.getPolarizationType() == AmoebaMultipoleForce::Mutual) {
maxInducedIterations = force.getMutualInducedMaxIterations();
inducedEpsilon = force.getMutualInducedTargetEpsilon();
}
else
maxInducedIterations == 0;
// Create the kernels. // Create the kernels.
...@@ -1007,13 +1017,29 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1007,13 +1017,29 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
map<string, string> defines; map<string, string> defines;
defines["NUM_ATOMS"] = cu.intToString(numMultipoles); defines["NUM_ATOMS"] = cu.intToString(numMultipoles);
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["SCALING_DISTANCE_CUTOFF"] = cu.doubleToString(50.0);
defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize()); defines["THREAD_BLOCK_SIZE"] = cu.intToString(cu.getNonbondedUtilities().getForceThreadBlockSize());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks()); defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456); // DIVIDE BY INNER DIELECTRIC!!!
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct)
defines["DIRECT_POLARIZATION"] = "";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoles, defines); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoles, defines);
computeMomentsKernel = cu.getKernel(module, "computeLabFrameMoments"); computeMomentsKernel = cu.getKernel(module, "computeLabFrameMoments");
recordInducedDipolesKernel = cu.getKernel(module, "recordInducedDipoles");
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines); module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines);
computeFixedFieldKernel = cu.getKernel(module, "computeFixedField"); computeFixedFieldKernel = cu.getKernel(module, "computeFixedField");
stringstream electrostaticsSource;
electrostaticsSource << CudaKernelSources::vectorOps;
electrostaticsSource << CudaAmoebaKernelSources::multipoleElectrostatics;
electrostaticsSource << "#define F1\n";
electrostaticsSource << CudaAmoebaKernelSources::electrostaticPairForce;
electrostaticsSource << "#undef F1\n";
electrostaticsSource << "#define T1\n";
electrostaticsSource << CudaAmoebaKernelSources::electrostaticPairForce;
electrostaticsSource << "#undef T1\n";
electrostaticsSource << "#define T2\n";
electrostaticsSource << CudaAmoebaKernelSources::electrostaticPairForce;
module = cu.createModule(electrostaticsSource.str(), defines);
electrostaticsKernel = cu.getKernel(module, "computeElectrostatics");
// Set up PME. // Set up PME.
...@@ -1389,30 +1415,34 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1389,30 +1415,34 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* computeMomentsArgs[] = {&cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer(), void* computeMomentsArgs[] = {&cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer(),
&molecularDipoles->getDevicePointer(), &molecularQuadrupoles->getDevicePointer(), &molecularDipoles->getDevicePointer(), &molecularQuadrupoles->getDevicePointer(),
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer()}; &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer()};
cu.executeKernel(computeMomentsKernel, computeMomentsArgs, cu.getPaddedNumAtoms()); cu.executeKernel(computeMomentsKernel, computeMomentsArgs, cu.getNumAtoms());
vector<float> d, q;
labFrameDipoles->download(d);
labFrameQuadrupoles->download(q);
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2]);
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g %g %g %g %g %g %g %g %g\n", i, q[9*i], q[9*i+1], q[9*i+2], q[9*i+3], q[9*i+4], q[9*i+5], q[9*i+6], q[9*i+7], q[9*i+8]);
int startTileIndex = nb.getStartTileIndex(); int startTileIndex = nb.getStartTileIndex();
int numTileIndices = nb.getNumTiles(); int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks(); int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int forceThreadBlockSize = nb.getForceThreadBlockSize(); int forceThreadBlockSize = nb.getForceThreadBlockSize();
if (pmeGrid == NULL) { if (pmeGrid == NULL) {
// Compute induced dipoles.
void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(), void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusions().getDevicePointer(), &nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(), &nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices, &covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
vector<unsigned long long> f; void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
field->download(f); &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
int pad = cu.getPaddedNumAtoms(); cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
for (int i = 0; i < cu.getNumAtoms(); i++) { for (int i = 0; i < maxInducedIterations; i++) {
printf("%d %g %g %g\n", i, f[i]/(double)0xFFFFFFFF, f[i+pad]/(double)0xFFFFFFFF, f[i+pad*2]/(double)0xFFFFFFFF);
} }
// Compute electrostatic force.
void* electrostaticsArgs[] = {&cu.getForce().getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
} }
return 0.0; return 0.0;
} }
...@@ -1598,28 +1628,22 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba ...@@ -1598,28 +1628,22 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
throw OpenMMException("Illegal combining rule for sigma: "+sigmaCombiningRule); throw OpenMMException("Illegal combining rule for sigma: "+sigmaCombiningRule);
string epsilonCombiningRule = force.getEpsilonCombiningRule(); string epsilonCombiningRule = force.getEpsilonCombiningRule();
if (epsilonCombiningRule == "ARITHMETIC") if (epsilonCombiningRule == "ARITHMETIC")
replacements["EPILON_COMBINING_RULE"] = "1"; replacements["EPSILON_COMBINING_RULE"] = "1";
else if (epsilonCombiningRule == "GEOMETRIC") else if (epsilonCombiningRule == "GEOMETRIC")
replacements["EPILON_COMBINING_RULE"] = "2"; replacements["EPSILON_COMBINING_RULE"] = "2";
else if (epsilonCombiningRule == "HARMONIC") else if (epsilonCombiningRule == "HARMONIC")
replacements["EPILON_COMBINING_RULE"] = "3"; replacements["EPSILON_COMBINING_RULE"] = "3";
else if (epsilonCombiningRule == "HHG") else if (epsilonCombiningRule == "HHG")
replacements["EPILON_COMBINING_RULE"] = "4"; replacements["EPSILON_COMBINING_RULE"] = "4";
else else
throw OpenMMException("Illegal combining rule for sigma: "+sigmaCombiningRule); throw OpenMMException("Illegal combining rule for sigma: "+sigmaCombiningRule);
double cutoff = force.getCutoff(); double cutoff = force.getCutoff();
double taperCutoff = cutoff*0.9; double taperCutoff = cutoff*0.9;
replacements["CUTOFF_DISTANCE"] = cu.doubleToString(force.getCutoff()); replacements["CUTOFF_DISTANCE"] = cu.doubleToString(force.getCutoff());
replacements["TAPER_CUTOFF"] = cu.doubleToString(taperCutoff); replacements["TAPER_CUTOFF"] = cu.doubleToString(taperCutoff);
double cutoff2 = cutoff*cutoff; replacements["TAPER_C3"] = cu.doubleToString(10/pow(taperCutoff-cutoff, 3.0));
double taperCutoff2 = taperCutoff*taperCutoff; replacements["TAPER_C4"] = cu.doubleToString(15/pow(taperCutoff-cutoff, 4.0));
double denom = pow(cutoff-taperCutoff, -5.0); replacements["TAPER_C5"] = cu.doubleToString(6/pow(taperCutoff-cutoff, 5.0));
replacements["TAPER_C0"] = cu.doubleToString(cutoff*cutoff2 * (cutoff2-5.0*cutoff*taperCutoff+10.0*taperCutoff2)*denom);
replacements["TAPER_C1"] = cu.doubleToString(-30.0*cutoff2*taperCutoff2*denom);
replacements["TAPER_C2"] = cu.doubleToString(30.0*(cutoff2*taperCutoff+cutoff*taperCutoff2)*denom);
replacements["TAPER_C3"] = cu.doubleToString(-10.0*(cutoff2+4.0*cutoff*taperCutoff+taperCutoff2)*denom);
replacements["TAPER_C4"] = cu.doubleToString(15.0*(cutoff+taperCutoff)*denom);
replacements["TAPER_C5"] = cu.doubleToString(-6.0*denom);
nonbonded->addInteraction(force.getUseNeighborList(), force.getPBC(), true, force.getCutoff(), exclusions, nonbonded->addInteraction(force.getUseNeighborList(), force.getPBC(), true, force.getCutoff(), exclusions,
cu.replaceStrings(CudaAmoebaKernelSources::amoebaVdwForce2, replacements), force.getForceGroup()); cu.replaceStrings(CudaAmoebaKernelSources::amoebaVdwForce2, replacements), force.getForceGroup());
......
...@@ -374,14 +374,14 @@ public: ...@@ -374,14 +374,14 @@ public:
private: private:
class ForceInfo; class ForceInfo;
void initializeScaleFactors(); void initializeScaleFactors();
int numMultipoles; int numMultipoles, maxInducedIterations;
double inducedEpsilon;
bool hasInitializedScaleFactors; bool hasInitializedScaleFactors;
CudaContext& cu; CudaContext& cu;
System& system; System& system;
std::vector<int3> covalentFlagValues; std::vector<int3> covalentFlagValues;
std::vector<int2> polarizationFlagValues; std::vector<int2> polarizationFlagValues;
CudaArray* multipoleParticles; CudaArray* multipoleParticles;
CudaArray* torqueBufferIndices;
CudaArray* molecularDipoles; CudaArray* molecularDipoles;
CudaArray* molecularQuadrupoles; CudaArray* molecularQuadrupoles;
CudaArray* labFrameDipoles; CudaArray* labFrameDipoles;
...@@ -412,7 +412,7 @@ private: ...@@ -412,7 +412,7 @@ private:
CudaArray* pmeAtomGridIndex; CudaArray* pmeAtomGridIndex;
CudaSort* sort; CudaSort* sort;
cufftHandle fft; cufftHandle fft;
CUfunction computeMomentsKernel, computeFixedFieldKernel; CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, electrostaticsKernel;
}; };
/** /**
......
...@@ -15,13 +15,11 @@ ...@@ -15,13 +15,11 @@
real sigma = 2*(sigmaEpsilon1.x*sigma1_2 + sigmaEpsilon2.x*sigma2_2)/(sigma1_2+sigma2_2); real sigma = 2*(sigmaEpsilon1.x*sigma1_2 + sigmaEpsilon2.x*sigma2_2)/(sigma1_2+sigma2_2);
#endif #endif
#if EPSILON_COMBINING_RULE == 1 #if EPSILON_COMBINING_RULE == 1
real epsilon = sigmaEpsilon1.y + sigmaEpsilon2.y; real epsilon = 0.5f*(sigmaEpsilon1.y + sigmaEpsilon2.y);
#elif EPSILON_COMBINING_RULE == 2 #elif EPSILON_COMBINING_RULE == 2
real epsilon = 2*SQRT(sigmaEpsilon1.y*sigmaEpsilon2.y); real epsilon = SQRT(sigmaEpsilon1.y*sigmaEpsilon2.y);
#elif EPSILON_COMBINING_RULE == 3 #elif EPSILON_COMBINING_RULE == 3
real epsilon1_2 = sigmaEpsilon1.x*sigmaEpsilon1.x; real epsilon = 2*(sigmaEpsilon1.y*sigmaEpsilon2.y)/(sigmaEpsilon1.y+sigmaEpsilon2.y);
real epsilon2_2 = sigmaEpsilon2.x*sigmaEpsilon2.x;
real epsilon = 2*(sigmaEpsilon1.x*epsilon1_2 + sigmaEpsilon2.x*epsilon2_2)/(epsilon1_2+epsilon2_2);
#else #else
real epsilon_s = SQRT(sigmaEpsilon1.y) + SQRT(sigmaEpsilon2.y); real epsilon_s = SQRT(sigmaEpsilon1.y) + SQRT(sigmaEpsilon2.y);
real epsilon = 4*sigmaEpsilon1.y*sigmaEpsilon2.y/(epsilon_s*epsilon_s); real epsilon = 4*sigmaEpsilon1.y*sigmaEpsilon2.y/(epsilon_s*epsilon_s);
...@@ -39,13 +37,14 @@ ...@@ -39,13 +37,14 @@
real tmp = sigma7*invRho; real tmp = sigma7*invRho;
real gTau = epsilon*tau7*r6*1.12f*tmp*tmp; real gTau = epsilon*tau7*r6*1.12f*tmp*tmp;
real termEnergy = epsilon*sigma7*tau7*((sigma7*1.12f*invRho)-2.0f); real termEnergy = epsilon*sigma7*tau7*((sigma7*1.12f*invRho)-2.0f);
real deltaE = (-7.0f*(dTau*termEnergy+gTau))*invR; real deltaE = -7.0f*(dTau*termEnergy+gTau);
if (r > TAPER_CUTOFF) { if (r > TAPER_CUTOFF) {
real taper = TAPER_C0+r*(TAPER_C1+r*(TAPER_C2+r*(TAPER_C3+r*(TAPER_C4+r*TAPER_C5)))); real x = r-TAPER_CUTOFF;
real dtaper = TAPER_C1+r*(2*TAPER_C2+r*(3*TAPER_C3+r*(4*TAPER_C4+r*5*TAPER_C5))); real taper = 1+x*x*x*(TAPER_C3+x*(TAPER_C4+x*TAPER_C5));
real dtaper = x*x*(3*TAPER_C3+x*(4*TAPER_C4+x*5*TAPER_C5));
deltaE = termEnergy*dtaper + deltaE*taper; deltaE = termEnergy*dtaper + deltaE*taper;
termEnergy *= taper; termEnergy *= taper;
} }
tempEnergy += (includeInteraction ? termEnergy : 0); tempEnergy += (includeInteraction ? termEnergy : 0);
dEdR -= (includeInteraction ? deltaE : 0); dEdR -= (includeInteraction ? deltaE*invR : 0);
} }
/**
* This defines three different closely related functions, depending on which constant (F1, T1, or T3) is defined.
*/
#if defined F1
__device__ void computeOneInteractionF1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real& energy, real3& outputForce) {
#elif defined T1
__device__ void computeOneInteractionT1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce) {
#else
__device__ void computeOneInteractionT3(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce) {
#endif
#ifdef F1
const float uScale = 1;
real ddsc3_0 = 0;
real ddsc3_1 = 0;
real ddsc3_2 = 0;
real ddsc5_0 = 0;
real ddsc5_1 = 0;
real ddsc5_2 = 0;
real ddsc7_0 = 0;
real ddsc7_1 = 0;
real ddsc7_2 = 0;
#endif
real xr = atom2.posq.x - atom1.posq.x;
real yr = atom2.posq.y - atom1.posq.y;
real zr = atom2.posq.z - atom1.posq.z;
real r2 = xr*xr + yr*yr + zr*zr;
real r = SQRT(r2);
real rr1 = RECIP(r);
real rr2 = rr1*rr1;
real rr3 = rr1*rr2;
real rr5 = 3*rr3*rr2;
real rr7 = 5*rr5*rr2;
real rr9 = 7*rr7*rr2;
#ifdef F1
real rr11 = 9*rr9*rr2;
#endif
real scale3 = 1;
real scale5 = 1;
real scale7 = 1;
real pdamp = atom1.damp*atom2.damp;
if (pdamp != 0) {
real ratio = r/pdamp;
float pGamma = atom2.thole > atom1.thole ? atom1.thole : atom2.thole;
real damp = ratio*ratio*ratio*pGamma;
real dampExp = EXP(-damp);
real damp1 = damp + 1;
real damp2 = damp*damp;
scale3 = 1 - dampExp;
scale5 = 1 - damp1*dampExp;
scale7 = 1 - (damp1 + 0.6f*damp2)*dampExp;
#ifdef F1
real factor = 3*damp*dampExp*rr2;
real factor7 = -0.2f + 0.6f*damp;
ddsc3_0 = factor*xr;
ddsc5_0 = ddsc3_0*damp;
ddsc7_0 = ddsc5_0*factor7;
ddsc3_1 = factor*yr;
ddsc5_1 = ddsc3_1*damp;
ddsc7_1 = ddsc5_1*factor7;
ddsc3_2 = factor*zr;
ddsc5_2 = ddsc3_2*damp;
ddsc7_2 = ddsc5_2*factor7;
#endif
}
#if defined F1
real scale3i = rr3*scale3*uScale;
real scale5i = rr5*scale5*uScale;
#endif
real dsc3 = rr3*scale3*dScale;
real psc3 = rr3*scale3*pScale;
real dsc5 = rr5*scale5*dScale;
real psc5 = rr5*scale5*pScale;
real dsc7 = rr7*scale7*dScale;
real psc7 = rr7*scale7*pScale;
real atom2quadrupoleZZ = 1-atom2.quadrupoleXX-atom2.quadrupoleYY;
real qJr_0 = atom2.quadrupoleXX*xr + atom2.quadrupoleXY*yr + atom2.quadrupoleXZ*zr;
real qJr_1 = atom2.quadrupoleXY*xr + atom2.quadrupoleYY*yr + atom2.quadrupoleYZ*zr;
real qJr_2 = atom2.quadrupoleXZ*xr + atom2.quadrupoleYZ*yr + atom2quadrupoleZZ*zr;
real atom1quadrupoleZZ = 1-atom1.quadrupoleXX-atom1.quadrupoleYY;
real qIr_0 = atom1.quadrupoleXX*xr + atom1.quadrupoleXY*yr + atom1.quadrupoleXZ*zr;
real qIr_1 = atom1.quadrupoleXY*xr + atom1.quadrupoleYY*yr + atom1.quadrupoleYZ*zr;
real qIr_2 = atom1.quadrupoleXZ*xr + atom1.quadrupoleYZ*yr + atom1quadrupoleZZ*zr;
#if defined F1
real sc2 = atom1.dipole.x*atom2.dipole.x + atom1.dipole.y*atom2.dipole.y + atom1.dipole.z*atom2.dipole.z;
#endif
#if defined F1 || defined T1
real sc4 = atom2.dipole.x*xr + atom2.dipole.y*yr + atom2.dipole.z*zr;
real sc6 = qJr_0*xr + qJr_1*yr + qJr_2*zr;
#endif
#if defined F1 || defined T3
real sc3 = atom1.dipole.x*xr + atom1.dipole.y*yr + atom1.dipole.z*zr;
real sc5 = qIr_0*xr + qIr_1*yr + qIr_2*zr;
#endif
#if defined F1
real sc7 = qIr_0*atom2.dipole.x + qIr_1*atom2.dipole.y + qIr_2*atom2.dipole.z;
real sc8 = qJr_0*atom1.dipole.x + qJr_1*atom1.dipole.y + qJr_2*atom1.dipole.z;
real sc9 = qIr_0*qJr_0 + qIr_1*qJr_1 + qIr_2*qJr_2;
real sc10 = atom1.quadrupoleXX*atom2.quadrupoleXX + atom1.quadrupoleXY*atom2.quadrupoleXY + atom1.quadrupoleXZ*atom2.quadrupoleXZ +
atom1.quadrupoleXY*atom2.quadrupoleXY + atom1.quadrupoleYY*atom2.quadrupoleYY + atom1.quadrupoleYZ*atom2.quadrupoleYZ +
atom1.quadrupoleXZ*atom2.quadrupoleXZ + atom1.quadrupoleYZ*atom2.quadrupoleYZ + atom1quadrupoleZZ*atom2quadrupoleZZ;
real sci1 = atom1.inducedDipole.x*atom2.dipole.x + atom1.inducedDipole.y*atom2.dipole.y + atom1.inducedDipole.z*atom2.dipole.z +
atom2.inducedDipole.x*atom1.dipole.x + atom2.inducedDipole.y*atom1.dipole.y + atom2.inducedDipole.z*atom1.dipole.z;
#endif
#if defined F1 || defined T3
real sci3 = atom1.inducedDipole.x*xr + atom1.inducedDipole.y*yr + atom1.inducedDipole.z*zr;
#endif
#if defined F1
real sci7 = qIr_0*atom2.inducedDipole.x + qIr_1*atom2.inducedDipole.y + qIr_2*atom2.inducedDipole.z;
real sci8 = qJr_0*atom1.inducedDipole.x + qJr_1*atom1.inducedDipole.y + qJr_2*atom1.inducedDipole.z;
#endif
#if defined F1 || defined T1
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
#endif
#if defined F1
real scip1 = atom1.inducedDipolePolar.x*atom2.dipole.x + atom1.inducedDipolePolar.y*atom2.dipole.y + atom1.inducedDipolePolar.z*atom2.dipole.z +
atom2.inducedDipolePolar.x*atom1.dipole.x + atom2.inducedDipolePolar.y*atom1.dipole.y + atom2.inducedDipolePolar.z*atom1.dipole.z;
real scip2 = atom1.inducedDipole.x*atom2.inducedDipolePolar.x + atom1.inducedDipole.y*atom2.inducedDipolePolar.y + atom1.inducedDipole.z*atom2.inducedDipolePolar.z +
atom2.inducedDipole.x*atom1.inducedDipolePolar.x + atom2.inducedDipole.y*atom1.inducedDipolePolar.y + atom2.inducedDipole.z*atom1.inducedDipolePolar.z;
#endif
#if defined F1 || defined T3
real scip3 = ((atom1.inducedDipolePolar.x)*(xr) + (atom1.inducedDipolePolar.y)*(yr) + (atom1.inducedDipolePolar.z)*(zr));
#endif
#if defined F1 || defined T1
real scip4 = ((atom2.inducedDipolePolar.x)*(xr) + (atom2.inducedDipolePolar.y)*(yr) + (atom2.inducedDipolePolar.z)*(zr));
#endif
#ifdef F1
real scip7 = ((qIr_0)*(atom2.inducedDipolePolar.x) + (qIr_1)*(atom2.inducedDipolePolar.y) + (qIr_2)*(atom2.inducedDipolePolar.z));
real scip8 = ((qJr_0)*(atom1.inducedDipolePolar.x) + (qJr_1)*(atom1.inducedDipolePolar.y) + (qJr_2)*(atom1.inducedDipolePolar.z));
real gli1 = atom2.posq.w*sci3 - atom1.posq.w*sci4;
real gli6 = sci1;
real glip1 = atom2.posq.w*scip3 - atom1.posq.w*scip4;
real glip6 = scip1;
real gli2 = -sc3*sci4 - sci3*sc4;
real gli3 = sci3*sc6 - sci4*sc5;
real gli7 = 2*(sci7-sci8);
real glip2 = -sc3*scip4 - scip3*sc4;
real glip3 = scip3*sc6 - scip4*sc5;
real glip7 = 2*(scip7-scip8);
real factor3 = rr3*((gli1 + gli6)*pScale + (glip1 + glip6)*dScale);
real factor5 = rr5*((gli2 + gli7)*pScale + (glip2 + glip7)*dScale);
real factor7 = rr7*(gli3*pScale + glip3*dScale);
real ftm2i_0 = -0.5f*(factor3*ddsc3_0 + factor5*ddsc5_0 + factor7*ddsc7_0);
real ftm2i_1 = -0.5f*(factor3*ddsc3_1 + factor5*ddsc5_1 + factor7*ddsc7_1);
real ftm2i_2 = -0.5f*(factor3*ddsc3_2 + factor5*ddsc5_2 + factor7*ddsc7_2);
real gl0 = atom1.posq.w*atom2.posq.w;
real gl1 = atom2.posq.w*sc3 - atom1.posq.w*sc4;
real gl2 = atom1.posq.w*sc6 + atom2.posq.w*sc5 - sc3*sc4;
real gl3 = sc3*sc6 - sc4*sc5;
real gl4 = sc5*sc6;
real gl6 = sc2;
real gl7 = 2*(sc7-sc8);
real gl8 = 2*sc10;
real gl5 = -4*sc9;
real gf1 = rr3*gl0 + rr5*(gl1+gl6) + rr7*(gl2+gl7+gl8) + rr9*(gl3+gl5) + rr11*gl4;
#endif
#if defined F1 || defined T1
real gf2 = -atom2.posq.w*rr3 + sc4*rr5 - sc6*rr7;
real gf5 = 2*(-atom2.posq.w*rr5+sc4*rr7-sc6*rr9);
#endif
#if defined F1 || defined T3
real gf3 = atom1.posq.w*rr3 + sc3*rr5 + sc5*rr7;
real gf6 = 2*(-atom1.posq.w*rr5-sc3*rr7-sc5*rr9);
#endif
#ifdef F1
real em = mScale*(rr1*gl0 + rr3*(gl1+gl6) + rr5*(gl2+gl7+gl8) + rr7*(gl3+gl5) + rr9*gl4);
real ei = 0.5f*((gli1+gli6)*psc3 + (gli2+gli7)*psc5 + gli3*psc7);
energy = em+ei;
#endif
#if defined F1 || defined T1
real qIdJ_0 = atom1.quadrupoleXX*atom2.dipole.x + atom1.quadrupoleXY*atom2.dipole.y + atom1.quadrupoleXZ*atom2.dipole.z;
real qIdJ_1 = atom1.quadrupoleXY*atom2.dipole.x + atom1.quadrupoleYY*atom2.dipole.y + atom1.quadrupoleYZ*atom2.dipole.z;
real qIdJ_2 = atom1.quadrupoleXZ*atom2.dipole.x + atom1.quadrupoleYZ*atom2.dipole.y + atom1quadrupoleZZ*atom2.dipole.z;
real qIqJr_0 = atom1.quadrupoleXX*qJr_0 + atom1.quadrupoleXY*qJr_1 + atom1.quadrupoleXZ*qJr_2;
real qIqJr_1 = atom1.quadrupoleXY*qJr_0 + atom1.quadrupoleYY*qJr_1 + atom1.quadrupoleYZ*qJr_2;
real qIqJr_2 = atom1.quadrupoleXZ*qJr_0 + atom1.quadrupoleYZ*qJr_1 + atom1quadrupoleZZ*qJr_2;
#endif
#ifdef F1
real qkqir_0 = atom2.quadrupoleXX*qIr_0 + atom2.quadrupoleXY*qIr_1 + atom2.quadrupoleXZ*qIr_2;
real qkqir_1 = atom2.quadrupoleXY*qIr_0 + atom2.quadrupoleYY*qIr_1 + atom2.quadrupoleYZ*qIr_2;
real qkqir_2 = atom2.quadrupoleXZ*qIr_0 + atom2.quadrupoleYZ*qIr_1 + atom2quadrupoleZZ*qIr_2;
real qkdi_0 = atom2.quadrupoleXX*atom1.dipole.x + atom2.quadrupoleXY*atom1.dipole.y + atom2.quadrupoleXZ*atom1.dipole.z;
real qkdi_1 = atom2.quadrupoleXY*atom1.dipole.x + atom2.quadrupoleYY*atom1.dipole.y + atom2.quadrupoleYZ*atom1.dipole.z;
real qkdi_2 = atom2.quadrupoleXZ*atom1.dipole.x + atom2.quadrupoleYZ*atom1.dipole.y + atom2quadrupoleZZ*atom1.dipole.z;
real ftm2_0 = mScale*(gf1*xr + gf2*atom1.dipole.x + gf3*atom2.dipole.x + 2*rr5*(qkdi_0 - qIdJ_0) + gf5*qIr_0 + gf6*qJr_0 + 4*rr7*(qIqJr_0 + qkqir_0));
real ftm2_1 = mScale*(gf1*yr + gf2*atom1.dipole.y + gf3*atom2.dipole.y + 2*rr5*(qkdi_1 - qIdJ_1) + gf5*qIr_1 + gf6*qJr_1 + 4*rr7*(qIqJr_1 + qkqir_1));
real ftm2_2 = mScale*(gf1*zr + gf2*atom1.dipole.z + gf3*atom2.dipole.z + 2*rr5*(qkdi_2 - qIdJ_2) + gf5*qIr_2 + gf6*qJr_2 + 4*rr7*(qIqJr_2 + qkqir_2));
real gfi1 = rr2*(1.5f*((gli1+gli6)*psc3 + (glip1+glip6)*dsc3 + scip2*scale3i) + 2.5f*((gli7+gli2)*psc5 + (glip7+glip2)*dsc5 - (sci3*scip4+scip3*sci4)*scale5i) + 3.5f*(gli3*psc7+glip3*dsc7));
ftm2i_0 += gfi1*xr;
ftm2i_1 += gfi1*yr;
ftm2i_2 += gfi1*zr;
#endif
#if defined F1 || defined T1
real gfi5 = (sci4*psc7 + scip4*dsc7);
#endif
#if defined F1 || defined T3
real gfi6 = -(sci3*psc7 + scip3*dsc7);
#endif
#if defined F1 || defined T1
real qIuJ_0 = atom1.quadrupoleXX*atom2.inducedDipole.x + atom1.quadrupoleXY*atom2.inducedDipole.y + atom1.quadrupoleXZ*atom2.inducedDipole.z;
real qIuJ_1 = atom1.quadrupoleXY*atom2.inducedDipole.x + atom1.quadrupoleYY*atom2.inducedDipole.y + atom1.quadrupoleYZ*atom2.inducedDipole.z;
real qIuJ_2 = atom1.quadrupoleXZ*atom2.inducedDipole.x + atom1.quadrupoleYZ*atom2.inducedDipole.y + atom1quadrupoleZZ*atom2.inducedDipole.z;
real qIuJp_0 = atom1.quadrupoleXX*atom2.inducedDipolePolar.x + atom1.quadrupoleXY*atom2.inducedDipolePolar.y + atom1.quadrupoleXZ*atom2.inducedDipolePolar.z;
real qIuJp_1 = atom1.quadrupoleXY*atom2.inducedDipolePolar.x + atom1.quadrupoleYY*atom2.inducedDipolePolar.y + atom1.quadrupoleYZ*atom2.inducedDipolePolar.z;
real qIuJp_2 = atom1.quadrupoleXZ*atom2.inducedDipolePolar.x + atom1.quadrupoleYZ*atom2.inducedDipolePolar.y + atom1quadrupoleZZ*atom2.inducedDipolePolar.z;
#endif
#if defined T3
real qJuIp_0 = atom2.quadrupoleXX*atom1.inducedDipolePolar.x + atom2.quadrupoleXY*atom1.inducedDipolePolar.y + atom2.quadrupoleXZ*atom1.inducedDipolePolar.z;
real qJuIp_1 = atom2.quadrupoleXY*atom1.inducedDipolePolar.x + atom2.quadrupoleYY*atom1.inducedDipolePolar.y + atom2.quadrupoleYZ*atom1.inducedDipolePolar.z;
real qJuIp_2 = atom2.quadrupoleXZ*atom1.inducedDipolePolar.x + atom2.quadrupoleYZ*atom1.inducedDipolePolar.y + atom2quadrupoleZZ*atom1.inducedDipolePolar.z;
real qJuI_0 = atom2.quadrupoleXX*atom1.inducedDipole.x + atom2.quadrupoleXY*atom1.inducedDipole.y + atom2.quadrupoleXZ*atom1.inducedDipole.z;
real qJuI_1 = atom2.quadrupoleXY*atom1.inducedDipole.x + atom2.quadrupoleYY*atom1.inducedDipole.y + atom2.quadrupoleYZ*atom1.inducedDipole.z;
real qJuI_2 = atom2.quadrupoleXZ*atom1.inducedDipole.x + atom2.quadrupoleYZ*atom1.inducedDipole.y + atom2quadrupoleZZ*atom1.inducedDipole.z;
#endif
#ifdef F1
real qkui_0 = atom2.quadrupoleXX*atom1.inducedDipole.x + atom2.quadrupoleXY*atom1.inducedDipole.y + atom2.quadrupoleXZ*atom1.inducedDipole.z;
real qkui_1 = atom2.quadrupoleXY*atom1.inducedDipole.x + atom2.quadrupoleYY*atom1.inducedDipole.y + atom2.quadrupoleYZ*atom1.inducedDipole.z;
real qkui_2 = atom2.quadrupoleXZ*atom1.inducedDipole.x + atom2.quadrupoleYZ*atom1.inducedDipole.y + atom2quadrupoleZZ*atom1.inducedDipole.z;
real qkuip_0 = atom2.quadrupoleXX*atom1.inducedDipolePolar.x + atom2.quadrupoleXY*atom1.inducedDipolePolar.y + atom2.quadrupoleXZ*atom1.inducedDipolePolar.z;
real qkuip_1 = atom2.quadrupoleXY*atom1.inducedDipolePolar.x + atom2.quadrupoleYY*atom1.inducedDipolePolar.y + atom2.quadrupoleYZ*atom1.inducedDipolePolar.z;
real qkuip_2 = atom2.quadrupoleXZ*atom1.inducedDipolePolar.x + atom2.quadrupoleYZ*atom1.inducedDipolePolar.y + atom2quadrupoleZZ*atom1.inducedDipolePolar.z;
ftm2i_0 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.x*psc3 + atom1.inducedDipolePolar.x*dsc3) +
sc4*(atom1.inducedDipole.x*psc5 + atom1.inducedDipolePolar.x*dsc5) -
sc6*(atom1.inducedDipole.x*psc7 + atom1.inducedDipolePolar.x*dsc7)) +
0.5f*(atom1.posq.w*(atom2.inducedDipole.x*psc3+atom2.inducedDipolePolar.x*dsc3) +
sc3*(atom2.inducedDipole.x*psc5 +atom2.inducedDipolePolar.x*dsc5) +
sc5*(atom2.inducedDipole.x*psc7 +atom2.inducedDipolePolar.x*dsc7)) +
scale5i*(sci4*atom1.inducedDipolePolar.x+scip4*atom1.inducedDipole.x +
sci3*atom2.inducedDipolePolar.x+scip3*atom2.inducedDipole.x)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.x +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.x +
((qkui_0-qIuJ_0)*psc5 + (qkuip_0-qIuJp_0)*dsc5) +
gfi5*qIr_0 + gfi6*qJr_0;
ftm2i_1 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.y*psc3 + atom1.inducedDipolePolar.y*dsc3) +
sc4*(atom1.inducedDipole.y*psc5 + atom1.inducedDipolePolar.y*dsc5) -
sc6*(atom1.inducedDipole.y*psc7 + atom1.inducedDipolePolar.y*dsc7)) +
(atom1.posq.w*(atom2.inducedDipole.y*psc3+atom2.inducedDipolePolar.y*dsc3) +
sc3*(atom2.inducedDipole.y*psc5+atom2.inducedDipolePolar.y*dsc5) +
sc5*(atom2.inducedDipole.y*psc7+atom2.inducedDipolePolar.y*dsc7))*0.5f +
scale5i*(sci4*atom1.inducedDipolePolar.y+scip4*atom1.inducedDipole.y + sci3*atom2.inducedDipolePolar.y+scip3*atom2.inducedDipole.y)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.y +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.y +
((qkui_1-qIuJ_1)*psc5 + (qkuip_1-qIuJp_1)*dsc5) +
gfi5*qIr_1 + gfi6*qJr_1;
ftm2i_2 += 0.5f*(-atom2.posq.w*(atom1.inducedDipole.z*psc3 + atom1.inducedDipolePolar.z*dsc3) +
sc4*(atom1.inducedDipole.z*psc5 + atom1.inducedDipolePolar.z*dsc5) -
sc6*(atom1.inducedDipole.z*psc7 + atom1.inducedDipolePolar.z*dsc7)) +
(atom1.posq.w*(atom2.inducedDipole.z*psc3+atom2.inducedDipolePolar.z*dsc3) +
sc3*(atom2.inducedDipole.z*psc5+atom2.inducedDipolePolar.z*dsc5) +
sc5*(atom2.inducedDipole.z*psc7+atom2.inducedDipolePolar.z*dsc7))*0.5f +
scale5i*(sci4*atom1.inducedDipolePolar.z+scip4*atom1.inducedDipole.z +
sci3*atom2.inducedDipolePolar.z+scip3*atom2.inducedDipole.z)*0.5f +
0.5f*(sci4*psc5+scip4*dsc5)*atom1.dipole.z +
0.5f*(sci3*psc5+scip3*dsc5)*atom2.dipole.z +
((qkui_2-qIuJ_2)*psc5 + (qkuip_2-qIuJp_2)*dsc5) +
gfi5*qIr_2 + gfi6*qJr_2;
#ifdef DIRECT_POLARIZATION
real gfd = 0.5*(3*rr2*scip2*scale3i - 5*rr2*(scip3*sci4+sci3*scip4)*scale5i);
real temp5 = 0.5*scale5i;
real fdir_0 = gfd*xr + temp5*(sci4*atom1.inducedDipolePolar.x + scip4*atom1.inducedDipole.x + sci3*atom2.inducedDipolePolar.x + scip3*atom2.inducedDipole.x);
real fdir_1 = gfd*yr + temp5*(sci4*atom1.inducedDipolePolar.y + scip4*atom1.inducedDipole.y + sci3*atom2.inducedDipolePolar.y + scip3*atom2.inducedDipole.y);
real fdir_2 = gfd*zr + temp5*(sci4*atom1.inducedDipolePolar.z + scip4*atom1.inducedDipole.z + sci3*atom2.inducedDipolePolar.z + scip3*atom2.inducedDipole.z);
ftm2i_0 -= fdir_0;
ftm2i_1 -= fdir_1;
ftm2i_2 -= fdir_2;
#else
real scaleF = 0.5f*uScale;
real inducedFactor3 = scip2*rr3*scaleF;
real inducedFactor5 = (sci3*scip4+scip3*sci4)*rr5*scaleF;
real findmp_0 = inducedFactor3*ddsc3_0 - inducedFactor5*ddsc5_0;
real findmp_1 = inducedFactor3*ddsc3_1 - inducedFactor5*ddsc5_1;
real findmp_2 = inducedFactor3*ddsc3_2 - inducedFactor5*ddsc5_2;
ftm2i_0 -= findmp_0;
ftm2i_1 -= findmp_1;
ftm2i_2 -= findmp_2;
#endif
#endif
#if defined T1
real gti2 = 0.5f*(sci4*psc5+scip4*dsc5);
real gti5 = gfi5;
#endif
#if defined T3
real gti3 = 0.5f*(sci3*psc5+scip3*dsc5);
real gti6 = gfi6;
#endif
#if defined T1 || defined T3
real dixdk_0 = atom1.dipole.y*atom2.dipole.z - atom1.dipole.z*atom2.dipole.y;
real dixdk_1 = atom1.dipole.z*atom2.dipole.x - atom1.dipole.x*atom2.dipole.z;
real dixdk_2 = atom1.dipole.x*atom2.dipole.y - atom1.dipole.y*atom2.dipole.x;
#if defined T1
real dixuk_0 = atom1.dipole.y*atom2.inducedDipole.z - atom1.dipole.z*atom2.inducedDipole.y;
real dixuk_1 = atom1.dipole.z*atom2.inducedDipole.x - atom1.dipole.x*atom2.inducedDipole.z;
real dixuk_2 = atom1.dipole.x*atom2.inducedDipole.y - atom1.dipole.y*atom2.inducedDipole.x;
#endif
#endif
#ifdef T1
real dixukp_0 = atom1.dipole.y*atom2.inducedDipolePolar.z - atom1.dipole.z*atom2.inducedDipolePolar.y;
real dixukp_1 = atom1.dipole.z*atom2.inducedDipolePolar.x - atom1.dipole.x*atom2.inducedDipolePolar.z;
real dixukp_2 = atom1.dipole.x*atom2.inducedDipolePolar.y - atom1.dipole.y*atom2.inducedDipolePolar.x;
#endif
#ifdef T1
real dixr_0 = atom1.dipole.y*zr - atom1.dipole.z*yr;
real dixr_1 = atom1.dipole.z*xr - atom1.dipole.x*zr;
real dixr_2 = atom1.dipole.x*yr - atom1.dipole.y*xr;
#endif
#ifdef T1
real rxqiukp_0 = yr*qIuJp_2 - zr*qIuJp_1;
real rxqiukp_1 = zr*qIuJp_0 - xr*qIuJp_2;
real rxqiukp_2 = xr*qIuJp_1 - yr*qIuJp_0;
real rxqir_0 = yr*qIr_2 - zr*qIr_1;
real rxqir_1 = zr*qIr_0 - xr*qIr_2;
real rxqir_2 = xr*qIr_1 - yr*qIr_0;
real rxqiuk_0 = yr*qIuJ_2 - zr*qIuJ_1;
real rxqiuk_1 = zr*qIuJ_0 - xr*qIuJ_2;
real rxqiuk_2 = xr*qIuJ_1 - yr*qIuJ_0;
real ukxqir_0 = atom2.inducedDipole.y*qIr_2 - atom2.inducedDipole.z*qIr_1;
real ukxqir_1 = atom2.inducedDipole.z*qIr_0 - atom2.inducedDipole.x*qIr_2;
real ukxqir_2 = atom2.inducedDipole.x*qIr_1 - atom2.inducedDipole.y*qIr_0;
real ukxqirp_0 = atom2.inducedDipolePolar.y*qIr_2 - atom2.inducedDipolePolar.z*qIr_1;
real ukxqirp_1 = atom2.inducedDipolePolar.z*qIr_0 - atom2.inducedDipolePolar.x*qIr_2;
real ukxqirp_2 = atom2.inducedDipolePolar.x*qIr_1 - atom2.inducedDipolePolar.y*qIr_0;
real dixqkr_0 = atom1.dipole.y*qJr_2 - atom1.dipole.z*qJr_1;
real dixqkr_1 = atom1.dipole.z*qJr_0 - atom1.dipole.x*qJr_2;
real dixqkr_2 = atom1.dipole.x*qJr_1 - atom1.dipole.y*qJr_0;
real dkxqir_0 = atom2.dipole.y*qIr_2 - atom2.dipole.z*qIr_1;
real dkxqir_1 = atom2.dipole.z*qIr_0 - atom2.dipole.x*qIr_2;
real dkxqir_2 = atom2.dipole.x*qIr_1 - atom2.dipole.y*qIr_0;
real rxqikr_0 = yr*qIqJr_2 - zr*qIqJr_1;
real rxqikr_1 = zr*qIqJr_0 - xr*qIqJr_2;
real rxqikr_2 = xr*qIqJr_1 - yr*qIqJr_0;
real rxqidk_0 = yr*qIdJ_2 - zr*qIdJ_1;
real rxqidk_1 = zr*qIdJ_0 - xr*qIdJ_2;
real rxqidk_2 = xr*qIdJ_1 - yr*qIdJ_0;
real qkrxqir_0 = qJr_1*qIr_2 - qJr_2*qIr_1;
real qkrxqir_1 = qJr_2*qIr_0 - qJr_0*qIr_2;
real qkrxqir_2 = qJr_0*qIr_1 - qJr_1*qIr_0;
#endif
#if defined T1 || defined T3
real qixqk_0 = atom1.quadrupoleXY*atom2.quadrupoleXZ + atom1.quadrupoleYY*atom2.quadrupoleYZ + atom1.quadrupoleYZ*atom2quadrupoleZZ -
atom1.quadrupoleXZ*atom2.quadrupoleXY - atom1.quadrupoleYZ*atom2.quadrupoleYY - atom1quadrupoleZZ*atom2.quadrupoleYZ;
real qixqk_1 = atom1.quadrupoleXZ*atom2.quadrupoleXX + atom1.quadrupoleYZ*atom2.quadrupoleXY + atom1quadrupoleZZ*atom2.quadrupoleXZ -
atom1.quadrupoleXX*atom2.quadrupoleXZ - atom1.quadrupoleXY*atom2.quadrupoleYZ - atom1.quadrupoleXZ*atom2quadrupoleZZ;
real qixqk_2 = atom1.quadrupoleXX*atom2.quadrupoleXY + atom1.quadrupoleXY*atom2.quadrupoleYY + atom1.quadrupoleXZ*atom2.quadrupoleYZ -
atom1.quadrupoleXY*atom2.quadrupoleXX - atom1.quadrupoleYY*atom2.quadrupoleXY - atom1.quadrupoleYZ*atom2.quadrupoleXZ;
#endif
#ifdef T1
real ttm2_0 = -rr3*dixdk_0 + gf2*dixr_0-gf5*rxqir_0 + 2*rr5*(dixqkr_0 + dkxqir_0 + rxqidk_0-2*qixqk_0) - 4*rr7*(rxqikr_0 + qkrxqir_0);
real ttm2_1 = -rr3*dixdk_1 + gf2*dixr_1-gf5*rxqir_1 + 2*rr5*(dixqkr_1 + dkxqir_1 + rxqidk_1-2*qixqk_1) - 4*rr7*(rxqikr_1 + qkrxqir_1);
real ttm2_2 = -rr3*dixdk_2 + gf2*dixr_2-gf5*rxqir_2 + 2*rr5*(dixqkr_2 + dkxqir_2 + rxqidk_2-2*qixqk_2) - 4*rr7*(rxqikr_2 + qkrxqir_2);
real ttm2i_0 = -(dixuk_0*psc3+dixukp_0*dsc3)*0.5f + gti2*dixr_0 + ((ukxqir_0+ rxqiuk_0)*psc5 + (ukxqirp_0 + rxqiukp_0)*dsc5) - gti5*rxqir_0;
real ttm2i_1 = -(dixuk_1*psc3+dixukp_1*dsc3)*0.5f + gti2*dixr_1 + ((ukxqir_1+ rxqiuk_1)*psc5 + (ukxqirp_1 + rxqiukp_1)*dsc5) - gti5*rxqir_1;
real ttm2i_2 = -(dixuk_2*psc3+dixukp_2*dsc3)*0.5f + gti2*dixr_2 + ((ukxqir_2+ rxqiuk_2)*psc5 + (ukxqirp_2 + rxqiukp_2)*dsc5) - gti5*rxqir_2;
#endif
#ifdef T3
real qJqIr_0 = atom2.quadrupoleXX*qIr_0 + atom2.quadrupoleXY*qIr_1 + atom2.quadrupoleXZ*qIr_2;
real qJqIr_1 = atom2.quadrupoleXY*qIr_0 + atom2.quadrupoleYY*qIr_1 + atom2.quadrupoleYZ*qIr_2;
real qJqIr_2 = atom2.quadrupoleXZ*qIr_0 + atom2.quadrupoleYZ*qIr_1 + atom2quadrupoleZZ*qIr_2;
real qJdI_0 = atom2.quadrupoleXX*atom1.dipole.x + atom2.quadrupoleXY*atom1.dipole.y + atom2.quadrupoleXZ*atom1.dipole.z;
real qJdI_1 = atom2.quadrupoleXY*atom1.dipole.x + atom2.quadrupoleYY*atom1.dipole.y + atom2.quadrupoleYZ*atom1.dipole.z;
real qJdI_2 = atom2.quadrupoleXZ*atom1.dipole.x + atom2.quadrupoleYZ*atom1.dipole.y + atom2quadrupoleZZ*atom1.dipole.z;
real dkxr_0 = atom2.dipole.y*zr - atom2.dipole.z*yr;
real dkxr_1 = atom2.dipole.z*xr - atom2.dipole.x*zr;
real dkxr_2 = atom2.dipole.x*yr - atom2.dipole.y*xr;
real rxqkr_0 = yr*qJr_2 - zr*qJr_1;
real rxqkr_1 = zr*qJr_0 - xr*qJr_2;
real rxqkr_2 = xr*qJr_1 - yr*qJr_0;
real dixqkr_0 = atom1.dipole.y*qJr_2 - atom1.dipole.z*qJr_1;
real dixqkr_1 = atom1.dipole.z*qJr_0 - atom1.dipole.x*qJr_2;
real dixqkr_2 = atom1.dipole.x*qJr_1 - atom1.dipole.y*qJr_0;
real dkxqir_0 = atom2.dipole.y*qIr_2 - atom2.dipole.z*qIr_1;
real dkxqir_1 = atom2.dipole.z*qIr_0 - atom2.dipole.x*qIr_2;
real dkxqir_2 = atom2.dipole.x*qIr_1 - atom2.dipole.y*qIr_0;
real rxqkdi_0 = yr*qJdI_2 - zr*qJdI_1;
real rxqkdi_1 = zr*qJdI_0 - xr*qJdI_2;
real rxqkdi_2 = xr*qJdI_1 - yr*qJdI_0;
real rxqkir_0 = yr*qJqIr_2 - zr*qJqIr_1;
real rxqkir_1 = zr*qJqIr_0 - xr*qJqIr_2;
real rxqkir_2 = xr*qJqIr_1 - yr*qJqIr_0;
real qkrxqir_0 = qJr_1*qIr_2 - qJr_2*qIr_1;
real qkrxqir_1 = qJr_2*qIr_0 - qJr_0*qIr_2;
real qkrxqir_2 = qJr_0*qIr_1 - qJr_1*qIr_0;
real dkxui_0 = atom2.dipole.y*atom1.inducedDipole.z - atom2.dipole.z*atom1.inducedDipole.y;
real dkxui_1 = atom2.dipole.z*atom1.inducedDipole.x - atom2.dipole.x*atom1.inducedDipole.z;
real dkxui_2 = atom2.dipole.x*atom1.inducedDipole.y - atom2.dipole.y*atom1.inducedDipole.x;
real dkxuip_0 = atom2.dipole.y*atom1.inducedDipolePolar.z - atom2.dipole.z*atom1.inducedDipolePolar.y;
real dkxuip_1 = atom2.dipole.z*atom1.inducedDipolePolar.x - atom2.dipole.x*atom1.inducedDipolePolar.z;
real dkxuip_2 = atom2.dipole.x*atom1.inducedDipolePolar.y - atom2.dipole.y*atom1.inducedDipolePolar.x;
real uixqkrp_0 = atom1.inducedDipolePolar.y*qJr_2 - atom1.inducedDipolePolar.z*qJr_1;
real uixqkrp_1 = atom1.inducedDipolePolar.z*qJr_0 - atom1.inducedDipolePolar.x*qJr_2;
real uixqkrp_2 = atom1.inducedDipolePolar.x*qJr_1 - atom1.inducedDipolePolar.y*qJr_0;
real uixqkr_0 = atom1.inducedDipole.y*qJr_2 - atom1.inducedDipole.z*qJr_1;
real uixqkr_1 = atom1.inducedDipole.z*qJr_0 - atom1.inducedDipole.x*qJr_2;
real uixqkr_2 = atom1.inducedDipole.x*qJr_1 - atom1.inducedDipole.y*qJr_0;
real rxqkuip_0 = yr*qJuIp_2 - zr*qJuIp_1;
real rxqkuip_1 = zr*qJuIp_0 - xr*qJuIp_2;
real rxqkuip_2 = xr*qJuIp_1 - yr*qJuIp_0;
real rxqkui_0 = yr*qJuI_2 - zr*qJuI_1;
real rxqkui_1 = zr*qJuI_0 - xr*qJuI_2;
real rxqkui_2 = xr*qJuI_1 - yr*qJuI_0;
real ttm3_0 = rr3*dixdk_0 + gf3*dkxr_0 - gf6*rxqkr_0 - 2*rr5*(dixqkr_0 + dkxqir_0 + rxqkdi_0 - 2*qixqk_0) - 4*rr7*(rxqkir_0 - qkrxqir_0);
real ttm3_1 = rr3*dixdk_1 + gf3*dkxr_1 - gf6*rxqkr_1 - 2*rr5*(dixqkr_1 + dkxqir_1 + rxqkdi_1 - 2*qixqk_1) - 4*rr7*(rxqkir_1 - qkrxqir_1);
real ttm3_2 = rr3*dixdk_2 + gf3*dkxr_2 - gf6*rxqkr_2 - 2*rr5*(dixqkr_2 + dkxqir_2 + rxqkdi_2 - 2*qixqk_2) - 4*rr7*(rxqkir_2 - qkrxqir_2);
real ttm3i_0 = -(dkxui_0*psc3+ dkxuip_0*dsc3)*0.5f + gti3*dkxr_0 - ((uixqkr_0 + rxqkui_0)*psc5 + (uixqkrp_0 + rxqkuip_0)*dsc5) - gti6*rxqkr_0;
real ttm3i_1 = -(dkxui_1*psc3+ dkxuip_1*dsc3)*0.5f + gti3*dkxr_1 - ((uixqkr_1 + rxqkui_1)*psc5 + (uixqkrp_1 + rxqkuip_1)*dsc5) - gti6*rxqkr_1;
real ttm3i_2 = -(dkxui_2*psc3+ dkxuip_2*dsc3)*0.5f + gti3*dkxr_2 - ((uixqkr_2 + rxqkui_2)*psc5 + (uixqkrp_2 + rxqkuip_2)*dsc5) - gti6*rxqkr_2;
#endif
if (mScale < 1) {
#ifdef T1
ttm2_0 *= mScale;
ttm2_1 *= mScale;
ttm2_2 *= mScale;
#endif
#ifdef T3
ttm3_0 *= mScale;
ttm3_1 *= mScale;
ttm3_2 *= mScale;
#endif
}
#ifdef F1
outputForce.x = -(ftm2_0+ftm2i_0);
outputForce.y = -(ftm2_1+ftm2i_1);
outputForce.z = -(ftm2_2+ftm2i_2);
#endif
#ifdef T1
outputForce.x = (ttm2_0 + ttm2i_0);
outputForce.y = (ttm2_1 + ttm2i_1);
outputForce.z = (ttm2_2 + ttm2i_2);
#endif
#ifdef T3
outputForce.x = (ttm3_0 + ttm3i_0);
outputForce.y = (ttm3_1 + ttm3i_1);
outputForce.z = (ttm3_2 + ttm3i_2);
#endif
}
#define TILE_SIZE 32
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef struct {
real4 posq;
real3 force, dipole, inducedDipole, inducedDipolePolar;
real quadrupoleXX, quadrupoleXY, quadrupoleXZ;
real quadrupoleYY, quadrupoleYZ;
float thole, damp;
} AtomData;
__device__ void computeOneInteractionF1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real& energy, real3& outputForce);
__device__ void computeOneInteractionT1(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce);
__device__ void computeOneInteractionT3(AtomData& atom1, volatile AtomData& atom2, float dScale, float pScale, float mScale, real3& outputForce);
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
data.posq = posq[atom];
data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2];
data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
float2 temp = dampingAndThole[atom];
data.damp = temp.x;
data.thole = temp.y;
}
__device__ real computeDScaleFactor(unsigned int polarizationGroup) {
return (polarizationGroup & 1 ? 0 : 1);
}
__device__ float computeMScaleFactor(uint2 covalent) {
bool x = (covalent.x & 1);
bool y = (covalent.y & 1);
return (x ? (y ? 0.0f : 0.4f) : (y ? 0.8f : 1.0f));
}
__device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGroup) {
bool x = (covalent.x & 1);
bool y = (covalent.y & 1);
bool p = (polarizationGroup & 1);
return (x && y ? 0.0f : (x && p ? 0.5f : 1.0f));
}
/**
* Compute electrostatic interactions.
*/
extern "C" __global__ void computeElectrostatics(
unsigned long long* __restrict__ forceBuffers, real* __restrict__ energyBuffer,
const real4* __restrict__ posq, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices,
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags,
#endif
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
#ifdef USE_CUTOFF
const unsigned int numTiles = interactionCount[0];
unsigned int pos = (numTiles > maxTiles ? startTileIndex+warp*numTileIndices/totalWarps : warp*numTiles/totalWarps);
unsigned int end = (numTiles > maxTiles ? startTileIndex+(warp+1)*numTileIndices/totalWarps : (warp+1)*numTiles/totalWarps);
#else
const unsigned int numTiles = numTileIndices;
unsigned int pos = startTileIndex+warp*numTiles/totalWarps;
unsigned int end = startTileIndex+(warp+1)*numTiles/totalWarps;
#endif
real energy = 0;
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
__shared__ unsigned int exclusionRange[2*WARPS_PER_GROUP];
__shared__ int exclusionIndex[WARPS_PER_GROUP];
#ifndef ENABLE_SHUFFLE
__shared__ real tempBuffer[3*THREAD_BLOCK_SIZE];
#endif
do {
// Extract the coordinates of this tile
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
const unsigned int tbx = threadIdx.x - tgx;
const unsigned int localGroupIndex = threadIdx.x/TILE_SIZE;
unsigned int x, y;
AtomData data;
if (pos < end) {
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
ushort2 tileIndices = tiles[pos];
x = tileIndices.x;
y = tileIndices.y;
}
else
#endif
{
y = (unsigned int) floor(NUM_BLOCKS+0.5f-SQRT((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y || x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
y += (x < y ? -1 : 1);
x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
}
}
unsigned int atom1 = x*TILE_SIZE + tgx;
loadAtomData(data, atom1, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
data.force = make_real3(0);
// Locate the exclusion data for this tile.
if (tgx < 2)
exclusionRange[2*localGroupIndex+tgx] = exclusionRowIndices[x+tgx];
if (tgx == 0)
exclusionIndex[localGroupIndex] = -1;
for (unsigned int i = exclusionRange[2*localGroupIndex]+tgx; i < exclusionRange[2*localGroupIndex+1]; i += TILE_SIZE)
if (exclusionIndices[i] == y)
exclusionIndex[localGroupIndex] = i*TILE_SIZE;
bool hasExclusions = (exclusionIndex[localGroupIndex] > -1);
if (pos >= end)
; // This warp is done.
else if (x == y) {
// This tile is on the diagonal.
localData[threadIdx.x].posq = data.posq;
localData[threadIdx.x].dipole = data.dipole;
localData[threadIdx.x].quadrupoleXX = data.quadrupoleXX;
localData[threadIdx.x].quadrupoleXY = data.quadrupoleXY;
localData[threadIdx.x].quadrupoleXZ = data.quadrupoleXZ;
localData[threadIdx.x].quadrupoleYY = data.quadrupoleYY;
localData[threadIdx.x].quadrupoleYZ = data.quadrupoleYZ;
localData[threadIdx.x].inducedDipole = data.inducedDipole;
localData[threadIdx.x].inducedDipolePolar = data.inducedDipolePolar;
localData[threadIdx.x].thole = data.thole; // IS THIS CORRECT?
localData[threadIdx.x].damp = data.damp; // IS THIS CORRECT?
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
// Compute forces.
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
float m = computeMScaleFactor(covalent);
computeOneInteractionF1(data, localData[tbx+j], d, p, m, tempEnergy, tempForce);
data.force += tempForce;
energy += 0.5f*tempEnergy;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
// Compute torques.
data.force = make_real3(0);
covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
float m = computeMScaleFactor(covalent);
computeOneInteractionT1(data, localData[tbx+j], d, p, m, tempForce);
data.force += tempForce;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
}
data.force *= ENERGY_SCALE_FACTOR;
atomicAdd(&forceBuffers[atom1], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
}
else {
// This is an off-diagonal tile.
unsigned int j = y*TILE_SIZE + tgx;
loadAtomData(localData[threadIdx.x], j, posq, labFrameDipole, labFrameQuadrupole, inducedDipole, inducedDipolePolar, dampingAndThole);
localData[threadIdx.x].force = make_real3(0);
#ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (!hasExclusions && flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
// Compute only a subset of the interactions in this tile.
for (j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) {
int atom2 = tbx+j;
int bufferIndex = 3*threadIdx.x;
real3 dEdR1 = make_real3(0);
real3 dEdR2 = make_real3(0);
real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z);
#ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real r = RECIP(invR);
LOAD_ATOM2_PARAMETERS
atom2 = y*TILE_SIZE+j;
COMPUTE_INTERACTION
#ifdef USE_CUTOFF
}
#endif
#ifdef ENABLE_SHUFFLE
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
for (int i = 16; i >= 1; i /= 2) {
dEdR2.x += __shfl_xor(dEdR2.x, i, 32);
dEdR2.y += __shfl_xor(dEdR2.y, i, 32);
dEdR2.z += __shfl_xor(dEdR2.z, i, 32);
}
if (tgx == 0) {
localData[tbx+j].fx += dEdR2.x;
localData[tbx+j].fy += dEdR2.y;
localData[tbx+j].fz += dEdR2.z;
}
#else
force.x -= dEdR1.x;
force.y -= dEdR1.y;
force.z -= dEdR1.z;
tempBuffer[bufferIndex] = dEdR2.x;
tempBuffer[bufferIndex+1] = dEdR2.y;
tempBuffer[bufferIndex+2] = dEdR2.z;
// Sum the forces on atom2.
if (tgx % 4 == 0) {
tempBuffer[bufferIndex] += tempBuffer[bufferIndex+3]+tempBuffer[bufferIndex+6]+tempBuffer[bufferIndex+9];
tempBuffer[bufferIndex+1] += tempBuffer[bufferIndex+4]+tempBuffer[bufferIndex+7]+tempBuffer[bufferIndex+10];
tempBuffer[bufferIndex+2] += tempBuffer[bufferIndex+5]+tempBuffer[bufferIndex+8]+tempBuffer[bufferIndex+11];
}
if (tgx == 0) {
localData[tbx+j].fx += tempBuffer[bufferIndex]+tempBuffer[bufferIndex+12]+tempBuffer[bufferIndex+24]+tempBuffer[bufferIndex+36]+tempBuffer[bufferIndex+48]+tempBuffer[bufferIndex+60]+tempBuffer[bufferIndex+72]+tempBuffer[bufferIndex+84];
localData[tbx+j].fy += tempBuffer[bufferIndex+1]+tempBuffer[bufferIndex+13]+tempBuffer[bufferIndex+25]+tempBuffer[bufferIndex+37]+tempBuffer[bufferIndex+49]+tempBuffer[bufferIndex+61]+tempBuffer[bufferIndex+73]+tempBuffer[bufferIndex+85];
localData[tbx+j].fz += tempBuffer[bufferIndex+2]+tempBuffer[bufferIndex+14]+tempBuffer[bufferIndex+26]+tempBuffer[bufferIndex+38]+tempBuffer[bufferIndex+50]+tempBuffer[bufferIndex+62]+tempBuffer[bufferIndex+74]+tempBuffer[bufferIndex+86];
}
#endif
}
}
}
}
else
#endif
{
// Compute the full set of interactions in this tile.
uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
// Compute forces.
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
real tempEnergy;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
float m = computeMScaleFactor(covalent);
computeOneInteractionF1(data, localData[tbx+tj], d, p, m, tempEnergy, tempForce);
data.force += tempForce;
localData[tbx+tj].force -= tempForce;
energy += tempEnergy;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
// Compute torques.
covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
for (j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+tj;
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 tempForce;
float d = computeDScaleFactor(polarizationGroup);
float p = computePScaleFactor(covalent, polarizationGroup);
float m = computeMScaleFactor(covalent);
computeOneInteractionT1(data, localData[tbx+tj], d, p, m, tempForce);
data.force += tempForce;
computeOneInteractionT3(data, localData[tbx+tj], d, p, m, tempForce);
localData[tbx+tj].force += tempForce;
}
covalent.x >>= 1;
covalent.y >>= 1;
polarizationGroup >>= 1;
tj = (tj + 1) & (TILE_SIZE - 1);
}
data.force *= ENERGY_SCALE_FACTOR;
localData[threadIdx.x].force *= ENERGY_SCALE_FACTOR;
if (pos < end) {
unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (data.force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
offset = y*TILE_SIZE + tgx;
atomicAdd(&forceBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.x*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.y*0xFFFFFFFF)));
atomicAdd(&forceBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
}
}
}
}
pos++;
} while (pos < end);
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
...@@ -14,12 +14,12 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -14,12 +14,12 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.dipole.x = labFrameDipole[atom*3]; data.dipole.x = labFrameDipole[atom*3];
data.dipole.y = labFrameDipole[atom*3+1]; data.dipole.y = labFrameDipole[atom*3+1];
data.dipole.z = labFrameDipole[atom*3+2]; data.dipole.z = labFrameDipole[atom*3+2];
data.quadrupoleXX = labFrameQuadrupole[atom*9]; data.quadrupoleXX = labFrameQuadrupole[atom*5];
data.quadrupoleXY = labFrameQuadrupole[atom*9+1]; data.quadrupoleXY = labFrameQuadrupole[atom*5+1];
data.quadrupoleXZ = labFrameQuadrupole[atom*9+2]; data.quadrupoleXZ = labFrameQuadrupole[atom*5+2];
data.quadrupoleYY = labFrameQuadrupole[atom*9+4]; data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*9+5]; data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = labFrameQuadrupole[atom*9+8]; data.quadrupoleZZ = 1-data.quadrupoleXX-data.quadrupoleYY;
float2 temp = dampingAndThole[atom]; float2 temp = dampingAndThole[atom];
data.damp = temp.x; data.damp = temp.x;
data.thole = temp.y; data.thole = temp.y;
...@@ -38,7 +38,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -38,7 +38,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
float damp = atom1.damp*atom2.damp; float damp = atom1.damp*atom2.damp;
real dampExp; real dampExp;
if (damp != 0 && r < SCALING_DISTANCE_CUTOFF) { if (damp != 0) {
// get scaling factors // get scaling factors
...@@ -83,7 +83,7 @@ __device__ real computeDScaleFactor(unsigned int polarizationGroup) { ...@@ -83,7 +83,7 @@ __device__ real computeDScaleFactor(unsigned int polarizationGroup) {
return (polarizationGroup & 1 ? 0 : 1); return (polarizationGroup & 1 ? 0 : 1);
} }
__device__ float computeDScaleFactor(uint2 covalent) { __device__ float computeMScaleFactor(uint2 covalent) {
// int f1 = (value == 0 || value == 1 ? 1 : 0); // int f1 = (value == 0 || value == 1 ? 1 : 0);
// int f2 = (value == 0 || value == 2 ? 1 : 0); // int f2 = (value == 0 || value == 2 ? 1 : 0);
// 0 = 12 or 13: x and y: 0 // 0 = 12 or 13: x and y: 0
...@@ -106,7 +106,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr ...@@ -106,7 +106,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
*/ */
extern "C" __global__ void computeFixedField( extern "C" __global__ void computeFixedField(
unsigned long long* __restrict__ fieldBuffers, unsigned long long* __restrict__ fieldPolarBuffers, const real4* __restrict__ posq, unsigned long long* __restrict__ fieldBuffers, unsigned long long* __restrict__ fieldPolarBuffers, const real4* __restrict__ posq,
const unsigned int* __restrict__ exclusions, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices, const unsigned int* __restrict__ exclusionIndices, const unsigned int* __restrict__ exclusionRowIndices,
const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags, const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags,
...@@ -185,11 +185,9 @@ extern "C" __global__ void computeFixedField( ...@@ -185,11 +185,9 @@ extern "C" __global__ void computeFixedField(
localData[localAtomIndex].quadrupoleZZ = data.quadrupoleZZ; localData[localAtomIndex].quadrupoleZZ = data.quadrupoleZZ;
localData[localAtomIndex].thole = data.thole; // IS THIS CORRECT? localData[localAtomIndex].thole = data.thole; // IS THIS CORRECT?
localData[localAtomIndex].damp = data.damp; // IS THIS CORRECT? localData[localAtomIndex].damp = data.damp; // IS THIS CORRECT?
unsigned int excl = exclusions[exclusionIndex[localGroupIndex]+tgx];
uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx]; uint2 covalent = covalentFlags[exclusionIndex[localGroupIndex]+tgx];
unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx]; unsigned int polarizationGroup = polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx];
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
bool isExcluded = !(excl & 0x1);
int atom2 = tbx+j; int atom2 = tbx+j;
real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z); real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
...@@ -200,13 +198,13 @@ extern "C" __global__ void computeFixedField( ...@@ -200,13 +198,13 @@ extern "C" __global__ void computeFixedField(
real3 field1; real3 field1;
real3 field2; real3 field2;
computeOneInteraction(data, localData[atom2], delta, field1, field2); computeOneInteraction(data, localData[atom2], delta, field1, field2);
if (!isExcluded) { atom2 = y*TILE_SIZE+j;
float d = computeDScaleFactor(covalent); if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
float d = computeDScaleFactor(polarizationGroup);
data.field += d*field1; data.field += d*field1;
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup);
data.fieldPolar += p*field1; data.fieldPolar += p*field1;
} }
excl >>= 1;
covalent.x >>= 1; covalent.x >>= 1;
covalent.y >>= 1; covalent.y >>= 1;
polarizationGroup >>= 1; polarizationGroup >>= 1;
...@@ -231,7 +229,6 @@ extern "C" __global__ void computeFixedField( ...@@ -231,7 +229,6 @@ extern "C" __global__ void computeFixedField(
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) { if ((flags&(1<<j)) != 0) {
bool isExcluded = false;
int atom2 = tbx+j; int atom2 = tbx+j;
int bufferIndex = 3*threadIdx.x; int bufferIndex = 3*threadIdx.x;
real3 dEdR1 = make_real3(0); real3 dEdR1 = make_real3(0);
...@@ -298,16 +295,13 @@ extern "C" __global__ void computeFixedField( ...@@ -298,16 +295,13 @@ extern "C" __global__ void computeFixedField(
{ {
// Compute the full set of interactions in this tile. // Compute the full set of interactions in this tile.
unsigned int excl = (hasExclusions ? exclusions[exclusionIndex[localGroupIndex]+tgx] : 0xFFFFFFFF);
uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0)); uint2 covalent = (hasExclusions ? covalentFlags[exclusionIndex[localGroupIndex]+tgx] : make_uint2(0, 0));
unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0); unsigned int polarizationGroup = (hasExclusions ? polarizationGroupFlags[exclusionIndex[localGroupIndex]+tgx] : 0);
excl = (excl >> tgx) | (excl << (TILE_SIZE - tgx));
covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx)); covalent.x = (covalent.x >> tgx) | (covalent.x << (TILE_SIZE - tgx));
covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx)); covalent.y = (covalent.y >> tgx) | (covalent.y << (TILE_SIZE - tgx));
polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx)); polarizationGroup = (polarizationGroup >> tgx) | (polarizationGroup << (TILE_SIZE - tgx));
unsigned int tj = tgx; unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) { for (j = 0; j < TILE_SIZE; j++) {
bool isExcluded = !(excl & 0x1);
int atom2 = tbx+tj; int atom2 = tbx+tj;
real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z); real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
...@@ -318,15 +312,14 @@ extern "C" __global__ void computeFixedField( ...@@ -318,15 +312,14 @@ extern "C" __global__ void computeFixedField(
real3 field1; real3 field1;
real3 field2; real3 field2;
computeOneInteraction(data, localData[atom2], delta, field1, field2); computeOneInteraction(data, localData[atom2], delta, field1, field2);
if (!isExcluded) { if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
float d = computeDScaleFactor(covalent); float d = computeDScaleFactor(polarizationGroup);
data.field += d*field1; data.field += d*field1;
localData[atom2].field += d*field2; localData[atom2].field += d*field2;
float p = computePScaleFactor(covalent, polarizationGroup); float p = computePScaleFactor(covalent, polarizationGroup);
data.fieldPolar += p*field1; data.fieldPolar += p*field1;
localData[atom2].fieldPolar += p*field2; localData[atom2].fieldPolar += p*field2;
} }
excl >>= 1;
covalent.x >>= 1; covalent.x >>= 1;
covalent.y >>= 1; covalent.y >>= 1;
polarizationGroup >>= 1; polarizationGroup >>= 1;
......
...@@ -8,10 +8,10 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4 ...@@ -8,10 +8,10 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
// code common to ZThenX and Bisector // code common to ZThenX and Bisector
for (int particleIndex = blockIdx.x*blockDim.x+threadIdx.x; particleIndex < NUM_ATOMS; particleIndex += gridDim.x*blockDim.x) { for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
int4 particles = multipoleParticles[particleIndex]; int4 particles = multipoleParticles[atom];
if (particles.x >= 0 && particles.z >= 0) { if (particles.x >= 0 && particles.z >= 0) {
real4 thisParticlePos = posq[particleIndex]; real4 thisParticlePos = posq[atom];
real4 posZ = posq[particles.z]; real4 posZ = posq[particles.z];
real3 vectorZ = make_real3(posZ.x-thisParticlePos.x, posZ.y-thisParticlePos.y, posZ.z-thisParticlePos.z); real3 vectorZ = make_real3(posZ.x-thisParticlePos.x, posZ.y-thisParticlePos.y, posZ.z-thisParticlePos.z);
real4 posX = posq[particles.x]; real4 posX = posq[particles.x];
...@@ -149,7 +149,7 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4 ...@@ -149,7 +149,7 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
// Transform the dipole // Transform the dipole
unsigned int offset = 3*particleIndex; unsigned int offset = 3*atom;
real molDipole[3]; real molDipole[3];
molDipole[0] = molecularDipoles[offset]; molDipole[0] = molecularDipoles[offset];
molDipole[1] = molecularDipoles[offset+1]; molDipole[1] = molecularDipoles[offset+1];
...@@ -164,55 +164,47 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4 ...@@ -164,55 +164,47 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
// Transform the quadrupole // Transform the quadrupole
real mPole[3][3]; offset = 5*atom;
offset = 9*particleIndex; real mPoleXX = molecularQuadrupoles[offset];
mPole[0][0] = molecularQuadrupoles[offset]; real mPoleXY = molecularQuadrupoles[offset+1];
mPole[0][1] = molecularQuadrupoles[offset+1]; real mPoleXZ = molecularQuadrupoles[offset+2];
mPole[0][2] = molecularQuadrupoles[offset+2]; real mPoleYY = molecularQuadrupoles[offset+3];
real mPoleYZ = molecularQuadrupoles[offset+4];
mPole[1][0] = molecularQuadrupoles[offset+3]; real mPoleZZ = 1-mPoleXX-mPoleYY;
mPole[1][1] = molecularQuadrupoles[offset+4];
mPole[1][2] = molecularQuadrupoles[offset+5];
mPole[2][0] = molecularQuadrupoles[offset+6];
mPole[2][1] = molecularQuadrupoles[offset+7];
mPole[2][2] = molecularQuadrupoles[offset+8];
if (reverse) { if (reverse) {
mPole[0][1] *= -1; mPoleXY *= -1;
mPole[1][0] *= -1; mPoleYZ *= -1;
mPole[1][2] *= -1;
mPole[2][1] *= -1;
} }
labFrameQuadrupoles[offset+8] = vectorX.z*(vectorX.z*mPole[0][0] + vectorY.z*mPole[0][1] + vectorZ.z*mPole[0][2]); labFrameQuadrupoles[offset] = vectorX.x*(vectorX.x*mPoleXX + vectorY.x*mPoleXY + vectorZ.x*mPoleXZ);
labFrameQuadrupoles[offset+8] += vectorY.z*(vectorX.z*mPole[1][0] + vectorY.z*mPole[1][1] + vectorZ.z*mPole[1][2]); + vectorY.x*(vectorX.x*mPoleXY + vectorY.x*mPoleYY + vectorZ.x*mPoleYZ);
labFrameQuadrupoles[offset+8] += vectorZ.z*(vectorX.z*mPole[2][0] + vectorY.z*mPole[2][1] + vectorZ.z*mPole[2][2]); + vectorZ.x*(vectorX.x*mPoleXZ + vectorY.x*mPoleYZ + vectorZ.x*mPoleZZ);
labFrameQuadrupoles[offset+1] = vectorX.x*(vectorX.y*mPoleXX + vectorY.y*mPoleXY + vectorZ.y*mPoleXZ);
labFrameQuadrupoles[offset+4] = vectorX.y*(vectorX.y*mPole[0][0] + vectorY.y*mPole[0][1] + vectorZ.y*mPole[0][2]); + vectorY.x*(vectorX.y*mPoleXY + vectorY.y*mPoleYY + vectorZ.y*mPoleYZ);
labFrameQuadrupoles[offset+4] += vectorY.y*(vectorX.y*mPole[1][0] + vectorY.y*mPole[1][1] + vectorZ.y*mPole[1][2]); + vectorZ.x*(vectorX.y*mPoleXZ + vectorY.y*mPoleYZ + vectorZ.y*mPoleZZ);
labFrameQuadrupoles[offset+4] += vectorZ.y*(vectorX.y*mPole[2][0] + vectorY.y*mPole[2][1] + vectorZ.y*mPole[2][2]); labFrameQuadrupoles[offset+2] = vectorX.x*(vectorX.z*mPoleXX + vectorY.z*mPoleXY + vectorZ.z*mPoleXZ);
+ vectorY.x*(vectorX.z*mPoleXY + vectorY.z*mPoleYY + vectorZ.z*mPoleYZ);
labFrameQuadrupoles[offset+5] = vectorX.y*(vectorX.z*mPole[0][0] + vectorY.z*mPole[0][1] + vectorZ.z*mPole[0][2]); + vectorZ.x*(vectorX.z*mPoleXZ + vectorY.z*mPoleYZ + vectorZ.z*mPoleZZ);
labFrameQuadrupoles[offset+5] += vectorY.y*(vectorX.z*mPole[1][0] + vectorY.z*mPole[1][1] + vectorZ.z*mPole[1][2]); labFrameQuadrupoles[offset+3] = vectorX.y*(vectorX.y*mPoleXX + vectorY.y*mPoleXY + vectorZ.y*mPoleXZ);
labFrameQuadrupoles[offset+5] += vectorZ.y*(vectorX.z*mPole[2][0] + vectorY.z*mPole[2][1] + vectorZ.z*mPole[2][2]); + vectorY.y*(vectorX.y*mPoleXY + vectorY.y*mPoleYY + vectorZ.y*mPoleYZ);
+ vectorZ.y*(vectorX.y*mPoleXZ + vectorY.y*mPoleYZ + vectorZ.y*mPoleZZ);
labFrameQuadrupoles[offset] = vectorX.x*(vectorX.x*mPole[0][0] + vectorY.x*mPole[0][1] + vectorZ.x*mPole[0][2]); labFrameQuadrupoles[offset+4] = vectorX.y*(vectorX.z*mPoleXX + vectorY.z*mPoleXY + vectorZ.z*mPoleXZ);
labFrameQuadrupoles[offset] += vectorY.x*(vectorX.x*mPole[1][0] + vectorY.x*mPole[1][1] + vectorZ.x*mPole[1][2]); + vectorY.y*(vectorX.z*mPoleXY + vectorY.z*mPoleYY + vectorZ.z*mPoleYZ);
labFrameQuadrupoles[offset] += vectorZ.x*(vectorX.x*mPole[2][0] + vectorY.x*mPole[2][1] + vectorZ.x*mPole[2][2]); + vectorZ.y*(vectorX.z*mPoleXZ + vectorY.z*mPoleYZ + vectorZ.z*mPoleZZ);
labFrameQuadrupoles[offset+1] = vectorX.x*(vectorX.y*mPole[0][0] + vectorY.y*mPole[0][1] + vectorZ.y*mPole[0][2]);
labFrameQuadrupoles[offset+1] += vectorY.x*(vectorX.y*mPole[1][0] + vectorY.y*mPole[1][1] + vectorZ.y*mPole[1][2]);
labFrameQuadrupoles[offset+1] += vectorZ.x*(vectorX.y*mPole[2][0] + vectorY.y*mPole[2][1] + vectorZ.y*mPole[2][2]);
labFrameQuadrupoles[offset+2] = vectorX.x*(vectorX.z*mPole[0][0] + vectorY.z*mPole[0][1] + vectorZ.z*mPole[0][2]);
labFrameQuadrupoles[offset+2] += vectorY.x*(vectorX.z*mPole[1][0] + vectorY.z*mPole[1][1] + vectorZ.z*mPole[1][2]);
labFrameQuadrupoles[offset+2] += vectorZ.x*(vectorX.z*mPole[2][0] + vectorY.z*mPole[2][1] + vectorZ.z*mPole[2][2]);
labFrameQuadrupoles[offset+3] = labFrameQuadrupoles[offset+1];
labFrameQuadrupoles[offset+6] = labFrameQuadrupoles[offset+2];
labFrameQuadrupoles[offset+7] = labFrameQuadrupoles[offset+5];
} }
} }
} }
extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, const long long* __restrict__ fieldPolarBuffers,
real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
real scale = polarizability[atom]/(real) 0xFFFFFFFF;
inducedDipole[3*atom] = scale*fieldBuffers[atom];
inducedDipole[3*atom+1] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS];
inducedDipole[3*atom+2] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2];
inducedDipolePolar[3*atom] = scale*fieldPolarBuffers[atom];
inducedDipolePolar[3*atom+1] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS];
inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2];
}
}
\ No newline at end of file
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for * * Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. * * Medical Research, grant U54 GM072970. See https://simtk.org. *
* * * *
* Portions copyright (c) 2008 Stanford University and the Authors. * * Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Mark Friedrichs * * Authors: Mark Friedrichs *
* Contributors: * * Contributors: *
* * * *
...@@ -42,6 +42,13 @@ ...@@ -42,6 +42,13 @@
#include "openmm/LangevinIntegrator.h" #include "openmm/LangevinIntegrator.h"
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#include <stdlib.h>
#include <stdio.h>
#define ASSERT_EQUAL_TOL_MOD(expected, found, tol, testname) {double _scale_ = std::abs(expected) > 1.0 ? std::abs(expected) : 1.0; if (!(std::abs((expected)-(found))/_scale_ <= (tol))) {std::stringstream details; details << testname << " Expected "<<(expected)<<", found "<<(found); throwException(__FILE__, __LINE__, details.str());}};
#define ASSERT_EQUAL_VEC_MOD(expected, found, tol,testname) {ASSERT_EQUAL_TOL_MOD((expected)[0], (found)[0], (tol),(testname)); ASSERT_EQUAL_TOL_MOD((expected)[1], (found)[1], (tol),(testname)); ASSERT_EQUAL_TOL_MOD((expected)[2], (found)[2], (tol),(testname));};
using namespace OpenMM; using namespace OpenMM;
const double TOL = 1e-4; const double TOL = 1e-4;
...@@ -56,22 +63,20 @@ void testVdw( FILE* log ) { ...@@ -56,22 +63,20 @@ void testVdw( FILE* log ) {
std::string epsilonCombiningRule = std::string("HHG"); std::string epsilonCombiningRule = std::string("HHG");
amoebaVdwForce->setEpsilonCombiningRule( epsilonCombiningRule ); amoebaVdwForce->setEpsilonCombiningRule( epsilonCombiningRule );
int classIndex = 0;
for( int ii = 0; ii < numberOfParticles; ii++ ){ for( int ii = 0; ii < numberOfParticles; ii++ ){
int indexIV, indexClass; int indexIV;
double mass, sigma, epsilon, reduction; double mass, sigma, epsilon, reduction;
std::vector< int > exclusions; std::vector< int > exclusions;
if( ii == 0 || ii == 3 ){ if( ii == 0 || ii == 3 ){
mass = 16.0; mass = 16.0;
indexIV = ii; indexIV = ii;
indexClass = 70;
sigma = 1.70250E+00; sigma = 1.70250E+00;
epsilon = 1.10000E-01; epsilon = 1.10000E-01;
reduction = 0.0; reduction = 0.0;
} else { } else {
mass = 1.0; mass = 1.0;
indexIV = ii < 3 ? 0 : 3; indexIV = ii < 3 ? 0 : 3;
indexClass = 71;
sigma = 1.32750E+00; sigma = 1.32750E+00;
epsilon = 1.35000E-02; epsilon = 1.35000E-02;
reduction = 0.91; reduction = 0.91;
...@@ -89,7 +94,7 @@ void testVdw( FILE* log ) { ...@@ -89,7 +94,7 @@ void testVdw( FILE* log ) {
exclusions.push_back ( 5 ); exclusions.push_back ( 5 );
} }
system.addParticle(mass); system.addParticle(mass);
amoebaVdwForce->addParticle( indexIV, indexClass, sigma, epsilon, reduction ); amoebaVdwForce->addParticle( indexIV, classIndex, sigma, epsilon, reduction );
amoebaVdwForce->setParticleExclusions( ii, exclusions ); amoebaVdwForce->setParticleExclusions( ii, exclusions );
} }
LangevinIntegrator integrator(0.0, 0.1, 0.01); LangevinIntegrator integrator(0.0, 0.1, 0.01);
...@@ -130,12 +135,13 @@ void testVdw( FILE* log ) { ...@@ -130,12 +135,13 @@ void testVdw( FILE* log ) {
positions[ii][2] *= AngstromToNm; positions[ii][2] *= AngstromToNm;
} }
for( int ii = 0; ii < amoebaVdwForce->getNumParticles(); ii++ ){ for( int ii = 0; ii < amoebaVdwForce->getNumParticles(); ii++ ){
int indexIV, indexClass; int indexIV;
int classIndex;
double sigma, epsilon, reduction; double sigma, epsilon, reduction;
amoebaVdwForce->getParticleParameters( ii, indexIV, indexClass, sigma, epsilon, reduction ); amoebaVdwForce->getParticleParameters( ii, indexIV, classIndex, sigma, epsilon, reduction );
sigma *= AngstromToNm; sigma *= AngstromToNm;
epsilon *= CalToJoule; epsilon *= CalToJoule;
amoebaVdwForce->setParticleParameters( ii, indexIV, indexClass, sigma, epsilon, reduction ); amoebaVdwForce->setParticleParameters( ii, indexIV, classIndex, sigma, epsilon, reduction );
} }
platformName = "CUDA"; platformName = "CUDA";
Context context(system, integrator, Platform::getPlatformByName( platformName ) ); Context context(system, integrator, Platform::getPlatformByName( platformName ) );
...@@ -170,6 +176,342 @@ void testVdw( FILE* log ) { ...@@ -170,6 +176,342 @@ void testVdw( FILE* log ) {
ASSERT_EQUAL_TOL( expectedEnergy, state.getPotentialEnergy(), tolerance ); ASSERT_EQUAL_TOL( expectedEnergy, state.getPotentialEnergy(), tolerance );
} }
void setupAndGetForcesEnergyVdwAmmonia( const std::string& sigmaCombiningRule, const std::string& epsilonCombiningRule, double cutoff,
double boxDimension, std::vector<Vec3>& forces, double& energy, FILE* log ){
// beginning of Vdw setup
System system;
AmoebaVdwForce* amoebaVdwForce = new AmoebaVdwForce();;
int numberOfParticles = 8;
amoebaVdwForce->setSigmaCombiningRule( sigmaCombiningRule );
amoebaVdwForce->setEpsilonCombiningRule( epsilonCombiningRule );
amoebaVdwForce->setUseNeighborList( 1 );
amoebaVdwForce->setCutoff( cutoff );
if( boxDimension > 0.0 ){
Vec3 a( boxDimension, 0.0, 0.0 );
Vec3 b( 0.0, boxDimension, 0.0 );
Vec3 c( 0.0, 0.0, boxDimension );
system.setDefaultPeriodicBoxVectors( a, b, c );
amoebaVdwForce->setPBC( 1 );
} else {
amoebaVdwForce->setPBC( 0 );
}
// addParticle: ivIndex, radius, epsilon, reductionFactor
int classIndex = 0;
system.addParticle( 1.4007000e+01 );
amoebaVdwForce->addParticle( 0, classIndex, 1.8550000e-01, 4.3932000e-01, 0.0000000e+00 );
system.addParticle( 1.0080000e+00 );
amoebaVdwForce->addParticle( 0, classIndex, 1.3500000e-01, 8.3680000e-02, 9.1000000e-01 );
system.addParticle( 1.0080000e+00 );
amoebaVdwForce->addParticle( 0, classIndex, 1.3500000e-01, 8.3680000e-02, 9.1000000e-01 );
system.addParticle( 1.0080000e+00 );
amoebaVdwForce->addParticle( 0, classIndex, 1.3500000e-01, 8.3680000e-02, 9.1000000e-01 );
system.addParticle( 1.4007000e+01 );
amoebaVdwForce->addParticle( 4, classIndex, 1.8550000e-01, 4.3932000e-01, 0.0000000e+00 );
system.addParticle( 1.0080000e+00 );
amoebaVdwForce->addParticle( 4, classIndex, 1.3500000e-01, 8.3680000e-02, 9.1000000e-01 );
system.addParticle( 1.0080000e+00 );
amoebaVdwForce->addParticle( 4, classIndex, 1.3500000e-01, 8.3680000e-02, 9.1000000e-01 );
system.addParticle( 1.0080000e+00 );
amoebaVdwForce->addParticle( 4, classIndex, 1.3500000e-01, 8.3680000e-02, 9.1000000e-01 );
// ParticleExclusions
std::vector< int > exclusions;
exclusions.resize(0);
exclusions.push_back( 0 );
exclusions.push_back( 1 );
exclusions.push_back( 2 );
exclusions.push_back( 3 );
amoebaVdwForce->setParticleExclusions( 0, exclusions );
exclusions.resize(0);
exclusions.push_back( 1 );
exclusions.push_back( 0 );
exclusions.push_back( 2 );
exclusions.push_back( 3 );
amoebaVdwForce->setParticleExclusions( 1, exclusions );
exclusions.resize(0);
exclusions.push_back( 2 );
exclusions.push_back( 0 );
exclusions.push_back( 1 );
exclusions.push_back( 3 );
amoebaVdwForce->setParticleExclusions( 2, exclusions );
exclusions.resize(0);
exclusions.push_back( 3 );
exclusions.push_back( 0 );
exclusions.push_back( 1 );
exclusions.push_back( 2 );
amoebaVdwForce->setParticleExclusions( 3, exclusions );
exclusions.resize(0);
exclusions.push_back( 4 );
exclusions.push_back( 5 );
exclusions.push_back( 6 );
exclusions.push_back( 7 );
amoebaVdwForce->setParticleExclusions( 4, exclusions );
exclusions.resize(0);
exclusions.push_back( 5 );
exclusions.push_back( 4 );
exclusions.push_back( 6 );
exclusions.push_back( 7 );
amoebaVdwForce->setParticleExclusions( 5, exclusions );
exclusions.resize(0);
exclusions.push_back( 6 );
exclusions.push_back( 4 );
exclusions.push_back( 5 );
exclusions.push_back( 7 );
amoebaVdwForce->setParticleExclusions( 6, exclusions );
exclusions.resize(0);
exclusions.push_back( 7 );
exclusions.push_back( 4 );
exclusions.push_back( 5 );
exclusions.push_back( 6 );
amoebaVdwForce->setParticleExclusions( 7, exclusions );
// end of Vdw setup
std::vector<Vec3> positions(numberOfParticles);
positions[0] = Vec3( 1.5927280e-01, 1.7000000e-06, 1.6491000e-03 );
positions[1] = Vec3( 2.0805540e-01, -8.1258800e-02, 3.7282500e-02 );
positions[2] = Vec3( 2.0843610e-01, 8.0953200e-02, 3.7462200e-02 );
positions[3] = Vec3( 1.7280780e-01, 2.0730000e-04, -9.8741700e-02 );
positions[4] = Vec3( -1.6743680e-01, 1.5900000e-05, -6.6149000e-03 );
positions[5] = Vec3( -2.0428260e-01, 8.1071500e-02, 4.1343900e-02 );
positions[6] = Vec3( -6.7308300e-02, 1.2800000e-05, 1.0623300e-02 );
positions[7] = Vec3( -2.0426290e-01, -8.1231400e-02, 4.1033500e-02 );
system.addForce(amoebaVdwForce);
std::string platformName;
platformName = "CUDA";
LangevinIntegrator integrator(0.0, 0.1, 0.01);
Context context(system, integrator, Platform::getPlatformByName( platformName ) );
context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy);
forces = state.getForces();
energy = state.getPotentialEnergy();
}
void compareForcesEnergy( std::string& testName, double expectedEnergy, double energy,
std::vector<Vec3>& expectedForces,
std::vector<Vec3>& forces, double tolerance, FILE* log ) {
#ifdef AMOEBA_DEBUG
if( log ){
(void) fprintf( log, "%s: expected energy=%14.7e %14.7e\n", testName.c_str(), expectedEnergy, state.getPotentialEnergy() );
for( unsigned int ii = 0; ii < forces.size(); ii++ ){
(void) fprintf( log, "%6u [%14.7e %14.7e %14.7e] [%14.7e %14.7e %14.7e]\n", ii,
expectedForces[ii][0], expectedForces[ii][1], expectedForces[ii][2], forces[ii][0], forces[ii][1], forces[ii][2] );
}
(void) fflush( log );
}
#endif
for( unsigned int ii = 0; ii < forces.size(); ii++ ){
ASSERT_EQUAL_VEC_MOD( expectedForces[ii], forces[ii], tolerance, testName );
}
ASSERT_EQUAL_TOL_MOD( expectedEnergy, energy, tolerance, testName );
}
// test VDW w/ sigmaRule=CubicMean and epsilonRule=HHG
void testVdwAmmoniaCubicMeanHhg( FILE* log ) {
std::string testName = "testVdwAmmoniaCubicMeanHhg";
int numberOfParticles = 8;
double boxDimension = -1.0;
double cutoff = 9000000.0;
std::vector<Vec3> forces;
double energy;
setupAndGetForcesEnergyVdwAmmonia( "CUBIC-MEAN", "HHG", cutoff, boxDimension, forces, energy, log );
std::vector<Vec3> expectedForces(numberOfParticles);
double expectedEnergy = 4.8012258e+00;
expectedForces[0] = Vec3( 2.9265247e+02, -1.4507808e-02, -6.9562123e+00 );
expectedForces[1] = Vec3( -2.2451693e+00, 4.8143073e-01, -2.0041494e-01 );
expectedForces[2] = Vec3( -2.2440698e+00, -4.7905450e-01, -2.0125284e-01 );
expectedForces[3] = Vec3( -1.0840394e+00, -5.8531253e-04, 2.6934135e-01 );
expectedForces[4] = Vec3( -5.6305662e+01, 1.4733908e-03, -1.8083306e-01 );
expectedForces[5] = Vec3( 1.6750145e+00, -3.2448374e-01, -1.8030914e-01 );
expectedForces[6] = Vec3( -2.3412420e+02, 1.0754069e-02, 7.6287492e+00 );
expectedForces[7] = Vec3( 1.6756544e+00, 3.2497316e-01, -1.7906832e-01 );
double tolerance = 1.0e-04;
compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
}
// test VDW w/ sigmaRule=Arithmetic and epsilonRule=Arithmetic
void testVdwAmmoniaArithmeticArithmetic( FILE* log ) {
std::string testName = "testVdwAmmoniaArithmeticArithmetic";
int numberOfParticles = 8;
double boxDimension = -1.0;
double cutoff = 9000000.0;
std::vector<Vec3> forces;
double energy;
setupAndGetForcesEnergyVdwAmmonia( "ARITHMETIC", "ARITHMETIC", cutoff, boxDimension, forces, energy, log );
std::vector<Vec3> expectedForces(numberOfParticles);
double expectedEnergy = 4.2252403e+00;
expectedForces[0] = Vec3( 3.0603839e+02, -1.5550310e-02, -7.2661707e+00 );
expectedForces[1] = Vec3( -2.7801357e+00, 5.8805051e-01, -2.5907269e-01 );
expectedForces[2] = Vec3( -2.7753968e+00, -5.8440732e-01, -2.5969111e-01 );
expectedForces[3] = Vec3( -2.2496416e+00, -1.1797440e-03, 5.5501757e-01 );
expectedForces[4] = Vec3( -5.5077629e+01, 8.3417114e-04, -3.3668921e-01 );
expectedForces[5] = Vec3( 2.3752452e+00, -4.6788669e-01, -2.4907764e-01 );
expectedForces[6] = Vec3( -2.4790697e+02, 1.1419770e-02, 8.0629999e+00 );
expectedForces[7] = Vec3( 2.3761408e+00, 4.6871961e-01, -2.4731607e-01 );
double tolerance = 1.0e-04;
compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
}
// test VDW w/ sigmaRule=Geometric and epsilonRule=Geometric
void testVdwAmmoniaGeometricGeometric( FILE* log ) {
std::string testName = "testVdwAmmoniaGeometricGeometric";
int numberOfParticles = 8;
double boxDimension = -1.0;
double cutoff = 9000000.0;
std::vector<Vec3> forces;
double energy;
setupAndGetForcesEnergyVdwAmmonia( "GEOMETRIC", "GEOMETRIC", cutoff, boxDimension, forces, energy, log );
std::vector<Vec3> expectedForces(numberOfParticles);
double expectedEnergy = 2.5249914e+00;
expectedForces[0] = Vec3( 2.1169631e+02, -1.0710925e-02, -4.3728025e+00 );
expectedForces[1] = Vec3( -2.2585621e+00, 4.8409995e-01, -2.0188344e-01 );
expectedForces[2] = Vec3( -2.2551351e+00, -4.8124855e-01, -2.0246986e-01 );
expectedForces[3] = Vec3( -1.7178028e+00, -9.0851787e-04, 4.2466975e-01 );
expectedForces[4] = Vec3( -4.8302147e+01, 9.6603376e-04, -5.7972068e-01 );
expectedForces[5] = Vec3( 1.8100634e+00, -3.5214093e-01, -1.9357207e-01 );
expectedForces[6] = Vec3( -1.6078365e+02, 7.2117601e-03, 5.3180261e+00 );
expectedForces[7] = Vec3( 1.8109211e+00, 3.5273117e-01, -1.9224723e-01 );
double tolerance = 1.0e-04;
compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
}
void testVdwAmmoniaCubicMeanHarmonic( FILE* log ) {
std::string testName = "testVdwAmmoniaCubicMeanHarmonic";
int numberOfParticles = 8;
double boxDimension = -1.0;
double cutoff = 9000000.0;
std::vector<Vec3> forces;
double energy;
setupAndGetForcesEnergyVdwAmmonia( "CUBIC-MEAN", "HARMONIC", cutoff, boxDimension, forces, energy, log );
std::vector<Vec3> expectedForces(numberOfParticles);
double expectedEnergy = 4.1369069e+00;
expectedForces[0] = Vec3( 2.5854436e+02, -1.2779529e-02, -5.9041148e+00 );
expectedForces[1] = Vec3( -2.0832419e+00, 4.4915831e-01, -1.8266000e-01 );
expectedForces[2] = Vec3( -2.0823991e+00, -4.4699804e-01, -1.8347141e-01 );
expectedForces[3] = Vec3( -9.5914714e-01, -5.2162026e-04, 2.3873165e-01 );
expectedForces[4] = Vec3( -5.3724787e+01, 1.4838241e-03, -2.8089191e-01 );
expectedForces[5] = Vec3( 1.5074325e+00, -2.9016397e-01, -1.6385118e-01 );
expectedForces[6] = Vec3( -2.0271029e+02, 9.2367947e-03, 6.6389988e+00 );
expectedForces[7] = Vec3( 1.5080748e+00, 2.9058422e-01, -1.6274118e-01 );
double tolerance = 1.0e-04;
compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
}
// test w/ cutoff=0.25 nm; single ixn between two particles (0 and 6); force nonzero on
// particle 4 due to reduction applied to NH
// the distance between 0 and 6 is ~ 0.235 so the ixn is in the tapered region
void testVdwTaper( FILE* log ) {
std::string testName = "testVdwTaper";
int numberOfParticles = 8;
double boxDimension = -1.0;
double cutoff = 0.25;
std::vector<Vec3> forces;
double energy;
setupAndGetForcesEnergyVdwAmmonia( "CUBIC-MEAN", "HHG", cutoff, boxDimension, forces, energy, log );
std::vector<Vec3> expectedForces(numberOfParticles);
double expectedEnergy = 3.5478444e+00;
expectedForces[0] = Vec3( 5.6710779e+02, -2.7391004e-02, -1.7867730e+01 );
expectedForces[1] = Vec3( -0.0000000e+00, -0.0000000e+00, -0.0000000e+00 );
expectedForces[2] = Vec3( -0.0000000e+00, -0.0000000e+00, -0.0000000e+00 );
expectedForces[3] = Vec3( -0.0000000e+00, -0.0000000e+00, -0.0000000e+00 );
expectedForces[4] = Vec3( -5.1039701e+01, 2.4651903e-03, 1.6080957e+00 );
expectedForces[5] = Vec3( -0.0000000e+00, -0.0000000e+00, -0.0000000e+00 );
expectedForces[6] = Vec3( -5.1606809e+02, 2.4925813e-02, 1.6259634e+01 );
expectedForces[7] = Vec3( -0.0000000e+00, -0.0000000e+00, -0.0000000e+00 );
double tolerance = 1.0e-04;
compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
}
// test PBC
void testVdwPBC( FILE* log ) {
std::string testName = "testVdwPBC";
int numberOfParticles = 8;
double boxDimension = 0.6;
double cutoff = 0.25;
std::vector<Vec3> forces;
double energy;
setupAndGetForcesEnergyVdwAmmonia( "CUBIC-MEAN", "HHG", cutoff, boxDimension, forces, energy, log );
std::vector<Vec3> expectedForces(numberOfParticles);
double expectedEnergy = 8.4385405e+00;
expectedForces[0] = Vec3( 5.1453069e+02, 4.9751912e-01, -1.2759570e+01 );
expectedForces[1] = Vec3( -2.5622586e+02, -4.6524265e+01, 2.4281465e+01 );
expectedForces[2] = Vec3( -2.7538705e+02, 5.1831690e+01, 2.7367710e+01 );
expectedForces[3] = Vec3( -0.0000000e+00, -0.0000000e+00, -0.0000000e+00 );
expectedForces[4] = Vec3( 3.0883034e+02, -5.8876974e+00, -5.8286122e+01 );
expectedForces[5] = Vec3( 1.1319359e+02, -3.2047069e-01, 1.6181231e+00 );
expectedForces[6] = Vec3( -5.1606809e+02, 2.4925813e-02, 1.6259634e+01 );
expectedForces[7] = Vec3( 1.1112638e+02, 3.7829857e-01, 1.5187587e+00 );
// tolerance is higher here due to interpolation used in setting tapering coefficients;
// if tapering turned off, then absolute difference < 2.0e-05
double tolerance = 5.0e-04;
compareForcesEnergy( testName, expectedEnergy, energy, expectedForces, forces, tolerance, log );
}
int main( int numberOfArguments, char* argv[] ) { int main( int numberOfArguments, char* argv[] ) {
...@@ -178,7 +520,37 @@ int main( int numberOfArguments, char* argv[] ) { ...@@ -178,7 +520,37 @@ int main( int numberOfArguments, char* argv[] ) {
registerAmoebaCudaKernelFactories(); registerAmoebaCudaKernelFactories();
FILE* log = NULL; FILE* log = NULL;
testVdw( log ); testVdw( log );
// tests using two ammonia molecules
// test VDW w/ sigmaRule=CubicMean and epsilonRule=HHG
testVdwAmmoniaCubicMeanHhg( log );
// test VDW w/ sigmaRule=Arithmetic and epsilonRule=Arithmetic
testVdwAmmoniaArithmeticArithmetic( log );
// test VDW w/ sigmaRule=Geometric and epsilonRule=Geometric
testVdwAmmoniaGeometricGeometric( log );
// test VDW w/ sigmaRule=CubicMean and epsilonRule=Harmonic
testVdwAmmoniaCubicMeanHarmonic( log );
// test w/ cutoff=0.25 nm; single ixn between two particles (0 and 6); force nonzero on
// particle 4 due to reduction applied to NH
// the distance between 0 and 6 is ~ 0.235 so the ixn is in the tapered region
testVdwTaper( log );
// test PBC
testVdwPBC( log );
} catch(const std::exception& e) { } catch(const std::exception& e) {
std::cout << "exception: " << e.what() << std::endl; std::cout << "exception: " << e.what() << std::endl;
std::cout << "FAIL - ERROR. Test failed." << std::endl; std::cout << "FAIL - ERROR. Test failed." << std::endl;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment