Commit c2461875 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to convert AmoebaGeneralizedKirkwoodForce to new CUDA platform

parent 6c0082a5
...@@ -1321,6 +1321,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1321,6 +1321,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
int numForceThreadBlocks = nb.getNumForceThreadBlocks(); int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int forceThreadBlockSize = nb.getForceThreadBlockSize(); int forceThreadBlockSize = nb.getForceThreadBlockSize();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
void* npt = NULL;
if (pmeGrid == NULL) { if (pmeGrid == NULL) {
// Compute induced dipoles. // Compute induced dipoles.
...@@ -1355,11 +1356,40 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1355,11 +1356,40 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
for (int i = 0; i < maxInducedIterations; i++) { for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField); cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar); cu.clearBuffer(*inducedFieldPolar);
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(), if (gkKernel == NULL) {
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices, void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&dampingAndThole->getDevicePointer()}; &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); &dampingAndThole->getDevicePointer()};
void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &inducedField->getDevicePointer(), cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
else {
cu.clearBuffer(*gkKernel->getInducedField());
cu.clearBuffer(*gkKernel->getInducedFieldPolar());
vector<float> d, dp;
gkKernel->getInducedDipoles()->download(d);
gkKernel->getInducedDipolesPolar()->download(dp);
printf("dipole\n");
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g %g %g, %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2], dp[3*i], dp[3*i+1], dp[3*i+2]);
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&gkKernel->getInducedField()->getDevicePointer(), &gkKernel->getInducedFieldPolar()->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&gkKernel->getBornRadii()->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
vector<long long> f, fp;
gkKernel->getInducedField()->download(f);
gkKernel->getInducedFieldPolar()->download(fp);
printf("field\n");
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g %g %g, %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+2*cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, fp[i]/(double) 0xFFFFFFFF, fp[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, fp[i+2*cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF);
void* updateInducedGkFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&gkKernel->getField()->getDevicePointer(), &gkKernel->getFieldPolar()->getDevicePointer(), &gkKernel->getInducedField()->getDevicePointer(),
&gkKernel->getInducedFieldPolar()->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
cu.executeKernel(updateInducedFieldKernel, updateInducedGkFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
}
void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &npt, &npt, &inducedField->getDevicePointer(),
&inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()}; &polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2); cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
...@@ -1487,7 +1517,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1487,7 +1517,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(), void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()}; &inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms()); cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &inducedField->getDevicePointer(), void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &npt, &npt, &inducedField->getDevicePointer(),
&inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()}; &polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2); cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
...@@ -1704,8 +1734,8 @@ private: ...@@ -1704,8 +1734,8 @@ private:
}; };
CudaCalcAmoebaGeneralizedKirkwoodForceKernel::CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) : CudaCalcAmoebaGeneralizedKirkwoodForceKernel::CudaCalcAmoebaGeneralizedKirkwoodForceKernel(std::string name, const Platform& platform, CudaContext& cu, System& system) :
CalcAmoebaGeneralizedKirkwoodForceKernel(name, platform), cu(cu), system(system), params(NULL), bornRadii(NULL), field(NULL), inducedDipoleS(NULL), CalcAmoebaGeneralizedKirkwoodForceKernel(name, platform), cu(cu), system(system), params(NULL), bornRadii(NULL), field(NULL), fieldPolar(NULL),
inducedDipolePolarS(NULL), bornSum(NULL), bornForce(NULL) { inducedField(NULL), inducedFieldPolar(NULL), inducedDipoleS(NULL), inducedDipolePolarS(NULL), bornSum(NULL), bornForce(NULL) {
} }
CudaCalcAmoebaGeneralizedKirkwoodForceKernel::~CudaCalcAmoebaGeneralizedKirkwoodForceKernel() { CudaCalcAmoebaGeneralizedKirkwoodForceKernel::~CudaCalcAmoebaGeneralizedKirkwoodForceKernel() {
...@@ -1716,6 +1746,12 @@ CudaCalcAmoebaGeneralizedKirkwoodForceKernel::~CudaCalcAmoebaGeneralizedKirkwood ...@@ -1716,6 +1746,12 @@ CudaCalcAmoebaGeneralizedKirkwoodForceKernel::~CudaCalcAmoebaGeneralizedKirkwood
delete bornRadii; delete bornRadii;
if (field != NULL) if (field != NULL)
delete field; delete field;
if (fieldPolar != NULL)
delete fieldPolar;
if (inducedField != NULL)
delete inducedField;
if (inducedFieldPolar != NULL)
delete inducedFieldPolar;
if (inducedDipoleS != NULL) if (inducedDipoleS != NULL)
delete inducedDipoleS; delete inducedDipoleS;
if (inducedDipolePolarS != NULL) if (inducedDipolePolarS != NULL)
...@@ -1741,11 +1777,17 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst ...@@ -1741,11 +1777,17 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::initialize(const System& syst
params = CudaArray::create<float2>(cu, paddedNumAtoms, "amoebaGkParams"); params = CudaArray::create<float2>(cu, paddedNumAtoms, "amoebaGkParams");
bornRadii = new CudaArray(cu, paddedNumAtoms, elementSize, "bornRadii"); bornRadii = new CudaArray(cu, paddedNumAtoms, elementSize, "bornRadii");
field = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkField"); field = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkField");
fieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkFieldPolar");
bornSum = CudaArray::create<long long>(cu, paddedNumAtoms, "bornSum"); bornSum = CudaArray::create<long long>(cu, paddedNumAtoms, "bornSum");
bornForce = CudaArray::create<long long>(cu, paddedNumAtoms, "bornForce"); bornForce = CudaArray::create<long long>(cu, paddedNumAtoms, "bornForce");
inducedDipoleS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipoleS"); inducedDipoleS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipoleS");
inducedDipolePolarS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolarS"); inducedDipolePolarS = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolarS");
if (multipoles->getPolarizationType() == AmoebaMultipoleForce::Mutual) {
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkInducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "gkInducedFieldPolar");
}
cu.addAutoclearBuffer(*field); cu.addAutoclearBuffer(*field);
cu.addAutoclearBuffer(*fieldPolar);
cu.addAutoclearBuffer(*bornSum); cu.addAutoclearBuffer(*bornSum);
cu.addAutoclearBuffer(*bornForce); cu.addAutoclearBuffer(*bornForce);
vector<float2> paramsVector(paddedNumAtoms); vector<float2> paramsVector(paddedNumAtoms);
......
...@@ -470,6 +470,15 @@ public: ...@@ -470,6 +470,15 @@ public:
CudaArray* getField() { CudaArray* getField() {
return field; return field;
} }
CudaArray* getFieldPolar() {
return fieldPolar;
}
CudaArray* getInducedField() {
return inducedField;
}
CudaArray* getInducedFieldPolar() {
return inducedFieldPolar;
}
CudaArray* getInducedDipoles() { CudaArray* getInducedDipoles() {
return inducedDipoleS; return inducedDipoleS;
} }
...@@ -485,6 +494,9 @@ private: ...@@ -485,6 +494,9 @@ private:
CudaArray* bornRadii; CudaArray* bornRadii;
CudaArray* bornForce; CudaArray* bornForce;
CudaArray* field; CudaArray* field;
CudaArray* fieldPolar;
CudaArray* inducedField;
CudaArray* inducedFieldPolar;
CudaArray* inducedDipoleS; CudaArray* inducedDipoleS;
CudaArray* inducedDipolePolarS; CudaArray* inducedDipolePolarS;
CUfunction computeBornSumKernel, reduceBornSumKernel, gkForceKernel, chainRuleKernel, ediffKernel; CUfunction computeBornSumKernel, reduceBornSumKernel, gkForceKernel, chainRuleKernel, ediffKernel;
......
#define APPLY_SCALE
#if defined F1 #if defined F1
__device__ void computeOneEDiffInteractionF1(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real& outputEnergy, real3& outputForce) { __device__ void computeOneEDiffInteractionF1(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real& outputEnergy, real3& outputForce) {
#elif defined T1 #elif defined T1
...@@ -77,7 +75,6 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData ...@@ -77,7 +75,6 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
real scale3i = 3*scale3*uscale*rr3*rr2; real scale3i = 3*scale3*uscale*rr3*rr2;
real scale5i = 3*scale5*uscale*rr3*rr2; real scale5i = 3*scale5*uscale*rr3*rr2;
#ifdef APPLY_SCALE
real dsc3 = scale3*dScale*rr3; real dsc3 = scale3*dScale*rr3;
real dsc5 = 3*scale5*dScale*rr3*rr2; real dsc5 = 3*scale5*dScale*rr3*rr2;
real dsc7 = 15*scale7*dScale*rr3*rr3*rr1; real dsc7 = 15*scale7*dScale*rr3*rr3*rr1;
...@@ -85,11 +82,6 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData ...@@ -85,11 +82,6 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
real psc3 = scale3*pScale*rr3; real psc3 = scale3*pScale*rr3;
real psc5 = 3*scale5*pScale*rr3*rr2; real psc5 = 3*scale5*pScale*rr3*rr2;
real psc7 = 15*scale7*pScale*rr3*rr3*rr1; real psc7 = 15*scale7*pScale*rr3*rr3*rr1;
#else
real psc3 = scale3*rr3;
real psc5 = 3*scale5*rr3*rr2;
real psc7 = 15*scale7*rr3*rr3*rr1;
#endif
#ifdef T1 #ifdef T1
real dixr1 = atom1.dipole.y*zr - atom1.dipole.z*yr; real dixr1 = atom1.dipole.y*zr - atom1.dipole.z*yr;
...@@ -167,15 +159,9 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData ...@@ -167,15 +159,9 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
qiukp2 -= atom1.quadrupoleXY*atom2.inducedDipolePolar.x + atom1.quadrupoleYY*atom2.inducedDipolePolar.y + atom1.quadrupoleYZ*atom2.inducedDipolePolar.z; qiukp2 -= atom1.quadrupoleXY*atom2.inducedDipolePolar.x + atom1.quadrupoleYY*atom2.inducedDipolePolar.y + atom1.quadrupoleYZ*atom2.inducedDipolePolar.z;
qiukp3 -= atom1.quadrupoleXZ*atom2.inducedDipolePolar.x + atom1.quadrupoleYZ*atom2.inducedDipolePolar.y + atom1.quadrupoleZZ*atom2.inducedDipolePolar.z; qiukp3 -= atom1.quadrupoleXZ*atom2.inducedDipolePolar.x + atom1.quadrupoleYZ*atom2.inducedDipolePolar.y + atom1.quadrupoleZZ*atom2.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i1 -= psc5*qiuk1 + dsc5*qiukp1; ftm2i1 -= psc5*qiuk1 + dsc5*qiukp1;
ftm2i2 -= psc5*qiuk2 + dsc5*qiukp2; ftm2i2 -= psc5*qiuk2 + dsc5*qiukp2;
ftm2i3 -= psc5*qiuk3 + dsc5*qiukp3; ftm2i3 -= psc5*qiuk3 + dsc5*qiukp3;
#else
ftm2i1 -= psc5*(qiuk1 + qiukp1);
ftm2i2 -= psc5*(qiuk2 + qiukp2);
ftm2i3 -= psc5*(qiuk3 + qiukp3);
#endif
#endif #endif
#endif #endif
...@@ -197,15 +183,9 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData ...@@ -197,15 +183,9 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
qkuip2 -= atom2.quadrupoleXY*atom1.inducedDipolePolar.x + atom2.quadrupoleYY*atom1.inducedDipolePolar.y + atom2.quadrupoleYZ*atom1.inducedDipolePolar.z; qkuip2 -= atom2.quadrupoleXY*atom1.inducedDipolePolar.x + atom2.quadrupoleYY*atom1.inducedDipolePolar.y + atom2.quadrupoleYZ*atom1.inducedDipolePolar.z;
qkuip3 -= atom2.quadrupoleXZ*atom1.inducedDipolePolar.x + atom2.quadrupoleYZ*atom1.inducedDipolePolar.y + atom2.quadrupoleZZ*atom1.inducedDipolePolar.z; qkuip3 -= atom2.quadrupoleXZ*atom1.inducedDipolePolar.x + atom2.quadrupoleYZ*atom1.inducedDipolePolar.y + atom2.quadrupoleZZ*atom1.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i1 += psc5*qkui1 + dsc5*qkuip1; ftm2i1 += psc5*qkui1 + dsc5*qkuip1;
ftm2i2 += psc5*qkui2 + dsc5*qkuip2; ftm2i2 += psc5*qkui2 + dsc5*qkuip2;
ftm2i3 += psc5*qkui3 + dsc5*qkuip3; ftm2i3 += psc5*qkui3 + dsc5*qkuip3;
#else
ftm2i1 += psc5*(qkui1 + qkuip1);
ftm2i2 += psc5*(qkui2 + qkuip2);
ftm2i3 += psc5*(qkui3 + qkuip3);
#endif
#endif #endif
#endif #endif
...@@ -326,44 +306,24 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData ...@@ -326,44 +306,24 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
real glip3 = scip3Y*sc6 - scip4Y*sc5; real glip3 = scip3Y*sc6 - scip4Y*sc5;
#ifdef F1 #ifdef F1
#ifdef APPLY_SCALE
ftm2i1 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_1 + (gli2*pScale + glip2*dScale)*ddsc5_1 + (gli3*pScale+glip3*dScale)*ddsc7_1); ftm2i1 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_1 + (gli2*pScale + glip2*dScale)*ddsc5_1 + (gli3*pScale+glip3*dScale)*ddsc7_1);
ftm2i2 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_2 + (gli2*pScale + glip2*dScale)*ddsc5_2 + (gli3*pScale+glip3*dScale)*ddsc7_2); ftm2i2 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_2 + (gli2*pScale + glip2*dScale)*ddsc5_2 + (gli3*pScale+glip3*dScale)*ddsc7_2);
ftm2i3 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_3 + (gli2*pScale + glip2*dScale)*ddsc5_3 + (gli3*pScale+glip3*dScale)*ddsc7_3); ftm2i3 -= 0.5f*((gli1*pScale + glip1*dScale)*ddsc3_3 + (gli2*pScale + glip2*dScale)*ddsc5_3 + (gli3*pScale+glip3*dScale)*ddsc7_3);
#else
ftm2i1 -= 0.5f*((gli1 + glip1)*ddsc3_1 + (gli2 + glip2)*ddsc5_1 + (gli3 + glip3)*ddsc7_1);
ftm2i2 -= 0.5f*((gli1 + glip1)*ddsc3_2 + (gli2 + glip2)*ddsc5_2 + (gli3 + glip3)*ddsc7_2);
ftm2i3 -= 0.5f*((gli1 + glip1)*ddsc3_3 + (gli2 + glip2)*ddsc5_3 + (gli3 + glip3)*ddsc7_3);
#endif
outputEnergy = gli1*psc3 + gli2*psc5 + gli3*psc7; outputEnergy = gli1*psc3 + gli2*psc5 + gli3*psc7;
#endif #endif
#ifdef APPLY_SCALE
gfi1 += 1.5f*(gli1*psc3 + glip1*dsc3); gfi1 += 1.5f*(gli1*psc3 + glip1*dsc3);
gfi1 += 2.5f*(gli2*psc5 + glip2*dsc5); gfi1 += 2.5f*(gli2*psc5 + glip2*dsc5);
gfi1 += 3.5f*(gli3*psc7 + glip3*dsc7); gfi1 += 3.5f*(gli3*psc7 + glip3*dsc7);
#else
gfi1 += 1.5f*psc3*(gli1 + glip1);
gfi1 += 2.5f*psc5*(gli2 + glip2);
gfi1 += 3.5f*psc7*(gli3 + glip3);
#endif
gfi1 *= rr2; gfi1 *= rr2;
gfi1 += 0.5f*scip2*scale3i; gfi1 += 0.5f*scip2*scale3i;
#if defined F1 || defined T1 #if defined F1 || defined T1
#ifdef APPLY_SCALE
real gfi5 = (sci4Y*psc7+scip4Y*dsc7); real gfi5 = (sci4Y*psc7+scip4Y*dsc7);
#else
real gfi5 = psc7*(sci4Y + scip4Y);
#endif
#endif #endif
#if defined F1 || defined T3 #if defined F1 || defined T3
#ifdef APPLY_SCALE
real gfi6 = -(sci3Y*psc7+scip3Y*dsc7); real gfi6 = -(sci3Y*psc7+scip3Y*dsc7);
#else
real gfi6 = -psc7*(sci3Y + scip3Y);
#endif
#endif #endif
#ifdef F1 #ifdef F1
...@@ -371,66 +331,35 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData ...@@ -371,66 +331,35 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
real diff0 = atom1.inducedDipoleS.x - atom1.inducedDipole.x; real diff0 = atom1.inducedDipoleS.x - atom1.inducedDipole.x;
real diff1 = atom1.inducedDipolePolarS.x - atom1.inducedDipolePolar.x; real diff1 = atom1.inducedDipolePolarS.x - atom1.inducedDipolePolar.x;
#ifdef APPLY_SCALE
ftm2i1 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7)); ftm2i1 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7));
#else
ftm2i1 += 0.5f*(-atom2.q*psc3*(diff0 + diff1) + sc4*psc5*(diff0 + diff1) - sc6*psc7*(diff0 + diff1));
#endif
diff0 = atom2.inducedDipoleS.x - atom2.inducedDipole.x; diff0 = atom2.inducedDipoleS.x - atom2.inducedDipole.x;
diff1 = atom2.inducedDipolePolarS.x - atom2.inducedDipolePolar.x; diff1 = atom2.inducedDipolePolarS.x - atom2.inducedDipolePolar.x;
#ifdef APPLY_SCALE
ftm2i1 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7)); ftm2i1 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7));
ftm2i1 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.x + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.x + gfi5*qir1 + gfi6*qkr1; ftm2i1 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.x + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.x + gfi5*qir1 + gfi6*qkr1;
#else
ftm2i1 += 0.5f*(atom1.q*psc3*(diff0 + diff1) + sc3*psc5*(diff0 + diff1) + sc5*psc7*(diff0 + diff1));
ftm2i1 += 0.5f*psc5*(sci4Y + scip4Y)*atom1.dipole.x + 0.5f*psc5*(sci3Y + scip3Y)*atom2.dipole.x + gfi5*qir1 + gfi6*qkr1;
#endif
ftm2i2 += gfi1*yr; ftm2i2 += gfi1*yr;
diff0 = atom1.inducedDipoleS.y - atom1.inducedDipole.y; diff0 = atom1.inducedDipoleS.y - atom1.inducedDipole.y;
diff1 = atom1.inducedDipolePolarS.y - atom1.inducedDipolePolar.y; diff1 = atom1.inducedDipolePolarS.y - atom1.inducedDipolePolar.y;
#ifdef APPLY_SCALE
ftm2i2 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7)); ftm2i2 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7));
#else
ftm2i2 += 0.5f*(-atom2.q*psc3*(diff0 + diff1) + sc4*psc5*(diff0 + diff1) - sc6*psc7*(diff0 + diff1));
#endif
diff0 = atom2.inducedDipoleS.y - atom2.inducedDipole.y; diff0 = atom2.inducedDipoleS.y - atom2.inducedDipole.y;
diff1 = atom2.inducedDipolePolarS.y - atom2.inducedDipolePolar.y; diff1 = atom2.inducedDipolePolarS.y - atom2.inducedDipolePolar.y;
#ifdef APPLY_SCALE
ftm2i2 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7)); ftm2i2 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7));
ftm2i2 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.y + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.y + gfi5*qir2 + gfi6*qkr2; ftm2i2 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.y + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.y + gfi5*qir2 + gfi6*qkr2;
#else
ftm2i2 += 0.5f*(atom1.q*psc3*(diff0 + diff1) + sc3*psc5*(diff0 + diff1) + sc5*psc7*(diff0 + diff1));
ftm2i2 += 0.5f*psc5*(sci4Y +scip4Y)*atom1.dipole.y + 0.5f*psc5*(sci3Y +scip3Y)*atom2.dipole.y + gfi5*qir2 + gfi6*qkr2;
#endif
ftm2i3 += gfi1*zr; ftm2i3 += gfi1*zr;
diff0 = atom1.inducedDipoleS.z - atom1.inducedDipole.z; diff0 = atom1.inducedDipoleS.z - atom1.inducedDipole.z;
diff1 = atom1.inducedDipolePolarS.z - atom1.inducedDipolePolar.z; diff1 = atom1.inducedDipolePolarS.z - atom1.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i3 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7)); ftm2i3 += 0.5f*(-atom2.q*(diff0*psc3 + diff1*dsc3) + sc4*(diff0*psc5 + diff1*dsc5) - sc6*(diff0*psc7 + diff1*dsc7));
#else
ftm2i3 += 0.5f*(-atom2.q*psc3*(diff0 + diff1) + sc4*psc5*(diff0 + diff1) - sc6*psc7*(diff0 + diff1));
#endif
diff0 = atom2.inducedDipoleS.z - atom2.inducedDipole.z; diff0 = atom2.inducedDipoleS.z - atom2.inducedDipole.z;
diff1 = atom2.inducedDipolePolarS.z - atom2.inducedDipolePolar.z; diff1 = atom2.inducedDipolePolarS.z - atom2.inducedDipolePolar.z;
#ifdef APPLY_SCALE
ftm2i3 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7)); ftm2i3 += 0.5f*(atom1.q*(diff0*psc3 + diff1*dsc3) + sc3*(diff0*psc5 + diff1*dsc5) + sc5*(diff0*psc7 + diff1*dsc7));
ftm2i3 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.z + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.z + gfi5*qir3 + gfi6*qkr3; ftm2i3 += 0.5f*(sci4Y*psc5+scip4Y*dsc5)*atom1.dipole.z + 0.5f*(sci3Y*psc5+scip3Y*dsc5)*atom2.dipole.z + gfi5*qir3 + gfi6*qkr3;
#else
ftm2i3 += 0.5f*(atom1.q*psc3*(diff0 + diff1) + sc3*psc5*(diff0 + diff1) + sc5*psc7*(diff0 + diff1));
ftm2i3 += 0.5f*psc5*(sci4Y + scip4Y)*atom1.dipole.z + 0.5f*psc5*(sci3Y+scip3Y)*atom2.dipole.z + gfi5*qir3 + gfi6*qkr3;
#endif
// intermediate values needed for partially excluded interactions // intermediate values needed for partially excluded interactions
...@@ -475,31 +404,17 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData ...@@ -475,31 +404,17 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
#endif #endif
#ifdef T1 #ifdef T1
#ifdef APPLY_SCALE
real gti2 = 0.5f*(sci4Y*psc5 + scip4Y*dsc5); real gti2 = 0.5f*(sci4Y*psc5 + scip4Y*dsc5);
real ttm2i1 = -(dixuk1*psc3+dixukp1*dsc3)*0.5f + gti2*dixr1 + ((ukxqir1+rxqiuk1)*psc5 +(ukxqirp1+rxqiukp1)*dsc5) - gfi5*rxqir1; real ttm2i1 = -(dixuk1*psc3+dixukp1*dsc3)*0.5f + gti2*dixr1 + ((ukxqir1+rxqiuk1)*psc5 +(ukxqirp1+rxqiukp1)*dsc5) - gfi5*rxqir1;
real ttm2i2 = -(dixuk2*psc3+dixukp2*dsc3)*0.5f + gti2*dixr2 + ((ukxqir2+rxqiuk2)*psc5 +(ukxqirp2+rxqiukp2)*dsc5) - gfi5*rxqir2; real ttm2i2 = -(dixuk2*psc3+dixukp2*dsc3)*0.5f + gti2*dixr2 + ((ukxqir2+rxqiuk2)*psc5 +(ukxqirp2+rxqiukp2)*dsc5) - gfi5*rxqir2;
real ttm2i3 = -(dixuk3*psc3+dixukp3*dsc3)*0.5f + gti2*dixr3 + ((ukxqir3+rxqiuk3)*psc5 +(ukxqirp3+rxqiukp3)*dsc5) - gfi5*rxqir3; real ttm2i3 = -(dixuk3*psc3+dixukp3*dsc3)*0.5f + gti2*dixr3 + ((ukxqir3+rxqiuk3)*psc5 +(ukxqirp3+rxqiukp3)*dsc5) - gfi5*rxqir3;
#else
real gti2 = 0.5f*psc5*(sci4Y + scip4Y);
real ttm2i1 = -psc3*(dixuk1 + dixukp1)*0.5f + gti2*dixr1 + psc5*((ukxqir1+rxqiuk1) + (ukxqirp1+rxqiukp1)) - gfi5*rxqir1;
real ttm2i2 = -psc3*(dixuk2 + dixukp2)*0.5f + gti2*dixr2 + psc5*((ukxqir2+rxqiuk2) + (ukxqirp2+rxqiukp2)) - gfi5*rxqir2;
real ttm2i3 = -psc3*(dixuk3 + dixukp3)*0.5f + gti2*dixr3 + psc5*((ukxqir3+rxqiuk3) + (ukxqirp3+rxqiukp3)) - gfi5*rxqir3;
#endif
#endif #endif
#ifdef T3 #ifdef T3
#ifdef APPLY_SCALE
real gti3 = 0.5f*(sci3Y*psc5 + scip3Y*dsc5); real gti3 = 0.5f*(sci3Y*psc5 + scip3Y*dsc5);
real ttm3i1 = -(dkxui1*psc3+dkxuip1*dsc3)*0.5f + gti3*dkxr1 - ((uixqkr1+rxqkui1)*psc5 +(uixqkrp1+rxqkuip1)*dsc5) - gfi6*rxqkr1; real ttm3i1 = -(dkxui1*psc3+dkxuip1*dsc3)*0.5f + gti3*dkxr1 - ((uixqkr1+rxqkui1)*psc5 +(uixqkrp1+rxqkuip1)*dsc5) - gfi6*rxqkr1;
real ttm3i2 = -(dkxui2*psc3+dkxuip2*dsc3)*0.5f + gti3*dkxr2 - ((uixqkr2+rxqkui2)*psc5 +(uixqkrp2+rxqkuip2)*dsc5) - gfi6*rxqkr2; real ttm3i2 = -(dkxui2*psc3+dkxuip2*dsc3)*0.5f + gti3*dkxr2 - ((uixqkr2+rxqkui2)*psc5 +(uixqkrp2+rxqkuip2)*dsc5) - gfi6*rxqkr2;
real ttm3i3 = -(dkxui3*psc3+dkxuip3*dsc3)*0.5f + gti3*dkxr3 - ((uixqkr3+rxqkui3)*psc5 +(uixqkrp3+rxqkuip3)*dsc5) - gfi6*rxqkr3; real ttm3i3 = -(dkxui3*psc3+dkxuip3*dsc3)*0.5f + gti3*dkxr3 - ((uixqkr3+rxqkui3)*psc5 +(uixqkrp3+rxqkuip3)*dsc5) - gfi6*rxqkr3;
#else
real gti3 = 0.5f*psc5*(sci3Y + scip3Y);
real ttm3i1 = -psc3*(dkxui1 + dkxuip1)*0.5f + gti3*dkxr1 - psc5*((uixqkr1+rxqkui1) + (uixqkrp1+rxqkuip1)) - gfi6*rxqkr1;
real ttm3i2 = -psc3*(dkxui2 + dkxuip2)*0.5f + gti3*dkxr2 - psc5*((uixqkr2+rxqkui2) + (uixqkrp2+rxqkuip2)) - gfi6*rxqkr2;
real ttm3i3 = -psc3*(dkxui3 + dkxuip3)*0.5f + gti3*dkxr3 - psc5*((uixqkr3+rxqkui3) + (uixqkrp3+rxqkuip3)) - gfi6*rxqkr3;
#endif
#endif #endif
// update force and torque on site k // update force and torque on site k
...@@ -603,27 +518,15 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData ...@@ -603,27 +518,15 @@ __device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData
#endif #endif
#ifdef T1 #ifdef T1
#ifdef APPLY_SCALE
ttm2i1 = -(dixuk1*psc3+dixukp1*dsc3)*0.5f + ((ukxqir1+rxqiuk1)*psc5 +(ukxqirp1+rxqiukp1)*dsc5); ttm2i1 = -(dixuk1*psc3+dixukp1*dsc3)*0.5f + ((ukxqir1+rxqiuk1)*psc5 +(ukxqirp1+rxqiukp1)*dsc5);
ttm2i2 = -(dixuk2*psc3+dixukp2*dsc3)*0.5f + ((ukxqir2+rxqiuk2)*psc5 +(ukxqirp2+rxqiukp2)*dsc5); ttm2i2 = -(dixuk2*psc3+dixukp2*dsc3)*0.5f + ((ukxqir2+rxqiuk2)*psc5 +(ukxqirp2+rxqiukp2)*dsc5);
ttm2i3 = -(dixuk3*psc3+dixukp3*dsc3)*0.5f + ((ukxqir3+rxqiuk3)*psc5 +(ukxqirp3+rxqiukp3)*dsc5); ttm2i3 = -(dixuk3*psc3+dixukp3*dsc3)*0.5f + ((ukxqir3+rxqiuk3)*psc5 +(ukxqirp3+rxqiukp3)*dsc5);
#else
ttm2i1 = -psc3*(dixuk1+dixukp1)*0.5f + psc5*((ukxqir1+rxqiuk1) + (ukxqirp1+rxqiukp1));
ttm2i2 = -psc3*(dixuk2+dixukp2)*0.5f + psc5*((ukxqir2+rxqiuk2) + (ukxqirp2+rxqiukp2));
ttm2i3 = -psc3*(dixuk3+dixukp3)*0.5f + psc5*((ukxqir3+rxqiuk3) + (ukxqirp3+rxqiukp3));
#endif
#endif #endif
#ifdef T3 #ifdef T3
#ifdef APPLY_SCALE
ttm3i1 = -(dkxui1*psc3+dkxuip1*dsc3)*0.5f - ((uixqkr1+rxqkui1)*psc5 +(uixqkrp1+rxqkuip1)*dsc5); ttm3i1 = -(dkxui1*psc3+dkxuip1*dsc3)*0.5f - ((uixqkr1+rxqkui1)*psc5 +(uixqkrp1+rxqkuip1)*dsc5);
ttm3i2 = -(dkxui2*psc3+dkxuip2*dsc3)*0.5f - ((uixqkr2+rxqkui2)*psc5 +(uixqkrp2+rxqkuip2)*dsc5); ttm3i2 = -(dkxui2*psc3+dkxuip2*dsc3)*0.5f - ((uixqkr2+rxqkui2)*psc5 +(uixqkrp2+rxqkuip2)*dsc5);
ttm3i3 = -(dkxui3*psc3+dkxuip3*dsc3)*0.5f - ((uixqkr3+rxqkui3)*psc5 +(uixqkrp3+rxqkuip3)*dsc5); ttm3i3 = -(dkxui3*psc3+dkxuip3*dsc3)*0.5f - ((uixqkr3+rxqkui3)*psc5 +(uixqkrp3+rxqkuip3)*dsc5);
#else
ttm3i1 = -psc3*(dkxui1 + dkxuip1)*0.5f - psc5*((uixqkr1+rxqkui1) + (uixqkrp1+rxqkuip1));
ttm3i2 = -psc3*(dkxui2 + dkxuip2)*0.5f - psc5*((uixqkr2+rxqkui2) + (uixqkrp2+rxqkuip2));
ttm3i3 = -psc3*(dkxui3 + dkxuip3)*0.5f - psc5*((uixqkr3+rxqkui3) + (uixqkrp3+rxqkuip3));
#endif
#endif #endif
// update force and torque on site k; // update force and torque on site k;
......
...@@ -4,10 +4,21 @@ ...@@ -4,10 +4,21 @@
typedef struct { typedef struct {
real3 pos; real3 pos;
real3 field, fieldPolar, inducedDipole, inducedDipolePolar; real3 field, fieldPolar, inducedDipole, inducedDipolePolar;
#ifdef USE_GK
real3 fieldS, fieldPolarS, inducedDipoleS, inducedDipolePolarS;
real bornRadius;
#endif
float thole, damp; float thole, damp;
} AtomData; } AtomData;
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) { #ifdef USE_GK
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole, const real* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii) {
#else
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
#endif
real4 atomPosq = posq[atom]; real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z); data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.inducedDipole.x = inducedDipole[atom*3]; data.inducedDipole.x = inducedDipole[atom*3];
...@@ -19,10 +30,30 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -19,10 +30,30 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
float2 temp = dampingAndThole[atom]; float2 temp = dampingAndThole[atom];
data.damp = temp.x; data.damp = temp.x;
data.thole = temp.y; data.thole = temp.y;
#ifdef USE_GK
data.inducedDipoleS.x = inducedDipoleS[atom*3];
data.inducedDipoleS.y = inducedDipoleS[atom*3+1];
data.inducedDipoleS.z = inducedDipoleS[atom*3+2];
data.inducedDipolePolarS.x = inducedDipolePolarS[atom*3];
data.inducedDipolePolarS.y = inducedDipolePolarS[atom*3+1];
data.inducedDipolePolarS.z = inducedDipolePolarS[atom*3+2];
data.bornRadius = bornRadii[atom];
#endif
}
inline __device__ void zeroAtomData(AtomData& data) {
data.field = make_real3(0);
data.fieldPolar = make_real3(0);
#ifdef USE_GK
data.fieldS = make_real3(0);
data.fieldPolarS = make_real3(0);
#endif
} }
#ifdef USE_EWALD #ifdef USE_EWALD
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, real3* fields) { __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, bool isSelfInteraction) {
if (isSelfInteraction)
return;
real scale1, scale2; real scale1, scale2;
real r2 = dot(deltaR, deltaR); real r2 = dot(deltaR, deltaR);
if (r2 < CUTOFF_SQUARED) { if (r2 < CUTOFF_SQUARED) {
...@@ -35,7 +66,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -35,7 +66,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real bn0 = erfc(ralpha)*rI; real bn0 = erfc(ralpha)*rI;
real alsq2 = 2*EWALD_ALPHA*EWALD_ALPHA; real alsq2 = 2*EWALD_ALPHA*EWALD_ALPHA;
real alsq2n = RECIP(SQRT_PI*EWALD_ALPHA); real alsq2n = RECIP(SQRT_PI*EWALD_ALPHA);
real exp2a = expf(-(ralpha*ralpha)); real exp2a = EXP(-(ralpha*ralpha));
alsq2n *= alsq2; alsq2n *= alsq2;
real bn1 = (bn0+alsq2n*exp2a)*rI*rI; real bn1 = (bn0+alsq2n*exp2a)*rI*rI;
...@@ -73,16 +104,90 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -73,16 +104,90 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
scale2 = 0; scale2 = 0;
} }
real dDotDelta = scale2*dot(deltaR, atom2.inducedDipole); real dDotDelta = scale2*dot(deltaR, atom2.inducedDipole);
fields[0] = scale1*atom2.inducedDipole + dDotDelta*deltaR; atom1.field += scale1*atom2.inducedDipole + dDotDelta*deltaR;
dDotDelta = scale2*dot(deltaR, atom2.inducedDipolePolar); dDotDelta = scale2*dot(deltaR, atom2.inducedDipolePolar);
fields[1] = scale1*atom2.inducedDipolePolar + dDotDelta*deltaR; atom1.fieldPolar += scale1*atom2.inducedDipolePolar + dDotDelta*deltaR;
dDotDelta = scale2*dot(deltaR, atom1.inducedDipole); dDotDelta = scale2*dot(deltaR, atom1.inducedDipole);
fields[2] = scale1*atom1.inducedDipole + dDotDelta*deltaR; atom2.field += scale1*atom1.inducedDipole + dDotDelta*deltaR;
dDotDelta = scale2*dot(deltaR, atom1.inducedDipolePolar); dDotDelta = scale2*dot(deltaR, atom1.inducedDipolePolar);
fields[3] = scale1*atom1.inducedDipolePolar + dDotDelta*deltaR; atom2.fieldPolar += scale1*atom1.inducedDipolePolar + dDotDelta*deltaR;
}
#elif defined USE_GK
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, bool isSelfInteraction) {
real r2 = dot(deltaR, deltaR);
real r = SQRT(r2);
if (!isSelfInteraction) {
real rI = RECIP(r);
real r2I = rI*rI;
real rr3 = -rI*r2I;
real rr5 = -3*rr3*r2I;
real dampProd = atom1.damp*atom2.damp;
real ratio = (dampProd != 0 ? r/dampProd : 1);
float pGamma = (atom1.thole > atom2.thole ? atom2.thole: atom1.thole);
real damp = ratio*ratio*ratio*pGamma;
real dampExp = (dampProd != 0 ? EXP(-damp) : 0);
rr3 *= 1-dampExp;
rr5 *= 1-(1+damp)*dampExp;
real dDotDelta = rr5*dot(deltaR, atom2.inducedDipole);
atom1.field += rr3*atom2.inducedDipole + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom2.inducedDipolePolar);
atom1.fieldPolar += rr3*atom2.inducedDipolePolar + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom1.inducedDipole);
atom2.field += rr3*atom1.inducedDipole + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom1.inducedDipolePolar);
atom2.fieldPolar += rr3*atom1.inducedDipolePolar + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom2.inducedDipoleS);
atom1.fieldS += rr3*atom2.inducedDipoleS + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom2.inducedDipolePolarS);
atom1.fieldPolarS += rr3*atom2.inducedDipolePolarS + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom1.inducedDipoleS);
atom2.fieldS += rr3*atom1.inducedDipoleS + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom1.inducedDipolePolarS);
atom2.fieldPolarS += rr3*atom1.inducedDipolePolarS + dDotDelta*deltaR;
}
real rb2 = atom1.bornRadius*atom2.bornRadius;
real expterm = EXP(-r2/(GK_C*rb2));
real expc = expterm/GK_C;
real gf2 = RECIP(r2+rb2*expterm);
real gf = SQRT(gf2);
real gf3 = gf2*gf;
real gf5 = gf3*gf2;
// reaction potential auxiliary terms
real a10 = -gf3;
// reaction potential gradient auxiliary terms
real expc1 = 1 - expc;
real a11 = expc1 * 3 * gf5;
// unweighted dipole reaction potential gradient tensor
real3 gux = GK_FD*make_real3(a10+deltaR.x*deltaR.x*a11, deltaR.x*deltaR.y*a11, deltaR.x*deltaR.z*a11);
real3 guy = make_real3(gux.y, GK_FD*(a10+deltaR.y*deltaR.y*a11), GK_FD*deltaR.y*deltaR.z*a11);
real3 guz = make_real3(gux.z, guy.z, GK_FD*(a10+deltaR.z*deltaR.z*a11));
atom1.fieldS += atom2.inducedDipoleS.x*gux+atom2.inducedDipoleS.y*guy+atom2.inducedDipoleS.z*guz;
atom2.fieldS += atom1.inducedDipoleS.x*gux+atom1.inducedDipoleS.y*guy+atom1.inducedDipoleS.z*guz;
atom1.fieldPolarS += atom2.inducedDipolePolarS.x*gux+atom2.inducedDipolePolarS.y*guy+atom2.inducedDipolePolarS.z*guz;
atom2.fieldPolarS += atom1.inducedDipolePolarS.x*gux+atom1.inducedDipolePolarS.y*guy+atom1.inducedDipolePolarS.z*guz;
} }
#else #else
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, real3* fields) { __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, bool isSelfInteraction) {
if (isSelfInteraction)
return;
real rI = RSQRT(dot(deltaR, deltaR)); real rI = RSQRT(dot(deltaR, deltaR));
real r = RECIP(rI); real r = RECIP(rI);
real r2I = rI*rI; real r2I = rI*rI;
...@@ -96,13 +201,13 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -96,13 +201,13 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
rr3 *= 1 - dampExp; rr3 *= 1 - dampExp;
rr5 *= 1 - (1+damp)*dampExp; rr5 *= 1 - (1+damp)*dampExp;
real dDotDelta = rr5*dot(deltaR, atom2.inducedDipole); real dDotDelta = rr5*dot(deltaR, atom2.inducedDipole);
fields[0] = rr3*atom2.inducedDipole + dDotDelta*deltaR; atom1.field += rr3*atom2.inducedDipole + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom2.inducedDipolePolar); dDotDelta = rr5*dot(deltaR, atom2.inducedDipolePolar);
fields[1] = rr3*atom2.inducedDipolePolar + dDotDelta*deltaR; atom1.fieldPolar += rr3*atom2.inducedDipolePolar + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom1.inducedDipole); dDotDelta = rr5*dot(deltaR, atom1.inducedDipole);
fields[2] = rr3*atom1.inducedDipole + dDotDelta*deltaR; atom2.field += rr3*atom1.inducedDipole + dDotDelta*deltaR;
dDotDelta = rr5*dot(deltaR, atom1.inducedDipolePolar); dDotDelta = rr5*dot(deltaR, atom1.inducedDipolePolar);
fields[3] = rr3*atom1.inducedDipolePolar + dDotDelta*deltaR; atom2.fieldPolar += rr3*atom1.inducedDipolePolar + dDotDelta*deltaR;
} }
#endif #endif
...@@ -110,10 +215,13 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -110,10 +215,13 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
* Compute the mutual induced field. * Compute the mutual induced field.
*/ */
extern "C" __global__ void computeInducedField( extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ fieldBuffers, unsigned long long* __restrict__ fieldPolarBuffers, const real4* __restrict__ posq, unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags, const ushort2* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, unsigned int maxTiles, const unsigned int* __restrict__ interactionFlags,
#elif defined USE_GK
unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
#endif #endif
const float2* __restrict__ dampingAndThole) { const float2* __restrict__ dampingAndThole) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE; unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
...@@ -138,8 +246,7 @@ extern "C" __global__ void computeInducedField( ...@@ -138,8 +246,7 @@ extern "C" __global__ void computeInducedField(
const unsigned int tbx = threadIdx.x - tgx; const unsigned int tbx = threadIdx.x - tgx;
unsigned int x, y; unsigned int x, y;
AtomData data; AtomData data;
data.field = make_real3(0); zeroAtomData(data);
data.fieldPolar = make_real3(0);
if (pos < end) { if (pos < end) {
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -158,7 +265,11 @@ extern "C" __global__ void computeInducedField( ...@@ -158,7 +265,11 @@ extern "C" __global__ void computeInducedField(
} }
} }
unsigned int atom1 = x*TILE_SIZE + tgx; unsigned int atom1 = x*TILE_SIZE + tgx;
#ifdef USE_GK
loadAtomData(data, atom1, posq, inducedDipole, inducedDipolePolar, dampingAndThole, inducedDipoleS, inducedDipolePolarS, bornRadii);
#else
loadAtomData(data, atom1, posq, inducedDipole, inducedDipolePolar, dampingAndThole); loadAtomData(data, atom1, posq, inducedDipole, inducedDipolePolar, dampingAndThole);
#endif
if (pos >= end) if (pos >= end)
; // This warp is done. ; // This warp is done.
else if (x == y) { else if (x == y) {
...@@ -170,28 +281,26 @@ extern "C" __global__ void computeInducedField( ...@@ -170,28 +281,26 @@ extern "C" __global__ void computeInducedField(
localData[threadIdx.x].thole = data.thole; localData[threadIdx.x].thole = data.thole;
localData[threadIdx.x].damp = data.damp; localData[threadIdx.x].damp = data.damp;
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+j; real3 delta = localData[tbx+j].pos-data.pos;
real3 delta = localData[atom2].pos-data.pos;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real3 fields[4]; int atom2 = y*TILE_SIZE+j;
computeOneInteraction(data, localData[atom2], delta, fields); if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
atom2 = y*TILE_SIZE+j; computeOneInteraction(data, localData[tbx+j], delta, atom1 == atom2);
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
data.field += fields[0];
data.fieldPolar += fields[1];
}
} }
} }
else { else {
// This is an off-diagonal tile. // This is an off-diagonal tile.
#ifdef USE_GK
loadAtomData(localData[threadIdx.x], y*TILE_SIZE+tgx, posq, inducedDipole, inducedDipolePolar, dampingAndThole, inducedDipoleS, inducedDipolePolarS, bornRadii);
#else
loadAtomData(localData[threadIdx.x], y*TILE_SIZE+tgx, posq, inducedDipole, inducedDipolePolar, dampingAndThole); loadAtomData(localData[threadIdx.x], y*TILE_SIZE+tgx, posq, inducedDipole, inducedDipolePolar, dampingAndThole);
localData[threadIdx.x].field = make_real3(0); #endif
localData[threadIdx.x].fieldPolar = make_real3(0); zeroAtomData(localData[threadIdx.x]);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF); unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (flags == 0) { // TODO: Figure out what the flags != 0 case doesn't work!!! if (flags == 0) { // TODO: Figure out what the flags != 0 case doesn't work!!!
...@@ -270,21 +379,15 @@ extern "C" __global__ void computeInducedField( ...@@ -270,21 +379,15 @@ extern "C" __global__ void computeInducedField(
unsigned int tj = tgx; unsigned int tj = tgx;
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = tbx+tj; real3 delta = localData[tbx+tj].pos-data.pos;
real3 delta = localData[atom2].pos-data.pos;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z; delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
real3 fields[4]; int atom2 = y*TILE_SIZE+j;
computeOneInteraction(data, localData[atom2], delta, fields); if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) { computeOneInteraction(data, localData[tbx+tj], delta, false);
data.field += fields[0];
data.fieldPolar += fields[1];
localData[atom2].field += fields[2];
localData[atom2].fieldPolar += fields[3];
}
tj = (tj + 1) & (TILE_SIZE - 1); tj = (tj + 1) & (TILE_SIZE - 1);
} }
} }
...@@ -295,27 +398,44 @@ extern "C" __global__ void computeInducedField( ...@@ -295,27 +398,44 @@ extern "C" __global__ void computeInducedField(
if (pos < end) { if (pos < end) {
const unsigned int offset = x*TILE_SIZE + tgx; const unsigned int offset = x*TILE_SIZE + tgx;
atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (data.field.x*0xFFFFFFFF))); atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (data.field.x*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0xFFFFFFFF))); atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.y*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0xFFFFFFFF))); atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.field.z*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (data.fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolar.z*0xFFFFFFFF)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (data.fieldS.x*0xFFFFFFFF)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.y*0xFFFFFFFF)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldS.z*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (data.fieldPolarS.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.fieldPolarS.z*0xFFFFFFFF)));
#endif
} }
if (pos < end && x != y) { if (pos < end && x != y) {
const unsigned int offset = y*TILE_SIZE + tgx; const unsigned int offset = y*TILE_SIZE + tgx;
atomicAdd(&fieldBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0xFFFFFFFF))); atomicAdd(&field[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.x*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0xFFFFFFFF))); atomicAdd(&field[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.y*0xFFFFFFFF)));
atomicAdd(&fieldBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0xFFFFFFFF))); atomicAdd(&field[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].field.z*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF))); atomicAdd(&fieldPolar[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolar.z*0xFFFFFFFF)));
#ifdef USE_GK
atomicAdd(&fieldS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.x*0xFFFFFFFF)));
atomicAdd(&fieldS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.y*0xFFFFFFFF)));
atomicAdd(&fieldS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldS.z*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.x*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.y*0xFFFFFFFF)));
atomicAdd(&fieldPolarS[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].fieldPolarS.z*0xFFFFFFFF)));
#endif
} }
pos++; pos++;
} while (pos < end); } while (pos < end);
} }
extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar, extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar,
const long long* __restrict__ fixedFieldS, const long long* __restrict__ fixedFieldPolarS,
const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar, real* __restrict__ inducedDipole, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar, real* __restrict__ inducedDipole,
real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors) { real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors) {
extern __shared__ real2 buffer[]; extern __shared__ real2 buffer[];
...@@ -335,8 +455,10 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ ...@@ -335,8 +455,10 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
int fieldIndex = atom+component*PADDED_NUM_ATOMS; int fieldIndex = atom+component*PADDED_NUM_ATOMS;
real previousDipole = inducedDipole[dipoleIndex]; real previousDipole = inducedDipole[dipoleIndex];
real previousDipolePolar = inducedDipolePolar[dipoleIndex]; real previousDipolePolar = inducedDipolePolar[dipoleIndex];
real newDipole = scale*((fixedField[fieldIndex]+inducedField[fieldIndex])*fieldScale+ewaldScale*previousDipole); long long fixed = fixedField[fieldIndex] + (fixedFieldS == NULL ? (long long) 0 : fixedFieldS[fieldIndex]);
real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+inducedFieldPolar[fieldIndex])*fieldScale+ewaldScale*previousDipolePolar); long long fixedPolar = fixedFieldPolar[fieldIndex] + (fixedFieldPolarS == NULL ? (long long) 0 : fixedFieldPolarS[fieldIndex]);
real newDipole = scale*((fixed+inducedField[fieldIndex])*fieldScale+ewaldScale*previousDipole);
real newDipolePolar = scale*((fixedPolar+inducedFieldPolar[fieldIndex])*fieldScale+ewaldScale*previousDipolePolar);
newDipole = previousDipole + polarSOR*(newDipole-previousDipole); newDipole = previousDipole + polarSOR*(newDipole-previousDipole);
newDipolePolar = previousDipolePolar + polarSOR*(newDipolePolar-previousDipolePolar); newDipolePolar = previousDipolePolar + polarSOR*(newDipolePolar-previousDipolePolar);
inducedDipole[dipoleIndex] = newDipole; inducedDipole[dipoleIndex] = newDipole;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment