Unverified Commit 07b7d2d3 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Cleanup to AmoebaMultipoleForce (#3068)

* Cleanup to CUDA AmoebaMultipoleForce

* Deleted obsolete SOR code
parent 8c5479c5
......@@ -223,8 +223,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
double4* posqd = (double4*) &temp[0];
vector<float2> dampingAndTholeVec;
vector<float> polarizabilityVec;
vector<float> molecularDipolesVec;
vector<float> molecularQuadrupolesVec;
vector<float> localDipolesVec;
vector<float> localQuadrupolesVec;
vector<int4> multipoleParticlesVec;
for (int i = 0; i < numMultipoles; i++) {
double charge, thole, damping, polarity;
......@@ -239,15 +239,15 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
polarizabilityVec.push_back((float) polarity);
multipoleParticlesVec.push_back(make_int4(atomX, atomY, atomZ, axisType));
for (int j = 0; j < 3; j++)
molecularDipolesVec.push_back((float) dipole[j]);
molecularQuadrupolesVec.push_back((float) quadrupole[0]);
molecularQuadrupolesVec.push_back((float) quadrupole[1]);
molecularQuadrupolesVec.push_back((float) quadrupole[2]);
molecularQuadrupolesVec.push_back((float) quadrupole[4]);
molecularQuadrupolesVec.push_back((float) quadrupole[5]);
localDipolesVec.push_back((float) dipole[j]);
localQuadrupolesVec.push_back((float) quadrupole[0]);
localQuadrupolesVec.push_back((float) quadrupole[1]);
localQuadrupolesVec.push_back((float) quadrupole[2]);
localQuadrupolesVec.push_back((float) quadrupole[4]);
localQuadrupolesVec.push_back((float) quadrupole[5]);
}
hasQuadrupoles = false;
for (auto q : molecularQuadrupolesVec)
for (auto q : localQuadrupolesVec)
if (q != 0.0)
hasQuadrupoles = true;
int paddedNumAtoms = cu.getPaddedNumAtoms();
......@@ -256,38 +256,38 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
polarizabilityVec.push_back(0);
multipoleParticlesVec.push_back(make_int4(0, 0, 0, 0));
for (int j = 0; j < 3; j++)
molecularDipolesVec.push_back(0);
localDipolesVec.push_back(0);
for (int j = 0; j < 5; j++)
molecularQuadrupolesVec.push_back(0);
localQuadrupolesVec.push_back(0);
}
dampingAndThole.initialize<float2>(cu, paddedNumAtoms, "dampingAndThole");
polarizability.initialize<float>(cu, paddedNumAtoms, "polarizability");
multipoleParticles.initialize<int4>(cu, paddedNumAtoms, "multipoleParticles");
molecularDipoles.initialize<float>(cu, 3*paddedNumAtoms, "molecularDipoles");
molecularQuadrupoles.initialize<float>(cu, 5*paddedNumAtoms, "molecularQuadrupoles");
localDipoles.initialize<float>(cu, 3*paddedNumAtoms, "localDipoles");
localQuadrupoles.initialize<float>(cu, 5*paddedNumAtoms, "localQuadrupoles");
lastPositions.initialize(cu, cu.getPosq().getSize(), cu.getPosq().getElementSize(), "lastPositions");
dampingAndThole.upload(dampingAndTholeVec);
polarizability.upload(polarizabilityVec);
multipoleParticles.upload(multipoleParticlesVec);
molecularDipoles.upload(molecularDipolesVec);
molecularQuadrupoles.upload(molecularQuadrupolesVec);
localDipoles.upload(localDipolesVec);
localQuadrupoles.upload(localQuadrupolesVec);
posq.upload(&temp[0]);
// Create workspace arrays.
polarizationType = force.getPolarizationType();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
labFrameDipoles.initialize(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles");
labFrameQuadrupoles.initialize(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles");
labDipoles.initialize(cu, paddedNumAtoms, 3*elementSize, "labDipoles");
labQuadrupoles.initialize(cu, 5*paddedNumAtoms, elementSize, "labQuadrupoles");
sphericalDipoles.initialize(cu, 3*paddedNumAtoms, elementSize, "sphericalDipoles");
sphericalQuadrupoles.initialize(cu, 5*paddedNumAtoms, elementSize, "sphericalQuadrupoles");
fracDipoles.initialize(cu, 3*paddedNumAtoms, elementSize, "fracDipoles");
fracDipoles.initialize(cu, paddedNumAtoms, 3*elementSize, "fracDipoles");
fracQuadrupoles.initialize(cu, 6*paddedNumAtoms, elementSize, "fracQuadrupoles");
field.initialize(cu, 3*paddedNumAtoms, sizeof(long long), "field");
fieldPolar.initialize(cu, 3*paddedNumAtoms, sizeof(long long), "fieldPolar");
torque.initialize(cu, 3*paddedNumAtoms, sizeof(long long), "torque");
inducedDipole.initialize(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
inducedDipolePolar.initialize(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar");
inducedDipole.initialize(cu, paddedNumAtoms, 3*elementSize, "inducedDipole");
inducedDipolePolar.initialize(cu, paddedNumAtoms, 3*elementSize, "inducedDipolePolar");
if (polarizationType == AmoebaMultipoleForce::Mutual) {
inducedDipoleErrors.initialize(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors");
prevDipoles.initialize(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles");
......@@ -423,12 +423,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str();
if (usePME) {
int nx, ny, nz;
force.getPMEParameters(alpha, nx, ny, nz);
if (nx == 0 || alpha == 0.0) {
force.getPMEParameters(pmeAlpha, nx, ny, nz);
if (nx == 0 || pmeAlpha == 0) {
NonbondedForce nb;
nb.setEwaldErrorTolerance(force.getEwaldErrorTolerance());
nb.setCutoffDistance(force.getCutoffDistance());
NonbondedForceImpl::calcPMEParameters(system, nb, alpha, gridSizeX, gridSizeY, gridSizeZ, false);
NonbondedForceImpl::calcPMEParameters(system, nb, pmeAlpha, gridSizeX, gridSizeY, gridSizeZ, false);
gridSizeX = CudaFFT3D::findLegalDimension(gridSizeX);
gridSizeY = CudaFFT3D::findLegalDimension(gridSizeY);
gridSizeZ = CudaFFT3D::findLegalDimension(gridSizeZ);
......@@ -437,7 +437,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
gridSizeY = CudaFFT3D::findLegalDimension(ny);
gridSizeZ = CudaFFT3D::findLegalDimension(nz);
}
defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["EWALD_ALPHA"] = cu.doubleToString(pmeAlpha);
defines["SQRT_PI"] = cu.doubleToString(sqrt(M_PI));
defines["USE_EWALD"] = "";
defines["USE_CUTOFF"] = "";
......@@ -512,7 +512,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Create the PME kernels.
map<string, string> pmeDefines;
pmeDefines["EWALD_ALPHA"] = cu.doubleToString(alpha);
pmeDefines["EWALD_ALPHA"] = cu.doubleToString(pmeAlpha);
pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = cu.intToString(numMultipoles);
pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
......@@ -563,7 +563,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
hasInitializedFFT = true;
// Initialize the b-spline moduli.
// Initialize the B-spline moduli.
double data[PmeOrder];
double x = 0.0;
......@@ -756,21 +756,20 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Compute the lab frame moments.
void* computeMomentsArgs[] = {&cu.getPosq().getDevicePointer(), &multipoleParticles.getDevicePointer(),
&molecularDipoles.getDevicePointer(), &molecularQuadrupoles.getDevicePointer(),
&labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(),
&localDipoles.getDevicePointer(), &localQuadrupoles.getDevicePointer(),
&labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(),
&sphericalDipoles.getDevicePointer(), &sphericalQuadrupoles.getDevicePointer()};
cu.executeKernel(computeMomentsKernel, computeMomentsArgs, cu.getNumAtoms());
int startTileIndex = nb.getStartTileIndex();
int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
if (!pmeGrid.isInitialized()) {
// Compute induced dipoles.
if (gkKernel == NULL) {
void* computeFixedFieldArgs[] = {&field.getDevicePointer(), &fieldPolar.getDevicePointer(), &cu.getPosq().getDevicePointer(),
&covalentFlags.getDevicePointer(), &polarizationGroupFlags.getDevicePointer(), &nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
&labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*fixedFieldThreads, fixedFieldThreads);
void* recordInducedDipolesArgs[] = {&field.getDevicePointer(), &fieldPolar.getDevicePointer(),
&inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(), &polarizability.getDevicePointer()};
......@@ -781,7 +780,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* computeFixedFieldArgs[] = {&field.getDevicePointer(), &fieldPolar.getDevicePointer(), &cu.getPosq().getDevicePointer(),
&covalentFlags.getDevicePointer(), &polarizationGroupFlags.getDevicePointer(), &nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices,
&gkKernel->getBornRadii().getDevicePointer(), &gkKernel->getField().getDevicePointer(),
&labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
&labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*fixedFieldThreads, fixedFieldThreads);
void* recordInducedDipolesArgs[] = {&field.getDevicePointer(), &fieldPolar.getDevicePointer(),
&gkKernel->getField().getDevicePointer(), &gkKernel->getInducedDipoles().getDevicePointer(),
......@@ -810,7 +809,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(), &dampingAndThole.getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*electrostaticsThreads, electrostaticsThreads);
if (gkKernel != NULL)
gkKernel->finishComputation(torque, labFrameDipoles, labFrameQuadrupoles, inducedDipole, inducedDipolePolar, dampingAndThole, covalentFlags, polarizationGroupFlags);
gkKernel->finishComputation(torque, labDipoles, labQuadrupoles, inducedDipole, inducedDipolePolar, dampingAndThole, covalentFlags, polarizationGroupFlags);
}
else {
// Compute reciprocal box vectors.
......@@ -842,7 +841,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Reciprocal space calculation.
unsigned int maxTiles = nb.getInteractingTiles().getSize();
void* pmeTransformMultipolesArgs[] = {&labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(),
void* pmeTransformMultipolesArgs[] = {&labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(),
&fracDipoles.getDevicePointer(), &fracQuadrupoles.getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeTransformMultipolesKernel, pmeTransformMultipolesArgs, cu.getNumAtoms());
void* pmeSpreadFixedMultipolesArgs[] = {&cu.getPosq().getDevicePointer(), &fracDipoles.getDevicePointer(), &fracQuadrupoles.getDevicePointer(),
......@@ -864,14 +863,14 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
else
cufftExecC2C(fft, (float2*) pmeGrid.getDevicePointer(), (float2*) pmeGrid.getDevicePointer(), CUFFT_INVERSE);
void* pmeFixedPotentialArgs[] = {&pmeGrid.getDevicePointer(), &pmePhi.getDevicePointer(), &field.getDevicePointer(),
&fieldPolar .getDevicePointer(), &cu.getPosq().getDevicePointer(), &labFrameDipoles.getDevicePointer(),
&fieldPolar .getDevicePointer(), &cu.getPosq().getDevicePointer(), &labDipoles.getDevicePointer(),
cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeFixedPotentialKernel, pmeFixedPotentialArgs, cu.getNumAtoms());
void* pmeTransformFixedPotentialArgs[] = {&pmePhi.getDevicePointer(), &pmeCphi.getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeTransformPotentialKernel, pmeTransformFixedPotentialArgs, cu.getNumAtoms());
void* pmeFixedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque.getDevicePointer(),
&cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(),
&cu.getEnergyBuffer().getDevicePointer(), &labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(),
&fracDipoles.getDevicePointer(), &fracQuadrupoles.getDevicePointer(), &pmePhi.getDevicePointer(), &pmeCphi.getDevicePointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeFixedForceKernel, pmeFixedForceArgs, cu.getNumAtoms());
......@@ -883,7 +882,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
&labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(), &dampingAndThole.getDevicePointer()};
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*fixedFieldThreads, fixedFieldThreads);
void* recordInducedDipolesArgs[] = {&field.getDevicePointer(), &fieldPolar.getDevicePointer(),
&inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(), &polarizability.getDevicePointer()};
......@@ -937,7 +936,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* pmeTransformInducedPotentialArgs[] = {&pmePhidp.getDevicePointer(), &pmeCphi.getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeTransformPotentialKernel, pmeTransformInducedPotentialArgs, cu.getNumAtoms());
void* pmeInducedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque.getDevicePointer(),
&cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(),
&cu.getEnergyBuffer().getDevicePointer(), &labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(),
&fracDipoles.getDevicePointer(), &fracQuadrupoles.getDevicePointer(),
&inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(), &pmePhi.getDevicePointer(), &pmePhid.getDevicePointer(),
&pmePhip.getDevicePointer(), &pmePhidp.getDevicePointer(), &pmeCphi.getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
......@@ -1220,23 +1219,22 @@ void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& cont
context.calcForcesAndEnergy(false, false, context.getIntegrator().getIntegrationForceGroups());
}
void CudaCalcAmoebaMultipoleForceKernel::getLabFramePermanentDipoles(ContextImpl& context, vector<Vec3>& dipoles) {
ensureMultipolesValid(context);
int numParticles = cu.getNumAtoms();
dipoles.resize(numParticles);
const vector<int>& order = cu.getAtomIndex();
if (cu.getUseDoublePrecision()) {
vector<double> labDipoleVec;
labFrameDipoles.download(labDipoleVec);
vector<double3> labDipoleVec;
labDipoles.download(labDipoleVec);
for (int i = 0; i < numParticles; i++)
dipoles[order[i]] = Vec3(labDipoleVec[3*i], labDipoleVec[3*i+1], labDipoleVec[3*i+2]);
dipoles[order[i]] = Vec3(labDipoleVec[i].x, labDipoleVec[i].y, labDipoleVec[i].z);
}
else {
vector<float> labDipoleVec;
labFrameDipoles.download(labDipoleVec);
vector<float3> labDipoleVec;
labDipoles.download(labDipoleVec);
for (int i = 0; i < numParticles; i++)
dipoles[order[i]] = Vec3(labDipoleVec[3*i], labDipoleVec[3*i+1], labDipoleVec[3*i+2]);
dipoles[order[i]] = Vec3(labDipoleVec[i].x, labDipoleVec[i].y, labDipoleVec[i].z);
}
}
......@@ -1247,16 +1245,16 @@ void CudaCalcAmoebaMultipoleForceKernel::getInducedDipoles(ContextImpl& context,
dipoles.resize(numParticles);
const vector<int>& order = cu.getAtomIndex();
if (cu.getUseDoublePrecision()) {
vector<double> d;
vector<double3> d;
inducedDipole.download(d);
for (int i = 0; i < numParticles; i++)
dipoles[order[i]] = Vec3(d[3*i], d[3*i+1], d[3*i+2]);
dipoles[order[i]] = Vec3(d[i].x, d[i].y, d[i].z);
}
else {
vector<float> d;
vector<float3> d;
inducedDipole.download(d);
for (int i = 0; i < numParticles; i++)
dipoles[order[i]] = Vec3(d[3*i], d[3*i+1], d[3*i+2]);
dipoles[order[i]] = Vec3(d[i].x, d[i].y, d[i].z);
}
}
......@@ -1268,35 +1266,35 @@ void CudaCalcAmoebaMultipoleForceKernel::getTotalDipoles(ContextImpl& context, v
const vector<int>& order = cu.getAtomIndex();
if (cu.getUseDoublePrecision()) {
vector<double4> posqVec;
vector<double> labDipoleVec;
vector<double> inducedDipoleVec;
vector<double3> labDipoleVec;
vector<double3> inducedDipoleVec;
double totalDipoleVecX;
double totalDipoleVecY;
double totalDipoleVecZ;
inducedDipole.download(inducedDipoleVec);
labFrameDipoles.download(labDipoleVec);
labDipoles.download(labDipoleVec);
cu.getPosq().download(posqVec);
for (int i = 0; i < numParticles; i++) {
totalDipoleVecX = labDipoleVec[3*i] + inducedDipoleVec[3*i];
totalDipoleVecY = labDipoleVec[3*i+1] + inducedDipoleVec[3*i+1];
totalDipoleVecZ = labDipoleVec[3*i+2] + inducedDipoleVec[3*i+2];
totalDipoleVecX = labDipoleVec[i].x + inducedDipoleVec[i].x;
totalDipoleVecY = labDipoleVec[i].y + inducedDipoleVec[i].y;
totalDipoleVecZ = labDipoleVec[i].z + inducedDipoleVec[i].z;
dipoles[order[i]] = Vec3(totalDipoleVecX, totalDipoleVecY, totalDipoleVecZ);
}
}
else {
vector<float4> posqVec;
vector<float> labDipoleVec;
vector<float> inducedDipoleVec;
vector<float3> labDipoleVec;
vector<float3> inducedDipoleVec;
float totalDipoleVecX;
float totalDipoleVecY;
float totalDipoleVecZ;
inducedDipole.download(inducedDipoleVec);
labFrameDipoles.download(labDipoleVec);
labDipoles.download(labDipoleVec);
cu.getPosq().download(posqVec);
for (int i = 0; i < numParticles; i++) {
totalDipoleVecX = labDipoleVec[3*i] + inducedDipoleVec[3*i];
totalDipoleVecY = labDipoleVec[3*i+1] + inducedDipoleVec[3*i+1];
totalDipoleVecZ = labDipoleVec[3*i+2] + inducedDipoleVec[3*i+2];
totalDipoleVecX = labDipoleVec[i].x + inducedDipoleVec[i].x;
totalDipoleVecY = labDipoleVec[i].y + inducedDipoleVec[i].y;
totalDipoleVecZ = labDipoleVec[i].z + inducedDipoleVec[i].z;
dipoles[order[i]] = Vec3(totalDipoleVecX, totalDipoleVecY, totalDipoleVecZ);
}
}
......@@ -1326,8 +1324,8 @@ void CudaCalcAmoebaMultipoleForceKernel::getElectrostaticPotential(ContextImpl&
// Compute the potential.
void* computePotentialArgs[] = {&cu.getPosq().getDevicePointer(), &labFrameDipoles.getDevicePointer(),
&labFrameQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(), &points.getDevicePointer(),
void* computePotentialArgs[] = {&cu.getPosq().getDevicePointer(), &labDipoles.getDevicePointer(),
&labQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(), &points.getDevicePointer(),
&potential.getDevicePointer(), &numPoints, cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer(),
cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer()};
int blockSize = 128;
......@@ -1343,7 +1341,7 @@ void CudaCalcAmoebaMultipoleForceKernel::getElectrostaticPotential(ContextImpl&
}
}
template <class T, class T4, class M4>
template <class T, class T3, class T4, class M4>
void CudaCalcAmoebaMultipoleForceKernel::computeSystemMultipoleMoments(ContextImpl& context, vector<double>& outputMultipoleMoments) {
// Compute the local coordinates relative to the center of mass.
int numAtoms = cu.getNumAtoms();
......@@ -1388,15 +1386,16 @@ void CudaCalcAmoebaMultipoleForceKernel::computeSystemMultipoleMoments(ContextIm
double zxqdp = 0.0;
double zyqdp = 0.0;
double zzqdp = 0.0;
vector<T> labDipoleVec, inducedDipoleVec, quadrupoleVec;
labFrameDipoles.download(labDipoleVec);
vector<T3> labDipoleVec, inducedDipoleVec;
vector<T> quadrupoleVec;
labDipoles.download(labDipoleVec);
inducedDipole.download(inducedDipoleVec);
labFrameQuadrupoles.download(quadrupoleVec);
labQuadrupoles.download(quadrupoleVec);
for (int i = 0; i < numAtoms; i++) {
totalCharge += posqLocal[i].w;
double netDipoleX = (labDipoleVec[3*i] + inducedDipoleVec[3*i]);
double netDipoleY = (labDipoleVec[3*i+1] + inducedDipoleVec[3*i+1]);
double netDipoleZ = (labDipoleVec[3*i+2] + inducedDipoleVec[3*i+2]);
double netDipoleX = (labDipoleVec[i].x + inducedDipoleVec[i].x);
double netDipoleY = (labDipoleVec[i].y + inducedDipoleVec[i].y);
double netDipoleZ = (labDipoleVec[i].z + inducedDipoleVec[i].z);
xdpl += posqLocal[i].x*posqLocal[i].w + netDipoleX;
ydpl += posqLocal[i].y*posqLocal[i].w + netDipoleY;
zdpl += posqLocal[i].z*posqLocal[i].w + netDipoleZ;
......@@ -1459,11 +1458,11 @@ void CudaCalcAmoebaMultipoleForceKernel::computeSystemMultipoleMoments(ContextIm
void CudaCalcAmoebaMultipoleForceKernel::getSystemMultipoleMoments(ContextImpl& context, vector<double>& outputMultipoleMoments) {
ensureMultipolesValid(context);
if (cu.getUseDoublePrecision())
computeSystemMultipoleMoments<double, double4, double4>(context, outputMultipoleMoments);
computeSystemMultipoleMoments<double, double3, double4, double4>(context, outputMultipoleMoments);
else if (cu.getUseMixedPrecision())
computeSystemMultipoleMoments<float, float4, double4>(context, outputMultipoleMoments);
computeSystemMultipoleMoments<float, float3, float4, double4>(context, outputMultipoleMoments);
else
computeSystemMultipoleMoments<float, float4, float4>(context, outputMultipoleMoments);
computeSystemMultipoleMoments<float, float3, float4, float4>(context, outputMultipoleMoments);
}
void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& context, const AmoebaMultipoleForce& force) {
......@@ -1480,8 +1479,8 @@ void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& co
double4* posqd = (double4*) cu.getPinnedBuffer();
vector<float2> dampingAndTholeVec;
vector<float> polarizabilityVec;
vector<float> molecularDipolesVec;
vector<float> molecularQuadrupolesVec;
vector<float> localDipolesVec;
vector<float> localQuadrupolesVec;
vector<int4> multipoleParticlesVec;
for (int i = 0; i < force.getNumMultipoles(); i++) {
double charge, thole, damping, polarity;
......@@ -1496,15 +1495,15 @@ void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& co
polarizabilityVec.push_back((float) polarity);
multipoleParticlesVec.push_back(make_int4(atomX, atomY, atomZ, axisType));
for (int j = 0; j < 3; j++)
molecularDipolesVec.push_back((float) dipole[j]);
molecularQuadrupolesVec.push_back((float) quadrupole[0]);
molecularQuadrupolesVec.push_back((float) quadrupole[1]);
molecularQuadrupolesVec.push_back((float) quadrupole[2]);
molecularQuadrupolesVec.push_back((float) quadrupole[4]);
molecularQuadrupolesVec.push_back((float) quadrupole[5]);
localDipolesVec.push_back((float) dipole[j]);
localQuadrupolesVec.push_back((float) quadrupole[0]);
localQuadrupolesVec.push_back((float) quadrupole[1]);
localQuadrupolesVec.push_back((float) quadrupole[2]);
localQuadrupolesVec.push_back((float) quadrupole[4]);
localQuadrupolesVec.push_back((float) quadrupole[5]);
}
if (!hasQuadrupoles) {
for (auto q : molecularQuadrupolesVec)
for (auto q : localQuadrupolesVec)
if (q != 0.0)
throw OpenMMException("updateParametersInContext: Cannot set a non-zero quadrupole moment, because quadrupoles were excluded from the kernel");
}
......@@ -1513,15 +1512,15 @@ void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& co
polarizabilityVec.push_back(0);
multipoleParticlesVec.push_back(make_int4(0, 0, 0, 0));
for (int j = 0; j < 3; j++)
molecularDipolesVec.push_back(0);
localDipolesVec.push_back(0);
for (int j = 0; j < 5; j++)
molecularQuadrupolesVec.push_back(0);
localQuadrupolesVec.push_back(0);
}
dampingAndThole.upload(dampingAndTholeVec);
polarizability.upload(polarizabilityVec);
multipoleParticles.upload(multipoleParticlesVec);
molecularDipoles.upload(molecularDipolesVec);
molecularQuadrupoles.upload(molecularQuadrupolesVec);
localDipoles.upload(localDipolesVec);
localQuadrupoles.upload(localQuadrupolesVec);
cu.getPosq().upload(cu.getPinnedBuffer());
cu.invalidateMolecules();
multipolesAreValid = false;
......@@ -1530,7 +1529,7 @@ void CudaCalcAmoebaMultipoleForceKernel::copyParametersToContext(ContextImpl& co
void CudaCalcAmoebaMultipoleForceKernel::getPMEParameters(double& alpha, int& nx, int& ny, int& nz) const {
if (!usePME)
throw OpenMMException("getPMEParametersInContext: This Context is not using PME");
alpha = this->alpha;
alpha = pmeAlpha;
nx = gridSizeX;
ny = gridSizeY;
nz = gridSizeZ;
......@@ -1712,7 +1711,7 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::computeBornRadii() {
cu.executeKernel(reduceBornSumKernel, reduceBornSumArgs, cu.getNumAtoms());
}
void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles,
void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray& torque, CudaArray& labDipoles, CudaArray& labQuadrupoles,
CudaArray& inducedDipole, CudaArray& inducedDipolePolar, CudaArray& dampingAndThole, CudaArray& covalentFlags, CudaArray& polarizationGroupFlags) {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
int startTileIndex = nb.getStartTileIndex();
......@@ -1722,8 +1721,8 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray&
// Compute the GK force.
void* gkForceArgs[] = {&cu.getForce().getDevicePointer(), &torque.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &startTileIndex, &numTileIndices, &labFrameDipoles.getDevicePointer(),
&labFrameQuadrupoles.getDevicePointer(), &inducedDipoleS.getDevicePointer(), &inducedDipolePolarS.getDevicePointer(),
&cu.getPosq().getDevicePointer(), &startTileIndex, &numTileIndices, &labDipoles.getDevicePointer(),
&labQuadrupoles.getDevicePointer(), &inducedDipoleS.getDevicePointer(), &inducedDipolePolarS.getDevicePointer(),
&bornRadii.getDevicePointer(), &bornForce.getDevicePointer()};
cu.executeKernel(gkForceKernel, gkForceArgs, numForceThreadBlocks*gkForceThreads, gkForceThreads);
......@@ -1742,7 +1741,7 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray&
void* ediffArgs[] = {&cu.getForce().getDevicePointer(), &torque.getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(),
&cu.getPosq().getDevicePointer(), &covalentFlags.getDevicePointer(), &polarizationGroupFlags.getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &startTileIndex, &numTileIndices,
&labFrameDipoles.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(),
&labDipoles.getDevicePointer(), &labQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(),
&inducedDipolePolar.getDevicePointer(), &inducedDipoleS.getDevicePointer(), &inducedDipolePolarS.getDevicePointer(),
&dampingAndThole.getDevicePointer()};
cu.executeKernel(ediffKernel, ediffArgs, numForceThreadBlocks*ediffThreads, ediffThreads);
......
......@@ -161,11 +161,11 @@ private:
bool iterateDipolesByDIIS(int iteration);
void computeExtrapolatedDipoles(void** recipBoxVectorPointer);
void ensureMultipolesValid(ContextImpl& context);
template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
template <class T, class T3, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
int numMultipoles, maxInducedIterations, maxExtrapolationOrder;
int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads;
int gridSizeX, gridSizeY, gridSizeZ;
double alpha, inducedEpsilon;
double pmeAlpha, inducedEpsilon;
bool usePME, hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid, hasCreatedEvent;
AmoebaMultipoleForce::PolarizationType polarizationType;
CudaContext& cu;
......@@ -173,10 +173,10 @@ private:
std::vector<int3> covalentFlagValues;
std::vector<int2> polarizationFlagValues;
CudaArray multipoleParticles;
CudaArray molecularDipoles;
CudaArray molecularQuadrupoles;
CudaArray labFrameDipoles;
CudaArray labFrameQuadrupoles;
CudaArray localDipoles;
CudaArray localQuadrupoles;
CudaArray labDipoles;
CudaArray labQuadrupoles;
CudaArray sphericalDipoles;
CudaArray sphericalQuadrupoles;
CudaArray fracDipoles;
......
......@@ -189,7 +189,7 @@ __device__ void computeOneInteractionT2(AtomData2& atom1, volatile AtomData2& at
__device__ void computeOneInteractionB1B2(AtomData2& atom1, volatile AtomData2& atom2);
inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const real* __restrict__ bornRadius) {
const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar, const real* __restrict__ bornRadius) {
real4 atomPosq = posq[atom];
data.pos = trimTo3(atomPosq);
data.q = atomPosq.w;
......@@ -202,12 +202,8 @@ inline __device__ void loadAtomData2(AtomData2& data, int atom, const real4* __r
data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.inducedDipole = inducedDipole[atom];
data.inducedDipolePolar = inducedDipolePolar[atom];
data.bornRadius = bornRadius[atom];
}
......@@ -222,7 +218,7 @@ inline __device__ void zeroAtomData(AtomData2& data) {
extern "C" __global__ void computeGKForces(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, unsigned int startTileIndex, unsigned int numTileIndices, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
const real* __restrict__ bornRadii, unsigned long long* __restrict__ bornForce) {
unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
......@@ -565,8 +561,8 @@ __device__ void computeOneEDiffInteractionT1(AtomData4& atom1, volatile AtomData
__device__ void computeOneEDiffInteractionT3(AtomData4& atom1, volatile AtomData4& atom2, float dScale, float pScale, real3& outputForce);
inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS, const float2* __restrict__ dampingAndThole) {
const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
const real3* __restrict__ inducedDipoleS, const real3* __restrict__ inducedDipolePolarS, const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.q = atomPosq.w;
......@@ -579,18 +575,10 @@ inline __device__ void loadAtomData4(AtomData4& data, int atom, const real4* __r
data.quadrupoleYY = labFrameQuadrupole[atom*5+3];
data.quadrupoleYZ = labFrameQuadrupole[atom*5+4];
data.quadrupoleZZ = -(data.quadrupoleXX+data.quadrupoleYY);
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.inducedDipoleS.x = inducedDipoleS[atom*3];
data.inducedDipoleS.y = inducedDipoleS[atom*3+1];
data.inducedDipoleS.z = inducedDipoleS[atom*3+2];
data.inducedDipolePolarS.x = inducedDipolePolarS[atom*3];
data.inducedDipolePolarS.y = inducedDipolePolarS[atom*3+1];
data.inducedDipolePolarS.z = inducedDipolePolarS[atom*3+2];
data.inducedDipole = inducedDipole[atom];
data.inducedDipolePolar = inducedDipolePolar[atom];
data.inducedDipoleS = inducedDipoleS[atom];
data.inducedDipolePolarS = inducedDipolePolarS[atom];
float2 temp = dampingAndThole[atom];
data.damp = temp.x;
data.thole = temp.y;
......@@ -615,8 +603,8 @@ extern "C" __global__ void computeEDiffForce(
unsigned long long* __restrict__ forceBuffers, unsigned long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real4* __restrict__ posq, const uint2* __restrict__ covalentFlags, const unsigned int* __restrict__ polarizationGroupFlags,
const int2* __restrict__ exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices,
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const real* __restrict__ inducedDipoleS, const real* __restrict__ inducedDipolePolarS,
const real* __restrict__ labFrameDipole, const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole,
const real3* __restrict__ inducedDipolePolar, const real3* __restrict__ inducedDipoleS, const real3* __restrict__ inducedDipolePolarS,
const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
......
......@@ -10,7 +10,7 @@ typedef struct {
} AtomData;
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.q = atomPosq.w;
......@@ -24,12 +24,8 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3];
data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4];
#endif
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.inducedDipole = inducedDipole[atom];
data.inducedDipolePolar = inducedDipolePolar[atom];
float2 temp = dampingAndThole[atom];
data.damp = temp.x;
data.thole = temp.y;
......@@ -382,8 +378,8 @@ extern "C" __global__ void computeElectrostatics(
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms,
#endif
const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole,
const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
......
......@@ -18,31 +18,23 @@ typedef struct {
} AtomData;
#ifdef USE_GK
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole, const real* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii) {
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real3* __restrict__ inducedDipole,
const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole, const real3* __restrict__ inducedDipoleS,
const real3* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii) {
#else
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real3* __restrict__ inducedDipole,
const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
#endif
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.inducedDipole = inducedDipole[atom];
data.inducedDipolePolar = inducedDipolePolar[atom];
float2 temp = dampingAndThole[atom];
data.damp = temp.x;
data.thole = temp.y;
#ifdef USE_GK
data.inducedDipoleS.x = inducedDipoleS[atom*3];
data.inducedDipoleS.y = inducedDipoleS[atom*3+1];
data.inducedDipoleS.z = inducedDipoleS[atom*3+2];
data.inducedDipolePolarS.x = inducedDipolePolarS[atom*3];
data.inducedDipolePolarS.y = inducedDipolePolarS[atom*3+1];
data.inducedDipolePolarS.z = inducedDipolePolarS[atom*3+2];
data.inducedDipoleS = inducedDipoleS[atom];
data.inducedDipolePolarS = inducedDipolePolarS[atom];
data.bornRadius = bornRadii[atom];
#endif
}
......@@ -358,7 +350,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
*/
extern "C" __global__ void computeInducedField(
unsigned long long* __restrict__ field, unsigned long long* __restrict__ fieldPolar, const real4* __restrict__ posq, const int2* __restrict__ exclusionTiles,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar, unsigned int startTileIndex, unsigned int numTileIndices,
#ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
#endif
......@@ -366,8 +358,8 @@ extern "C" __global__ void computeInducedField(
const int* __restrict__ tiles, const unsigned int* __restrict__ interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter, const unsigned int* __restrict__ interactingAtoms,
#elif defined USE_GK
unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real* __restrict__ inducedDipoleS,
const real* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
unsigned long long* __restrict__ fieldS, unsigned long long* __restrict__ fieldPolarS, const real3* __restrict__ inducedDipoleS,
const real3* __restrict__ inducedDipolePolarS, const real* __restrict__ bornRadii,
#ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradientS, unsigned long long* __restrict__ fieldGradientPolarS,
#endif
......@@ -556,53 +548,6 @@ extern "C" __global__ void computeInducedField(
}
}
extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar,
const long long* __restrict__ fixedFieldS, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar,
real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors) {
extern __shared__ real2 buffer[];
const float polarSOR = 0.55f;
#ifdef USE_EWALD
const real ewaldScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
#else
const real ewaldScale = 0;
#endif
const real fieldScale = 1/(real) 0x100000000;
real sumErrors = 0;
real sumPolarErrors = 0;
for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
real scale = polarizability[atom];
for (int component = 0; component < 3; component++) {
int dipoleIndex = 3*atom+component;
int fieldIndex = atom+component*PADDED_NUM_ATOMS;
real previousDipole = inducedDipole[dipoleIndex];
real previousDipolePolar = inducedDipolePolar[dipoleIndex];
long long fixedS = (fixedFieldS == NULL ? (long long) 0 : fixedFieldS[fieldIndex]);
real newDipole = scale*((fixedField[fieldIndex]+fixedS+inducedField[fieldIndex])*fieldScale+ewaldScale*previousDipole);
real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+fixedS+inducedFieldPolar[fieldIndex])*fieldScale+ewaldScale*previousDipolePolar);
newDipole = previousDipole + polarSOR*(newDipole-previousDipole);
newDipolePolar = previousDipolePolar + polarSOR*(newDipolePolar-previousDipolePolar);
inducedDipole[dipoleIndex] = newDipole;
inducedDipolePolar[dipoleIndex] = newDipolePolar;
sumErrors += (newDipole-previousDipole)*(newDipole-previousDipole);
sumPolarErrors += (newDipolePolar-previousDipolePolar)*(newDipolePolar-previousDipolePolar);
}
}
// Sum the errors over threads and store the total for this block.
buffer[threadIdx.x] = make_real2(sumErrors, sumPolarErrors);
__syncthreads();
for (int offset = 1; offset < blockDim.x; offset *= 2) {
if (threadIdx.x+offset < blockDim.x && (threadIdx.x&(2*offset-1)) == 0) {
buffer[threadIdx.x].x += buffer[threadIdx.x+offset].x;
buffer[threadIdx.x].y += buffer[threadIdx.x+offset].y;
}
__syncthreads();
}
if (threadIdx.x == 0)
errors[blockIdx.x] = make_float2((float) buffer[0].x, (float) buffer[0].y);
}
extern "C" __global__ void recordInducedDipolesForDIIS(const long long* __restrict__ fixedField, const long long* __restrict__ fixedFieldPolar,
const long long* __restrict__ fixedFieldS, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar,
const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors,
......
......@@ -72,11 +72,11 @@ __device__ void computeBSplinePoint(real4* thetai, real w, real* array) {
/**
* Convert the fixed multipoles from Cartesian to fractional coordinates.
*/
extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real* __restrict__ labFrameDipole,
extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real* __restrict__ labDipole,
#ifdef HIPPO
const real* __restrict__ labQXX, const real* __restrict__ labQXY, const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ,
#else
const real* __restrict__ labFrameQuadrupole,
const real* __restrict__ labQuadrupole,
#endif
real* __restrict__ fracDipole, real* __restrict__ fracQuadrupole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
// Build matrices for transforming the dipoles and quadrupoles.
......@@ -113,7 +113,7 @@ extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real
for (int j = 0; j < 3; j++) {
real dipole = 0;
for (int k = 0; k < 3; k++)
dipole += a[j][k]*labFrameDipole[3*i+k];
dipole += a[j][k]*labDipole[3*i+k];
fracDipole[3*i+j] = dipole;
}
for (int j = 0; j < 6; j++) {
......@@ -127,8 +127,8 @@ extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real
#else
real quadrupole = 0;
for (int k = 0; k < 5; k++)
quadrupole += quadScale[k]*b[j][k]*labFrameQuadrupole[5*i+k];
quadrupole -= quadScale[5]*b[j][5]*(labFrameQuadrupole[5*i]+labFrameQuadrupole[5*i+3]);
quadrupole += quadScale[k]*b[j][k]*labQuadrupole[5*i+k];
quadrupole -= quadScale[5]*b[j][5]*(labQuadrupole[5*i]+labQuadrupole[5*i+3]);
#endif
fracQuadrupole[6*i+j] = quadrupole;
}
......@@ -289,11 +289,11 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
}
}
extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ posq, const real* __restrict__ inducedDipole,
extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ posq, const real3* __restrict__ inducedDipole,
#ifdef HIPPO
real* __restrict__ pmeGrid,
#else
const real* __restrict__ inducedDipolePolar, real2* __restrict__ pmeGrid,
const real3* __restrict__ inducedDipolePolar, real2* __restrict__ pmeGrid,
#endif
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
#if __CUDA_ARCH__ < 500
......@@ -325,12 +325,12 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
pos -= periodicBoxVecZ*floor(pos.z*recipBoxVecZ.z+0.5f);
pos -= periodicBoxVecY*floor(pos.y*recipBoxVecY.z+0.5f);
pos -= periodicBoxVecX*floor(pos.x*recipBoxVecX.z+0.5f);
real3 cinducedDipole = ((const real3*) inducedDipole)[m];
real3 cinducedDipole = inducedDipole[m];
real3 finducedDipole = make_real3(cinducedDipole.x*cartToFrac[0][0] + cinducedDipole.y*cartToFrac[0][1] + cinducedDipole.z*cartToFrac[0][2],
cinducedDipole.x*cartToFrac[1][0] + cinducedDipole.y*cartToFrac[1][1] + cinducedDipole.z*cartToFrac[1][2],
cinducedDipole.x*cartToFrac[2][0] + cinducedDipole.y*cartToFrac[2][1] + cinducedDipole.z*cartToFrac[2][2]);
#ifndef HIPPO
real3 cinducedDipolePolar = ((const real3*) inducedDipolePolar)[m];
real3 cinducedDipolePolar = inducedDipolePolar[m];
real3 finducedDipolePolar = make_real3(cinducedDipolePolar.x*cartToFrac[0][0] + cinducedDipolePolar.y*cartToFrac[0][1] + cinducedDipolePolar.z*cartToFrac[0][2],
cinducedDipolePolar.x*cartToFrac[1][0] + cinducedDipolePolar.y*cartToFrac[1][1] + cinducedDipolePolar.z*cartToFrac[1][2],
cinducedDipolePolar.x*cartToFrac[2][0] + cinducedDipolePolar.y*cartToFrac[2][1] + cinducedDipolePolar.z*cartToFrac[2][2]);
......@@ -480,7 +480,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid(
#ifndef HIPPO
long long* __restrict__ fieldPolarBuffers,
#endif
const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
const real4* __restrict__ posq, const real* __restrict__ labDipole, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
#if __CUDA_ARCH__ < 500
real array[PME_ORDER*PME_ORDER];
......@@ -643,9 +643,9 @@ extern "C" __global__ void computeFixedPotentialFromGrid(
phi[m+NUM_ATOMS*18] = tuv012;
phi[m+NUM_ATOMS*19] = tuv111;
real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-tuv100*fracToCart[0][0]-tuv010*fracToCart[0][1]-tuv001*fracToCart[0][2])*0x100000000);
long long fieldy = (long long) ((dipoleScale*labFrameDipole[m*3+1]-tuv100*fracToCart[1][0]-tuv010*fracToCart[1][1]-tuv001*fracToCart[1][2])*0x100000000);
long long fieldz = (long long) ((dipoleScale*labFrameDipole[m*3+2]-tuv100*fracToCart[2][0]-tuv010*fracToCart[2][1]-tuv001*fracToCart[2][2])*0x100000000);
long long fieldx = (long long) ((dipoleScale*labDipole[m*3]-tuv100*fracToCart[0][0]-tuv010*fracToCart[0][1]-tuv001*fracToCart[0][2])*0x100000000);
long long fieldy = (long long) ((dipoleScale*labDipole[m*3+1]-tuv100*fracToCart[1][0]-tuv010*fracToCart[1][1]-tuv001*fracToCart[1][2])*0x100000000);
long long fieldz = (long long) ((dipoleScale*labDipole[m*3+2]-tuv100*fracToCart[2][0]-tuv010*fracToCart[2][1]-tuv001*fracToCart[2][2])*0x100000000);
fieldBuffers[m] = fieldx;
fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy;
fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
......@@ -942,12 +942,12 @@ extern "C" __global__ void computeInducedPotentialFromGrid(
}
extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labDipole,
#ifdef HIPPO
const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real* __restrict__ labQXX,
const real* __restrict__ labQXY, const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ,
#else
const real* __restrict__ labFrameQuadrupole,
const real* __restrict__ labQuadrupole,
#endif
const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole,
const real* __restrict__ phi, const real* __restrict__ cphi_global, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
......@@ -972,9 +972,9 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// Compute the torque.
multipole[1] = labFrameDipole[i*3];
multipole[2] = labFrameDipole[i*3+1];
multipole[3] = labFrameDipole[i*3+2];
multipole[1] = labDipole[i*3];
multipole[2] = labDipole[i*3+1];
multipole[3] = labDipole[i*3+2];
#ifdef HIPPO
multipole[0] = coreCharge[i]+valenceCharge[i];
multipole[4] = labQXX[i];
......@@ -984,11 +984,11 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
multipole[9] = 2*labQYZ[i];
#else
multipole[0] = posq[i].w;
multipole[4] = labFrameQuadrupole[i*5];
multipole[5] = labFrameQuadrupole[i*5+3];
multipole[7] = 2*labFrameQuadrupole[i*5+1];
multipole[8] = 2*labFrameQuadrupole[i*5+2];
multipole[9] = 2*labFrameQuadrupole[i*5+4];
multipole[4] = labQuadrupole[i*5];
multipole[5] = labQuadrupole[i*5+3];
multipole[7] = 2*labQuadrupole[i*5+1];
multipole[8] = 2*labQuadrupole[i*5+2];
multipole[9] = 2*labQuadrupole[i*5+4];
#endif
multipole[6] = -(multipole[4]+multipole[5]);
......@@ -1039,17 +1039,17 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
}
extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer, const real* __restrict__ labDipole,
#ifdef HIPPO
const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge, const real* __restrict__ extrapolatedDipole,
const real* __restrict__ extrapolatedPhi, const real* __restrict__ labQXX, const real* __restrict__ labQXY,
const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ,
#else
const real* __restrict__ labFrameQuadrupole,
const real* __restrict__ labQuadrupole,
#endif
const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole, const real* __restrict__ inducedDipole_global,
const real* __restrict__ fracDipole, const real* __restrict__ fracQuadrupole, const real3* __restrict__ inducedDipole_global,
#ifndef HIPPO
const real* __restrict__ inducedDipolePolar_global,
const real3* __restrict__ inducedDipolePolar_global,
#endif
const real* __restrict__ phi,
#ifndef HIPPO
......@@ -1082,9 +1082,9 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// Compute the torque.
multipole[1] = labFrameDipole[i*3];
multipole[2] = labFrameDipole[i*3+1];
multipole[3] = labFrameDipole[i*3+2];
multipole[1] = labDipole[i*3];
multipole[2] = labDipole[i*3+1];
multipole[3] = labDipole[i*3+2];
#ifdef HIPPO
multipole[0] = coreCharge[i]+valenceCharge[i];
multipole[4] = labQXX[i];
......@@ -1095,11 +1095,11 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
const real scale = EPSILON_FACTOR;
#else
multipole[0] = posq[i].w;
multipole[4] = labFrameQuadrupole[i*5];
multipole[5] = labFrameQuadrupole[i*5+3];
multipole[7] = 2*labFrameQuadrupole[i*5+1];
multipole[8] = 2*labFrameQuadrupole[i*5+2];
multipole[9] = 2*labFrameQuadrupole[i*5+4];
multipole[4] = labQuadrupole[i*5];
multipole[5] = labQuadrupole[i*5+3];
multipole[7] = 2*labQuadrupole[i*5+1];
multipole[8] = 2*labQuadrupole[i*5+2];
multipole[9] = 2*labQuadrupole[i*5+4];
const real scale = EPSILON_FACTOR/2;
#endif
multipole[6] = -(multipole[4]+multipole[5]);
......@@ -1132,13 +1132,13 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
multipole[8] = fracQuadrupole[i*6+2];
multipole[9] = fracQuadrupole[i*6+4];
cinducedDipole[0] = inducedDipole_global[i*3];
cinducedDipole[1] = inducedDipole_global[i*3+1];
cinducedDipole[2] = inducedDipole_global[i*3+2];
cinducedDipole[0] = inducedDipole_global[i].x;
cinducedDipole[1] = inducedDipole_global[i].y;
cinducedDipole[2] = inducedDipole_global[i].z;
#ifndef HIPPO
cinducedDipolePolar[0] = inducedDipolePolar_global[i*3];
cinducedDipolePolar[1] = inducedDipolePolar_global[i*3+1];
cinducedDipolePolar[2] = inducedDipolePolar_global[i*3+2];
cinducedDipolePolar[0] = inducedDipolePolar_global[i].x;
cinducedDipolePolar[1] = inducedDipolePolar_global[i].y;
cinducedDipolePolar[2] = inducedDipolePolar_global[i].z;
#endif
// Multiply the dipoles by cartToFrac, which is just the transpose of fracToCart.
......@@ -1212,7 +1212,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
#ifdef HIPPO
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phidp, long long* __restrict__ inducedField,
const real* __restrict__ inducedDipole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
const real3* __restrict__ inducedDipole, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
__shared__ real fracToCart[3][3];
if (threadIdx.x == 0) {
fracToCart[0][0] = GRID_SIZE_X*recipBoxVecX.x;
......@@ -1228,15 +1228,15 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph
__syncthreads();
real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
inducedField[i] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[0][0] + phidp[i+NUM_ATOMS*2]*fracToCart[0][1] + phidp[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i]));
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[1][0] + phidp[i+NUM_ATOMS*2]*fracToCart[1][1] + phidp[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1]));
inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[2][0] + phidp[i+NUM_ATOMS*2]*fracToCart[2][1] + phidp[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2]));
inducedField[i] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[0][0] + phidp[i+NUM_ATOMS*2]*fracToCart[0][1] + phidp[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[i].x));
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[1][0] + phidp[i+NUM_ATOMS*2]*fracToCart[1][1] + phidp[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[i].y));
inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phidp[i+NUM_ATOMS]*fracToCart[2][0] + phidp[i+NUM_ATOMS*2]*fracToCart[2][1] + phidp[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[i].z));
}
}
extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__ torqueBuffers, mixed* __restrict__ energyBuffer,
const real* __restrict__ labFrameDipole, const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge,
const real* __restrict__ c6, const real* __restrict__ inducedDipole, const real* __restrict__ labQXX, const real* __restrict__ labQXY,
const real3* __restrict__ labDipole, const real* __restrict__ coreCharge, const real* __restrict__ valenceCharge,
const real* __restrict__ c6, const real3* __restrict__ inducedDipole, const real* __restrict__ labQXX, const real* __restrict__ labQXY,
const real* __restrict__ labQXZ, const real* __restrict__ labQYY, const real* __restrict__ labQYZ) {
const real torqueScale = 4*EPSILON_FACTOR*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/(3*SQRT_PI);
real cii = 0;
......@@ -1245,8 +1245,8 @@ extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__
real c6ii = 0;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
real charge = coreCharge[i]+valenceCharge[i];
real3 dipole = make_real3(labFrameDipole[i*3], labFrameDipole[i*3+1], labFrameDipole[i*3+2]);
real3 induced = make_real3(inducedDipole[i*3], inducedDipole[i*3+1], inducedDipole[i*3+2]);
real3 dipole = labDipole[i];
real3 induced = inducedDipole[i];
real qXX = labQXX[i];
real qXY = labQXY[i];
real qXZ = labQXZ[i];
......@@ -1270,7 +1270,7 @@ extern "C" __global__ void calculateSelfEnergyAndTorque(long long* __restrict__
}
#else
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip, long long* __restrict__ inducedField,
long long* __restrict__ inducedFieldPolar, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
long long* __restrict__ inducedFieldPolar, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
#ifdef EXTRAPOLATED_POLARIZATION
unsigned long long* __restrict__ fieldGradient, unsigned long long* __restrict__ fieldGradientPolar,
#endif
......@@ -1290,12 +1290,12 @@ extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ ph
__syncthreads();
real selfDipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
inducedField[i] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[3*i]));
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[3*i+1]));
inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[3*i+2]));
inducedFieldPolar[i] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipolePolar[3*i]));
inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipolePolar[3*i+1]));
inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipolePolar[3*i+2]));
inducedField[i] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[0][0] + phid[i+NUM_ATOMS*2]*fracToCart[0][1] + phid[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipole[i].x));
inducedField[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[1][0] + phid[i+NUM_ATOMS*2]*fracToCart[1][1] + phid[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipole[i].y));
inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phid[i+NUM_ATOMS]*fracToCart[2][0] + phid[i+NUM_ATOMS*2]*fracToCart[2][1] + phid[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipole[i].z));
inducedFieldPolar[i] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[0][0] + phip[i+NUM_ATOMS*2]*fracToCart[0][1] + phip[i+NUM_ATOMS*3]*fracToCart[0][2] - selfDipoleScale*inducedDipolePolar[i].x));
inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[1][0] + phip[i+NUM_ATOMS*2]*fracToCart[1][1] + phip[i+NUM_ATOMS*3]*fracToCart[1][2] - selfDipoleScale*inducedDipolePolar[i].y));
inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (0x100000000*(phip[i+NUM_ATOMS]*fracToCart[2][0] + phip[i+NUM_ATOMS*2]*fracToCart[2][1] + phip[i+NUM_ATOMS*3]*fracToCart[2][2] - selfDipoleScale*inducedDipolePolar[i].z));
#ifdef EXTRAPOLATED_POLARIZATION
// Compute and store the field gradients for later use.
......
extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4* __restrict__ multipoleParticles, float* __restrict__ molecularDipoles,
float* __restrict__ molecularQuadrupoles, real* __restrict__ labFrameDipoles, real* __restrict__ labFrameQuadrupoles,
float* __restrict__ molecularQuadrupoles, real3* __restrict__ labFrameDipoles, real* __restrict__ labFrameQuadrupoles,
real* __restrict__ sphericalDipoles, real* __restrict__ sphericalQuadrupoles) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
// Load the spherical multipoles.
......@@ -176,9 +176,9 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
molDipole[2] = molecularDipoles[offset+2];
if (reverse)
molDipole[1] *= -1;
labFrameDipoles[offset] = molDipole[0]*vectorX.x + molDipole[1]*vectorY.x + molDipole[2]*vectorZ.x;
labFrameDipoles[offset+1] = molDipole[0]*vectorX.y + molDipole[1]*vectorY.y + molDipole[2]*vectorZ.y;
labFrameDipoles[offset+2] = molDipole[0]*vectorX.z + molDipole[1]*vectorY.z + molDipole[2]*vectorZ.z;
labFrameDipoles[atom] = make_real3(molDipole[0]*vectorX.x + molDipole[1]*vectorY.x + molDipole[2]*vectorZ.x,
molDipole[0]*vectorX.y + molDipole[1]*vectorY.y + molDipole[2]*vectorZ.y,
molDipole[0]*vectorX.z + molDipole[1]*vectorY.z + molDipole[2]*vectorZ.z);
// ---------------------------------------------------------------------------------------
......@@ -275,9 +275,7 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
sphericalQuadrupoles[offset+4] = rotatedQuadrupole[4];
}
else {
labFrameDipoles[3*atom] = molecularDipoles[3*atom];
labFrameDipoles[3*atom+1] = molecularDipoles[3*atom+1];
labFrameDipoles[3*atom+2] = molecularDipoles[3*atom+2];
labFrameDipoles[atom] = make_real3(molecularDipoles[3*atom], molecularDipoles[3*atom+1], molecularDipoles[3*atom+2]);
labFrameQuadrupoles[5*atom] = molecularQuadrupoles[5*atom];
labFrameQuadrupoles[5*atom+1] = molecularQuadrupoles[5*atom+1];
labFrameQuadrupoles[5*atom+2] = molecularQuadrupoles[5*atom+2];
......@@ -289,24 +287,24 @@ extern "C" __global__ void computeLabFrameMoments(real4* __restrict__ posq, int4
extern "C" __global__ void recordInducedDipoles(const long long* __restrict__ fieldBuffers, const long long* __restrict__ fieldPolarBuffers,
#ifdef USE_GK
const long long* __restrict__ gkFieldBuffers, real* __restrict__ inducedDipoleS, real* __restrict__ inducedDipolePolarS,
const long long* __restrict__ gkFieldBuffers, real3* __restrict__ inducedDipoleS, real3* __restrict__ inducedDipolePolarS,
#endif
real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
real3* __restrict__ inducedDipole, real3* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability) {
for (int atom = blockIdx.x*blockDim.x+threadIdx.x; atom < NUM_ATOMS; atom += gridDim.x*blockDim.x) {
real scale = polarizability[atom]/(real) 0x100000000;
inducedDipole[3*atom] = scale*fieldBuffers[atom];
inducedDipole[3*atom+1] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS];
inducedDipole[3*atom+2] = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2];
inducedDipolePolar[3*atom] = scale*fieldPolarBuffers[atom];
inducedDipolePolar[3*atom+1] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS];
inducedDipolePolar[3*atom+2] = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2];
inducedDipole[atom].x = scale*fieldBuffers[atom];
inducedDipole[atom].y = scale*fieldBuffers[atom+PADDED_NUM_ATOMS];
inducedDipole[atom].z = scale*fieldBuffers[atom+PADDED_NUM_ATOMS*2];
inducedDipolePolar[atom].x = scale*fieldPolarBuffers[atom];
inducedDipolePolar[atom].y = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS];
inducedDipolePolar[atom].z = scale*fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2];
#ifdef USE_GK
inducedDipoleS[3*atom] = scale*(fieldBuffers[atom]+gkFieldBuffers[atom]);
inducedDipoleS[3*atom+1] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipoleS[3*atom+2] = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
inducedDipolePolarS[3*atom] = scale*(fieldPolarBuffers[atom]+gkFieldBuffers[atom]);
inducedDipolePolarS[3*atom+1] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipolePolarS[3*atom+2] = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
inducedDipoleS[atom].x = scale*(fieldBuffers[atom]+gkFieldBuffers[atom]);
inducedDipoleS[atom].y = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipoleS[atom].z = scale*(fieldBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
inducedDipolePolarS[atom].x = scale*(fieldPolarBuffers[atom]+gkFieldBuffers[atom]);
inducedDipolePolarS[atom].y = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS]+gkFieldBuffers[atom+PADDED_NUM_ATOMS]);
inducedDipolePolarS[atom].z = scale*(fieldPolarBuffers[atom+PADDED_NUM_ATOMS*2]+gkFieldBuffers[atom+PADDED_NUM_ATOMS*2]);
#endif
}
}
......@@ -532,7 +530,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
* Compute the electrostatic potential at each of a set of points.
*/
extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole, const real4* __restrict__ points,
const real* __restrict__ labFrameQuadrupole, const real3* __restrict__ inducedDipole, const real4* __restrict__ points,
real* __restrict__ potential, int numPoints, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ) {
extern __shared__ real4 localPosq[];
real3* localDipole = (real3*) &localPosq[blockDim.x];
......@@ -550,7 +548,7 @@ extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ po
if (atom < NUM_ATOMS) {
localPosq[threadIdx.x] = posq[atom];
localDipole[threadIdx.x] = make_real3(labFrameDipole[3*atom], labFrameDipole[3*atom+1], labFrameDipole[3*atom+2]);
localInducedDipole[threadIdx.x] = make_real3(inducedDipole[3*atom], inducedDipole[3*atom+1], inducedDipole[3*atom+2]);
localInducedDipole[threadIdx.x] = inducedDipole[atom];
localQuadrupole[5*threadIdx.x] = labFrameQuadrupole[5*atom];
localQuadrupole[5*threadIdx.x+1] = labFrameQuadrupole[5*atom+1];
localQuadrupole[5*threadIdx.x+2] = labFrameQuadrupole[5*atom+2];
......
......@@ -10,7 +10,7 @@ typedef struct {
} AtomData;
inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __restrict__ posq, const real* __restrict__ sphericalDipole,
const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole, const real* __restrict__ inducedDipolePolar,
const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole, const real3* __restrict__ inducedDipolePolar,
const float2* __restrict__ dampingAndThole) {
real4 atomPosq = posq[atom];
data.pos = make_real3(atomPosq.x, atomPosq.y, atomPosq.z);
......@@ -25,12 +25,8 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.sphericalQuadrupole[3] = sphericalQuadrupole[atom*5+3];
data.sphericalQuadrupole[4] = sphericalQuadrupole[atom*5+4];
#endif
data.inducedDipole.x = inducedDipole[atom*3];
data.inducedDipole.y = inducedDipole[atom*3+1];
data.inducedDipole.z = inducedDipole[atom*3+2];
data.inducedDipolePolar.x = inducedDipolePolar[atom*3];
data.inducedDipolePolar.y = inducedDipolePolar[atom*3+1];
data.inducedDipolePolar.z = inducedDipolePolar[atom*3+2];
data.inducedDipole = inducedDipole[atom];
data.inducedDipolePolar = inducedDipolePolar[atom];
float2 temp = dampingAndThole[atom];
data.damp = temp.x;
data.thole = temp.y;
......@@ -447,8 +443,8 @@ extern "C" __global__ void computeElectrostatics(
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, const real4* __restrict__ blockCenter,
const unsigned int* __restrict__ interactingAtoms,
#endif
const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real* __restrict__ inducedDipole,
const real* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const real* __restrict__ sphericalDipole, const real* __restrict__ sphericalQuadrupole, const real3* __restrict__ inducedDipole,
const real3* __restrict__ inducedDipolePolar, const float2* __restrict__ dampingAndThole) {
const unsigned int totalWarps = (blockDim.x*gridDim.x)/TILE_SIZE;
const unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/TILE_SIZE;
const unsigned int tgx = threadIdx.x & (TILE_SIZE-1);
......
......@@ -44,7 +44,6 @@ AmoebaReferenceMultipoleForce::AmoebaReferenceMultipoleForce() :
_maximumMutualInducedDipoleIterations(100),
_mutualInducedDipoleEpsilon(1.0e+50),
_mutualInducedDipoleTargetEpsilon(1.0e-04),
_polarSOR(0.55),
_debye(48.033324)
{
initialize();
......@@ -60,7 +59,6 @@ AmoebaReferenceMultipoleForce::AmoebaReferenceMultipoleForce(NonbondedMethod non
_maximumMutualInducedDipoleIterations(100),
_mutualInducedDipoleEpsilon(1.0e+50),
_mutualInducedDipoleTargetEpsilon(1.0e-04),
_polarSOR(0.55),
_debye(48.033324)
{
initialize();
......@@ -890,78 +888,6 @@ void AmoebaReferenceMultipoleForce::calculateInducedDipoleFields(const vector<Mu
calculateInducedDipolePairIxns(particleData[ii], particleData[jj], updateInducedDipoleFields);
}
double AmoebaReferenceMultipoleForce::updateInducedDipoleFields(const vector<MultipoleParticleData>& particleData,
vector<UpdateInducedDipoleFieldStruct>& updateInducedDipoleFields)
{
// Calculate the fields coming from induced dipoles.
calculateInducedDipoleFields(particleData, updateInducedDipoleFields);
// Update the induced dipoles and calculate the convergence factor, maxEpsilon
double maxEpsilon = 0.0;
for (auto& field : updateInducedDipoleFields) {
double epsilon = updateInducedDipole(particleData,
*field.fixedMultipoleField,
field.inducedDipoleField,
*field.inducedDipoles);
maxEpsilon = epsilon > maxEpsilon ? epsilon : maxEpsilon;
}
return maxEpsilon;
}
double AmoebaReferenceMultipoleForce::updateInducedDipole(const vector<MultipoleParticleData>& particleData,
const vector<Vec3>& fixedMultipoleField,
const vector<Vec3>& inducedDipoleField,
vector<Vec3>& inducedDipole)
{
double epsilon = 0.0;
for (unsigned int ii = 0; ii < particleData.size(); ii++) {
Vec3 oldValue = inducedDipole[ii];
Vec3 newValue = fixedMultipoleField[ii] + inducedDipoleField[ii]*particleData[ii].polarity;
Vec3 delta = newValue - oldValue;
inducedDipole[ii] = oldValue + delta*_polarSOR;
epsilon += delta.dot(delta);
}
return epsilon;
}
void AmoebaReferenceMultipoleForce::convergeInduceDipolesBySOR(const vector<MultipoleParticleData>& particleData,
vector<UpdateInducedDipoleFieldStruct>& updateInducedDipoleField)
{
bool done = false;
setMutualInducedDipoleConverged(false);
int iteration = 0;
double currentEpsilon = 1.0e+50;
// loop until (1) induced dipoles are converged or
// (2) iterations == max iterations or
// (3) convergence factor (spsilon) increases
while (!done) {
double epsilon = updateInducedDipoleFields(particleData, updateInducedDipoleField);
epsilon = _polarSOR*_debye*sqrt(epsilon/_numParticles);
if (epsilon < getMutualInducedDipoleTargetEpsilon()) {
setMutualInducedDipoleConverged(true);
done = true;
} else if (currentEpsilon < epsilon || iteration >= getMaximumMutualInducedDipoleIterations()) {
done = true;
}
currentEpsilon = epsilon;
iteration++;
}
setMutualInducedDipoleEpsilon(currentEpsilon);
setMutualInducedDipoleIterations(iteration);
}
void AmoebaReferenceMultipoleForce::convergeInduceDipolesByExtrapolation(const vector<MultipoleParticleData>& particleData, vector<UpdateInducedDipoleFieldStruct>& updateInducedDipoleField) {
// Start by storing the direct dipoles as PT0
......
......@@ -750,7 +750,6 @@ protected:
std::vector<double> _extPartCoefficients;
double _mutualInducedDipoleEpsilon;
double _mutualInducedDipoleTargetEpsilon;
double _polarSOR;
double _debye;
/**
......@@ -1027,14 +1026,6 @@ protected:
* @param particleData vector of particle positions and parameters (charge, labFrame dipoles, quadrupoles, ...)
* @param updateInducedDipoleFields vector of UpdateInducedDipoleFieldStruct containing input induced dipoles and output fields
*/
void convergeInduceDipolesBySOR(const std::vector<MultipoleParticleData>& particleData,
std::vector<UpdateInducedDipoleFieldStruct>& calculateInducedDipoleField);
/**
* Converge induced dipoles.
*
* @param particleData vector of particle positions and parameters (charge, labFrame dipoles, quadrupoles, ...)
* @param updateInducedDipoleFields vector of UpdateInducedDipoleFieldStruct containing input induced dipoles and output fields
*/
void convergeInduceDipolesByDIIS(const std::vector<MultipoleParticleData>& particleData,
std::vector<UpdateInducedDipoleFieldStruct>& calculateInducedDipoleField);
......@@ -1046,28 +1037,6 @@ protected:
*/
void computeDIISCoefficients(const std::vector<std::vector<Vec3> >& prevErrors, std::vector<double>& coefficients) const;
/**
* Update fields due to induced dipoles for each particle.
*
* @param particleData vector of particle positions and parameters (charge, labFrame dipoles, quadrupoles, ...)
* @param updateInducedDipoleFields vector of UpdateInducedDipoleFieldStruct containing input induced dipoles and output fields
*/
double updateInducedDipoleFields(const std::vector<MultipoleParticleData>& particleData,
std::vector<UpdateInducedDipoleFieldStruct>& calculateInducedDipoleField);
/**
* Update induced dipole for a particle given updated induced dipole field at the site.
*
* @param particleI positions and parameters (charge, labFrame dipoles, quadrupoles, ...) for particle I
* @param fixedMultipoleField fields due fixed multipoles at each site
* @param inducedDipoleField fields due induced dipoles at each site
* @param inducedDipoles output vector of updated induced dipoles
*/
double updateInducedDipole(const std::vector<MultipoleParticleData>& particleI,
const std::vector<Vec3>& fixedMultipoleField,
const std::vector<Vec3>& inducedDipoleField,
std::vector<Vec3>& inducedDipoles);
/**
* Calculate induced dipoles.
*
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment