"platforms/vscode:/vscode.git/clone" did not exist on "d92e09370ffd3aa31c6dd0ec43162d073765ac43"
Commit a4e2d9a6 authored by Peter Eastman's avatar Peter Eastman
Browse files

Began CUDA implementation of extrapolated polarization

parent e6804811
...@@ -810,7 +810,10 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri ...@@ -810,7 +810,10 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL), sphericalDipoles(NULL), sphericalQuadrupoles(NULL), multipoleParticles(NULL), molecularDipoles(NULL), molecularQuadrupoles(NULL), labFrameDipoles(NULL), labFrameQuadrupoles(NULL), sphericalDipoles(NULL), sphericalQuadrupoles(NULL),
fracDipoles(NULL), fracQuadrupoles(NULL), field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), inducedDipole(NULL), fracDipoles(NULL), fracQuadrupoles(NULL), field(NULL), fieldPolar(NULL), inducedField(NULL), inducedFieldPolar(NULL), torque(NULL), dampingAndThole(NULL), inducedDipole(NULL),
diisCoefficients(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), prevDipoles(NULL), prevDipolesPolar(NULL), prevDipolesGk(NULL), diisCoefficients(NULL), inducedDipolePolar(NULL), inducedDipoleErrors(NULL), prevDipoles(NULL), prevDipolesPolar(NULL), prevDipolesGk(NULL),
prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL), prevDipolesGkPolar(NULL), prevErrors(NULL), diisMatrix(NULL), polarizability(NULL), extrapolatedDipole(NULL), extrapolatedDipolePolar(NULL),
extrapolatedDipoleGk(NULL), extrapolatedDipoleGkPolar(NULL), inducedDipoleFieldGradient(NULL), inducedDipoleFieldGradientPolar(NULL),
inducedDipoleFieldGradientGk(NULL), inducedDipoleFieldGradientGkPolar(NULL), extrapolatedDipoleFieldGradient(NULL), extrapolatedDipoleFieldGradientPolar(NULL),
extrapolatedDipoleFieldGradientGk(NULL), extrapolatedDipoleFieldGradientGkPolar(NULL), covalentFlags(NULL), polarizationGroupFlags(NULL),
pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeIgrid(NULL), pmePhi(NULL), pmeGrid(NULL), pmeBsplineModuliX(NULL), pmeBsplineModuliY(NULL), pmeBsplineModuliZ(NULL), pmeIgrid(NULL), pmePhi(NULL),
pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeCphi(NULL), pmeAtomGridIndex(NULL), lastPositions(NULL), sort(NULL), gkKernel(NULL) { pmePhid(NULL), pmePhip(NULL), pmePhidp(NULL), pmeCphi(NULL), pmeAtomGridIndex(NULL), lastPositions(NULL), sort(NULL), gkKernel(NULL) {
} }
...@@ -867,6 +870,30 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() { ...@@ -867,6 +870,30 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete diisMatrix; delete diisMatrix;
if (diisCoefficients != NULL) if (diisCoefficients != NULL)
delete diisCoefficients; delete diisCoefficients;
if (extrapolatedDipole != NULL)
delete extrapolatedDipole;
if (extrapolatedDipolePolar != NULL)
delete extrapolatedDipolePolar;
if (extrapolatedDipoleGk != NULL)
delete extrapolatedDipoleGk;
if (extrapolatedDipoleGkPolar != NULL)
delete extrapolatedDipoleGkPolar;
if (inducedDipoleFieldGradient != NULL)
delete inducedDipoleFieldGradient;
if (inducedDipoleFieldGradientPolar != NULL)
delete inducedDipoleFieldGradientPolar;
if (inducedDipoleFieldGradientGk != NULL)
delete inducedDipoleFieldGradientGk;
if (inducedDipoleFieldGradientGkPolar != NULL)
delete inducedDipoleFieldGradientGkPolar;
if (extrapolatedDipoleFieldGradient != NULL)
delete extrapolatedDipoleFieldGradient;
if (extrapolatedDipoleFieldGradientPolar != NULL)
delete extrapolatedDipoleFieldGradientPolar;
if (extrapolatedDipoleFieldGradientGk != NULL)
delete extrapolatedDipoleFieldGradientGk;
if (extrapolatedDipoleFieldGradientGkPolar != NULL)
delete extrapolatedDipoleFieldGradientGkPolar;
if (polarizability != NULL) if (polarizability != NULL)
delete polarizability; delete polarizability;
if (covalentFlags != NULL) if (covalentFlags != NULL)
...@@ -967,6 +994,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -967,6 +994,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Create workspace arrays. // Create workspace arrays.
polarizationType = force.getPolarizationType();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
labFrameDipoles = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles"); labFrameDipoles = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "labFrameDipoles");
labFrameQuadrupoles = new CudaArray(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles"); labFrameQuadrupoles = new CudaArray(cu, 5*paddedNumAtoms, elementSize, "labFrameQuadrupoles");
...@@ -979,12 +1007,23 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -979,12 +1007,23 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
torque = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "torque"); torque = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "torque");
inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole"); inducedDipole = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipole");
inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar"); inducedDipolePolar = new CudaArray(cu, 3*paddedNumAtoms, elementSize, "inducedDipolePolar");
inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors"); if (polarizationType == AmoebaMultipoleForce::Mutual) {
prevDipoles = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles"); inducedDipoleErrors = new CudaArray(cu, cu.getNumThreadBlocks(), sizeof(float2), "inducedDipoleErrors");
prevDipolesPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesPolar"); prevDipoles = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipoles");
prevErrors = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevErrors"); prevDipolesPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesPolar");
diisMatrix = new CudaArray(cu, MaxPrevDIISDipoles*MaxPrevDIISDipoles, elementSize, "diisMatrix"); prevErrors = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevErrors");
diisCoefficients = new CudaArray(cu, MaxPrevDIISDipoles+1, sizeof(float), "diisMatrix"); diisMatrix = new CudaArray(cu, MaxPrevDIISDipoles*MaxPrevDIISDipoles, elementSize, "diisMatrix");
diisCoefficients = new CudaArray(cu, MaxPrevDIISDipoles+1, sizeof(float), "diisMatrix");
}
else if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
int numOrders = force.getExtrapolationCoefficients().size();
extrapolatedDipole = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipole");
extrapolatedDipolePolar = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipolePolar");
inducedDipoleFieldGradient = new CudaArray(cu, 6*numMultipoles, elementSize, "inducedDipoleFieldGradient");
inducedDipoleFieldGradientPolar = new CudaArray(cu, 6*numMultipoles, elementSize, "inducedDipoleFieldGradientPolar");
extrapolatedDipoleFieldGradient = new CudaArray(cu, 6*numMultipoles*numOrders, elementSize, "extrapolatedDipoleFieldGradient");
extrapolatedDipoleFieldGradientPolar = new CudaArray(cu, 6*numMultipoles*numOrders, elementSize, "extrapolatedDipoleFieldGradientPolar");
}
cu.addAutoclearBuffer(*field); cu.addAutoclearBuffer(*field);
cu.addAutoclearBuffer(*fieldPolar); cu.addAutoclearBuffer(*fieldPolar);
cu.addAutoclearBuffer(*torque); cu.addAutoclearBuffer(*torque);
...@@ -1036,14 +1075,16 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1036,14 +1075,16 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Record other options. // Record other options.
if (force.getPolarizationType() == AmoebaMultipoleForce::Mutual) { if (polarizationType == AmoebaMultipoleForce::Mutual) {
maxInducedIterations = force.getMutualInducedMaxIterations(); maxInducedIterations = force.getMutualInducedMaxIterations();
inducedEpsilon = force.getMutualInducedTargetEpsilon(); inducedEpsilon = force.getMutualInducedTargetEpsilon();
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedFieldPolar");
} }
else else
maxInducedIterations = 0; maxInducedIterations = 0;
if (polarizationType != AmoebaMultipoleForce::Direct) {
inducedField = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedField");
inducedFieldPolar = new CudaArray(cu, 3*paddedNumAtoms, sizeof(long long), "inducedFieldPolar");
}
usePME = (force.getNonbondedMethod() == AmoebaMultipoleForce::PME); usePME = (force.getNonbondedMethod() == AmoebaMultipoleForce::PME);
// See whether there's an AmoebaGeneralizedKirkwoodForce in the System. // See whether there's an AmoebaGeneralizedKirkwoodForce in the System.
...@@ -1066,7 +1107,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1066,7 +1107,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); defines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks()); defines["NUM_BLOCKS"] = cu.intToString(cu.getNumAtomBlocks());
defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/innerDielectric); defines["ENERGY_SCALE_FACTOR"] = cu.doubleToString(138.9354558456/innerDielectric);
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct) if (polarizationType == AmoebaMultipoleForce::Direct)
defines["DIRECT_POLARIZATION"] = ""; defines["DIRECT_POLARIZATION"] = "";
if (useShuffle) if (useShuffle)
defines["USE_SHUFFLE"] = ""; defines["USE_SHUFFLE"] = "";
...@@ -1080,6 +1121,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1080,6 +1121,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts; int endExclusionIndex = (cu.getContextIndex()+1)*numExclusionTiles/numContexts;
defines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex); defines["FIRST_EXCLUSION_TILE"] = cu.intToString(startExclusionIndex);
defines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex); defines["LAST_EXCLUSION_TILE"] = cu.intToString(endExclusionIndex);
maxExtrapolationOrder = force.getExtrapolationCoefficients().size();
defines["MAX_EXTRAPOLATION_ORDER"] = cu.intToString(maxExtrapolationOrder);
stringstream coefficients;
for (int i = 0; i < maxExtrapolationOrder; i++) {
if (i > 0)
coefficients << ",";
double sum = 0;
for (int j = i; j < maxExtrapolationOrder; j++)
sum = force.getExtrapolationCoefficients()[j];
coefficients << cu.doubleToString(sum);
}
defines["EXTRAPOLATION_COEFFICIENTS_SUM"] = coefficients.str();
alpha = force.getAEwald(); alpha = force.getAEwald();
if (usePME) { if (usePME) {
vector<int> pmeGridDimension; vector<int> pmeGridDimension;
...@@ -1113,8 +1166,19 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1113,8 +1166,19 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric)); defines["GK_FQ"] = cu.doubleToString(3*(1-solventDielectric)/(2+3*solventDielectric));
fixedThreadMemory += 4*elementSize; fixedThreadMemory += 4*elementSize;
inducedThreadMemory += 13*elementSize; inducedThreadMemory += 13*elementSize;
prevDipolesGk = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGk"); if (polarizationType == AmoebaMultipoleForce::Mutual) {
prevDipolesGkPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGkPolar"); prevDipolesGk = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGk");
prevDipolesGkPolar = new CudaArray(cu, 3*numMultipoles*MaxPrevDIISDipoles, elementSize, "prevDipolesGkPolar");
}
else if (polarizationType == AmoebaMultipoleForce::Extrapolated) {
int numOrders = force.getExtrapolationCoefficients().size();
extrapolatedDipoleGk = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipoleGk");
extrapolatedDipoleGkPolar = new CudaArray(cu, 3*numMultipoles*numOrders, elementSize, "extrapolatedDipoleGkPolar");
inducedDipoleFieldGradientGk = new CudaArray(cu, 6*numMultipoles, elementSize, "inducedDipoleFieldGradientGk");
inducedDipoleFieldGradientGkPolar = new CudaArray(cu, 6*numMultipoles, elementSize, "inducedDipoleFieldGradientGkPolar");
extrapolatedDipoleFieldGradientGk = new CudaArray(cu, 6*numMultipoles*numOrders, elementSize, "extrapolatedDipoleFieldGradientGk");
extrapolatedDipoleFieldGradientGkPolar = new CudaArray(cu, 6*numMultipoles*numOrders, elementSize, "extrapolatedDipoleFieldGradientGkPolar");
}
} }
int maxThreads = cu.getNonbondedUtilities().getForceThreadBlockSize(); int maxThreads = cu.getNonbondedUtilities().getForceThreadBlockSize();
fixedFieldThreads = min(maxThreads, cu.computeThreadBlockSize(fixedThreadMemory)); fixedFieldThreads = min(maxThreads, cu.computeThreadBlockSize(fixedThreadMemory));
...@@ -1127,7 +1191,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1127,7 +1191,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines["THREAD_BLOCK_SIZE"] = cu.intToString(fixedFieldThreads); defines["THREAD_BLOCK_SIZE"] = cu.intToString(fixedFieldThreads);
module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines); module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipoleFixedField, defines);
computeFixedFieldKernel = cu.getKernel(module, "computeFixedField"); computeFixedFieldKernel = cu.getKernel(module, "computeFixedField");
if (maxInducedIterations > 0) { if (polarizationType != AmoebaMultipoleForce::Direct) {
defines["THREAD_BLOCK_SIZE"] = cu.intToString(inducedFieldThreads); defines["THREAD_BLOCK_SIZE"] = cu.intToString(inducedFieldThreads);
defines["MAX_PREV_DIIS_DIPOLES"] = cu.intToString(MaxPrevDIISDipoles); defines["MAX_PREV_DIIS_DIPOLES"] = cu.intToString(MaxPrevDIISDipoles);
defines["USE_MUTUAL_POLARIZATION"] = "1"; defines["USE_MUTUAL_POLARIZATION"] = "1";
...@@ -1136,6 +1200,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1136,6 +1200,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldByDIIS"); updateInducedFieldKernel = cu.getKernel(module, "updateInducedFieldByDIIS");
recordDIISDipolesKernel = cu.getKernel(module, "recordInducedDipolesForDIIS"); recordDIISDipolesKernel = cu.getKernel(module, "recordInducedDipolesForDIIS");
buildMatrixKernel = cu.getKernel(module, "computeDIISMatrix"); buildMatrixKernel = cu.getKernel(module, "computeDIISMatrix");
initExtrapolatedKernel = cu.getKernel(module, "initExtrapolatedDipoles");
iterateExtrapolatedKernel = cu.getKernel(module, "iterateExtrapolatedDipoles");
computeExtrapolatedKernel = cu.getKernel(module, "computeExtrapolatedDipoles");
} }
stringstream electrostaticsSource; stringstream electrostaticsSource;
electrostaticsSource << CudaKernelSources::vectorOps; electrostaticsSource << CudaKernelSources::vectorOps;
...@@ -1166,7 +1233,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1166,7 +1233,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ); pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
pmeDefines["M_PI"] = cu.doubleToString(M_PI); pmeDefines["M_PI"] = cu.doubleToString(M_PI);
pmeDefines["SQRT_PI"] = cu.doubleToString(sqrt(M_PI)); pmeDefines["SQRT_PI"] = cu.doubleToString(sqrt(M_PI));
if (force.getPolarizationType() == AmoebaMultipoleForce::Direct) if (polarizationType == AmoebaMultipoleForce::Direct)
pmeDefines["DIRECT_POLARIZATION"] = ""; pmeDefines["DIRECT_POLARIZATION"] = "";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipolePme, pmeDefines); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipolePme, pmeDefines);
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex"); pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
...@@ -1407,7 +1474,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1407,7 +1474,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
int numTileIndices = nb.getNumTiles(); int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks(); int numForceThreadBlocks = nb.getNumForceThreadBlocks();
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float)); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
void* npt = NULL;
if (pmeGrid == NULL) { if (pmeGrid == NULL) {
// Compute induced dipoles. // Compute induced dipoles.
...@@ -1436,25 +1502,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1436,25 +1502,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge. // Iterate until the dipoles converge.
if (polarizationType == AmoebaMultipoleForce::Extrapolated)
computeExtrapolatedDipoles(NULL);
for (int i = 0; i < maxInducedIterations; i++) { for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField); computeInducedField(NULL);
cu.clearBuffer(*inducedFieldPolar);
if (gkKernel == NULL) {
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
}
else {
cu.clearBuffer(*gkKernel->getInducedField());
cu.clearBuffer(*gkKernel->getInducedFieldPolar());
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&gkKernel->getInducedField()->getDevicePointer(), &gkKernel->getInducedFieldPolar()->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&gkKernel->getBornRadii()->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
}
bool converged = iterateDipolesByDIIS(i); bool converged = iterateDipolesByDIIS(i);
if (converged) if (converged)
break; break;
...@@ -1579,34 +1630,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1579,34 +1630,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge. // Iterate until the dipoles converge.
vector<float2> errors; if (polarizationType == AmoebaMultipoleForce::Extrapolated)
computeExtrapolatedDipoles(recipBoxVectorPointer);
for (int i = 0; i < maxInducedIterations; i++) { for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField); computeInducedField(recipBoxVectorPointer);
cu.clearBuffer(*inducedFieldPolar);
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
cu.clearBuffer(*pmeGrid);
cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, pmeGrid->getSize());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
bool converged = iterateDipolesByDIIS(i); bool converged = iterateDipolesByDIIS(i);
if (converged) if (converged)
break; break;
...@@ -1646,6 +1673,73 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1646,6 +1673,73 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
return 0.0; return 0.0;
} }
void CudaCalcAmoebaMultipoleForceKernel::computeInducedField(void** recipBoxVectorPointer) {
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
int startTileIndex = nb.getStartTileIndex();
int numTileIndices = nb.getNumTiles();
int numForceThreadBlocks = nb.getNumForceThreadBlocks();
if (pmeGrid == NULL) {
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
if (gkKernel == NULL) {
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
}
else {
cu.clearBuffer(*gkKernel->getInducedField());
cu.clearBuffer(*gkKernel->getInducedFieldPolar());
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&gkKernel->getInducedField()->getDevicePointer(), &gkKernel->getInducedFieldPolar()->getDevicePointer(),
&gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(),
&gkKernel->getBornRadii()->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
}
}
else {
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
unsigned int maxTiles = nb.getInteractingTiles().getSize();
void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionTiles().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
&nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), cu.getPeriodicBoxSizePointer(),
cu.getInvPeriodicBoxSizePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
&maxTiles, &nb.getBlockCenters().getDevicePointer(), &nb.getInteractingAtoms().getDevicePointer(),
&dampingAndThole->getDevicePointer()};
cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*inducedFieldThreads, inducedFieldThreads);
cu.clearBuffer(*pmeGrid);
void* pmeSpreadInducedDipolesArgs[] = {&cu.getPosq().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(), cu.getPeriodicBoxVecZPointer(),
recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision()) {
void* finishSpreadArgs[] = {&pmeGrid->getDevicePointer()};
cu.executeKernel(pmeFinishSpreadChargeKernel, finishSpreadArgs, pmeGrid->getSize());
}
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
void* pmeConvolutionArgs[] = {&pmeGrid->getDevicePointer(), &pmeBsplineModuliX->getDevicePointer(), &pmeBsplineModuliY->getDevicePointer(),
&pmeBsplineModuliZ->getDevicePointer(), cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
void* pmeInducedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&pmePhidp->getDevicePointer(), &cu.getPosq().getDevicePointer(), cu.getPeriodicBoxVecXPointer(), cu.getPeriodicBoxVecYPointer(),
cu.getPeriodicBoxVecZPointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2],
&pmeAtomGridIndex->getDevicePointer()};
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2]};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
}
}
bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) { bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
void* npt = NULL; void* npt = NULL;
bool trueValue = true, falseValue = false; bool trueValue = true, falseValue = false;
...@@ -1744,6 +1838,45 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) { ...@@ -1744,6 +1838,45 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
return false; return false;
} }
void CudaCalcAmoebaMultipoleForceKernel::computeExtrapolatedDipoles(void** recipBoxVectorPointer) {
// Start by storing the direct dipoles as PT0
void* initArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer()};
cu.executeKernel(initExtrapolatedKernel, initArgs, extrapolatedDipole->getSize());
// Recursively apply alpha.Tau to the µ_(n) components to generate µ_(n+1), and store the result
CudaNonbondedUtilities& nb = cu.getNonbondedUtilities();
for (int order = 1; order < maxExtrapolationOrder; ++order) {
cu.clearBuffer(*inducedField);
cu.clearBuffer(*inducedFieldPolar);
computeInducedField(recipBoxVectorPointer);
void* iterateArgs[] = {&order, &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer(), &inducedDipoleFieldGradient->getDevicePointer(), &inducedDipoleFieldGradientPolar->getDevicePointer(),
&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &extrapolatedDipoleFieldGradient->getDevicePointer(), &extrapolatedDipoleFieldGradientPolar->getDevicePointer(),
&polarizability->getDevicePointer()};
cu.executeKernel(iterateExtrapolatedKernel, iterateArgs, extrapolatedDipole->getSize());
}
cout << "CUDA"<< endl;
vector<float> d;
extrapolatedDipole->download(d);
for (int i = 0; i < maxExtrapolationOrder; i++) {
cout << "order "<<i<< endl;
for (int j = 0; j < numMultipoles; j++) {
int k = 3*(j+i*numMultipoles);
cout << d[k]<<" "<<d[k+1]<<" "<<d[k+2]<< endl;
}
}
// Take a linear combination of the µ_(n) components to form the total dipole
void* computeArgs[] = {&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &extrapolatedDipole->getDevicePointer(),
&extrapolatedDipolePolar->getDevicePointer()};
cu.executeKernel(computeExtrapolatedKernel, computeArgs, extrapolatedDipole->getSize());
}
void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& context) { void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& context) {
if (multipolesAreValid) { if (multipolesAreValid) {
int numParticles = cu.getNumAtoms(); int numParticles = cu.getNumAtoms();
......
...@@ -385,14 +385,17 @@ private: ...@@ -385,14 +385,17 @@ private:
const char* getSortKey() const {return "value.y";} const char* getSortKey() const {return "value.y";}
}; };
void initializeScaleFactors(); void initializeScaleFactors();
void computeInducedField(void** recipBoxVectorPointer);
bool iterateDipolesByDIIS(int iteration); bool iterateDipolesByDIIS(int iteration);
void computeExtrapolatedDipoles(void** recipBoxVectorPointer);
void ensureMultipolesValid(ContextImpl& context); void ensureMultipolesValid(ContextImpl& context);
template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments); template <class T, class T4, class M4> void computeSystemMultipoleMoments(ContextImpl& context, std::vector<double>& outputMultipoleMoments);
int numMultipoles, maxInducedIterations; int numMultipoles, maxInducedIterations, maxExtrapolationOrder;
int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads; int fixedFieldThreads, inducedFieldThreads, electrostaticsThreads;
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
double alpha, inducedEpsilon; double alpha, inducedEpsilon;
bool usePME, hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid; bool usePME, hasQuadrupoles, hasInitializedScaleFactors, hasInitializedFFT, multipolesAreValid;
AmoebaMultipoleForce::PolarizationType polarizationType;
CudaContext& cu; CudaContext& cu;
const System& system; const System& system;
std::vector<int3> covalentFlagValues; std::vector<int3> covalentFlagValues;
...@@ -422,6 +425,18 @@ private: ...@@ -422,6 +425,18 @@ private:
CudaArray* prevErrors; CudaArray* prevErrors;
CudaArray* diisMatrix; CudaArray* diisMatrix;
CudaArray* diisCoefficients; CudaArray* diisCoefficients;
CudaArray* extrapolatedDipole;
CudaArray* extrapolatedDipolePolar;
CudaArray* extrapolatedDipoleGk;
CudaArray* extrapolatedDipoleGkPolar;
CudaArray* inducedDipoleFieldGradient;
CudaArray* inducedDipoleFieldGradientPolar;
CudaArray* inducedDipoleFieldGradientGk;
CudaArray* inducedDipoleFieldGradientGkPolar;
CudaArray* extrapolatedDipoleFieldGradient;
CudaArray* extrapolatedDipoleFieldGradientPolar;
CudaArray* extrapolatedDipoleFieldGradientGk;
CudaArray* extrapolatedDipoleFieldGradientGkPolar;
CudaArray* polarizability; CudaArray* polarizability;
CudaArray* covalentFlags; CudaArray* covalentFlags;
CudaArray* polarizationGroupFlags; CudaArray* polarizationGroupFlags;
...@@ -444,6 +459,7 @@ private: ...@@ -444,6 +459,7 @@ private:
CUfunction pmeGridIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel; CUfunction pmeGridIndexKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeFinishSpreadChargeKernel, pmeConvolutionKernel;
CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel; CUfunction pmeFixedPotentialKernel, pmeInducedPotentialKernel, pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel, computePotentialKernel;
CUfunction recordDIISDipolesKernel, buildMatrixKernel; CUfunction recordDIISDipolesKernel, buildMatrixKernel;
CUfunction initExtrapolatedKernel, iterateExtrapolatedKernel, computeExtrapolatedKernel;
CUfunction pmeTransformMultipolesKernel, pmeTransformPotentialKernel; CUfunction pmeTransformMultipolesKernel, pmeTransformPotentialKernel;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel; CudaCalcAmoebaGeneralizedKirkwoodForceKernel* gkKernel;
static const int PmeOrder = 5; static const int PmeOrder = 5;
......
...@@ -607,3 +607,95 @@ extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDi ...@@ -607,3 +607,95 @@ extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDi
inducedDipolePolar[index] = sumPolar; inducedDipolePolar[index] = sumPolar;
} }
} }
extern "C" __global__ void initExtrapolatedDipoles(real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar, real* __restrict__ inducedDipoleFieldGradient, real* __restrict__ inducedDipoleFieldGradientPolar
#ifdef USE_GK
, real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar,
real* __restrict__ inducedDipoleFieldGradientGk, real* __restrict__ inducedDipoleFieldGradientGkPolar
#endif
) {
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS*MAX_EXTRAPOLATION_ORDER; index += blockDim.x*gridDim.x) {
extrapolatedDipole[index] = inducedDipole[index];
extrapolatedDipolePolar[index] = inducedDipolePolar[index];
#ifdef USE_GK
extrapolatedDipoleGk[index] = inducedDipoleGk[index];
extrapolatedDipoleGkPolar[index] = inducedDipoleGkPolar[index];
#endif
}
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 6*NUM_ATOMS*MAX_EXTRAPOLATION_ORDER; index += blockDim.x*gridDim.x) {
inducedDipoleFieldGradient[index] = 0;
inducedDipoleFieldGradientPolar[index] = 0;
#ifdef USE_GK
inducedDipoleFieldGradientGk[index] = 0;
inducedDipoleFieldGradientGkPolar[index] = 0;
#endif
}
}
extern "C" __global__ void iterateExtrapolatedDipoles(int order, real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar, real* __restrict__ inducedDipoleFieldGradient, real* __restrict__ inducedDipoleFieldGradientPolar,
long long* __restrict__ inducedDipoleField, long long* __restrict__ inducedDipoleFieldPolar, real* __restrict__ extrapolatedDipoleFieldGradient, real* __restrict__ extrapolatedDipoleFieldGradientPolar,
#ifdef USE_GK
real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar,
real* __restrict__ inducedDipoleFieldGradientGk, real* __restrict__ inducedDipoleFieldGradientGkPolar, long long* __restrict__ inducedDipoleFieldGk,
long long* __restrict__ inducedDipoleFieldGkPolar, real* __restrict__ extrapolatedDipoleFieldGradientGk, real* __restrict__ extrapolatedDipoleFieldGradientGkPolar,
#endif
const float* __restrict__ polarizability) {
const real fieldScale = 1/(real) 0x100000000;
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
int atom = index/3;
int component = index-3*atom;
int fieldIndex = atom+component*PADDED_NUM_ATOMS;
float polar = polarizability[atom];
real value = inducedDipoleField[fieldIndex]*fieldScale*polar;
inducedDipole[index] = value;
printf("%d %d %g %g\n", order, index, inducedDipoleField[fieldIndex]*fieldScale, value);
extrapolatedDipole[order*3*NUM_ATOMS+index] = value;
value = inducedDipoleFieldPolar[fieldIndex]*fieldScale*polar;
inducedDipolePolar[index] = value;
extrapolatedDipolePolar[order*3*NUM_ATOMS+index] = value;
#ifdef USE_GK
value = inducedDipoleFieldGk[fieldIndex]*fieldScale*polar;
inducedDipoleGk[index] = value;
extrapolatedDipoleGk[order*3*NUM_ATOMS+index] = value;
value = inducedDipoleFieldGkPolar[fieldIndex]*fieldScale*polar;
inducedDipoleGkPolar[index] = value;
extrapolatedDipoleGkPolar[order*3*NUM_ATOMS+index] = value;
#endif
}
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 6*NUM_ATOMS; index += blockDim.x*gridDim.x) {
extrapolatedDipoleFieldGradient[order*6*NUM_ATOMS+index] = inducedDipoleFieldGradient[index];
extrapolatedDipoleFieldGradientPolar[order*6*NUM_ATOMS+index] = inducedDipoleFieldGradientPolar[index];
#ifdef USE_GK
extrapolatedDipoleFieldGradientGk[order*6*NUM_ATOMS+index] = inducedDipoleFieldGradientGk[index];
extrapolatedDipoleFieldGradientGkPolar[order*6*NUM_ATOMS+index] = inducedDipoleFieldGradientGkPolar[index];
#endif
}
}
extern "C" __global__ void computeExtrapolatedDipoles(real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real* __restrict__ extrapolatedDipole,
real* __restrict__ extrapolatedDipolePolar
#ifdef USE_GK
, real* __restrict__ inducedDipoleGk, real* __restrict__ inducedDipoleGkPolar, real* __restrict__ extrapolatedDipoleGk, real* __restrict__ extrapolatedDipoleGkPolar
#endif
) {
real coeff[] = {EXTRAPOLATION_COEFFICIENTS_SUM};
for (int index = blockIdx.x*blockDim.x+threadIdx.x; index < 3*NUM_ATOMS; index += blockDim.x*gridDim.x) {
real sum = 0, sumPolar = 0, sumGk = 0, sumGkPolar = 0;
for (int order = 0; order < MAX_EXTRAPOLATION_ORDER; order++) {
sum += extrapolatedDipole[order*3*NUM_ATOMS+index]*coeff[order];
sumPolar += extrapolatedDipolePolar[order*3*NUM_ATOMS+index]*coeff[order];
#ifdef USE_GK
sumGk += extrapolatedDipoleGk[order*3*NUM_ATOMS+index]*coeff[order];
sumGkPolar += extrapolatedDipoleGkPolar[order*3*NUM_ATOMS+index]*coeff[order];
#endif
}
inducedDipole[index] = sum;
inducedDipolePolar[index] = sumPolar;
#ifdef USE_GK
inducedDipoleGk[index] = sumGk;
inducedDipoleGkPolar[index] = sumGkPolar;
#endif
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment