Commit 4cebdb5a authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to implement AmoebaMultipoleForce: various bug fixes

parent c0d1eb7b
......@@ -1154,14 +1154,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeDefines["NUM_ATOMS"] = cu.intToString(numMultipoles);
pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(138.9354558456);
// pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_X"] = cu.intToString(gridSizeX);
pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
// pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
pmeDefines["M_PI"] = cu.doubleToString(M_PI);
// if (cu.getUseDoublePrecision())
// pmeDefines["USE_DOUBLE_PRECISION"] = "1";
pmeDefines["SQRT_PI"] = cu.doubleToString(sqrt(M_PI));
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaAmoebaKernelSources::multipolePme, pmeDefines);
pmeUpdateBsplinesKernel = cu.getKernel(module, "updateBsplines");
pmeAtomRangeKernel = cu.getKernel(module, "findAtomRangeForGrid");
......@@ -1401,7 +1398,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &inducedField->getDevicePointer(),
&inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize);
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
inducedDipoleErrors->download(errors);
double total1 = 0.0, total2 = 0.0;
for (int j = 0; j < (int) errors.size(); j++) {
......@@ -1525,7 +1522,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &inducedField->getDevicePointer(),
&inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize);
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize, cu.ThreadBlockSize, cu.ThreadBlockSize*elementSize*2);
inducedDipoleErrors->download(errors);
double total1 = 0.0, total2 = 0.0;
for (int j = 0; j < (int) errors.size(); j++) {
......
......@@ -34,7 +34,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
real ralpha = EWALD_ALPHA*r;
real bn0 = erfc(ralpha)*rI;
real alsq2 = 2*EWALD_ALPHA*EWALD_ALPHA;
real alsq2n = RECIP(SQRT(M_PI)*EWALD_ALPHA);
real alsq2n = RECIP(SQRT_PI*EWALD_ALPHA);
real exp2a = expf(-(ralpha*ralpha));
alsq2n *= alsq2;
real bn1 = (bn0+alsq2n*exp2a)*rI*rI;
......@@ -129,7 +129,7 @@ extern "C" __global__ void computeInducedField(
#endif
__shared__ AtomData localData[THREAD_BLOCK_SIZE];
#ifndef ENABLE_SHUFFLE
__shared__ real tempBuffer[3*THREAD_BLOCK_SIZE];
// __shared__ real tempBuffer[3*THREAD_BLOCK_SIZE];
#endif
do {
......@@ -194,11 +194,12 @@ extern "C" __global__ void computeInducedField(
localData[threadIdx.x].fieldPolar = make_real3(0);
#ifdef USE_CUTOFF
unsigned int flags = (numTiles <= maxTiles ? interactionFlags[pos] : 0xFFFFFFFF);
if (flags != 0xFFFFFFFF) {
if (flags == 0) { // TODO: Figure out what the flags != 0 case doesn't work!!!
// if (flags != 0xFFFFFFFF) {
if (flags == 0) {
// No interactions in this tile.
}
else {
/* else {
// Compute only a subset of the interactions in this tile.
for (int j = 0; j < TILE_SIZE; j++) {
......@@ -213,6 +214,8 @@ extern "C" __global__ void computeInducedField(
real3 fields[4];
computeOneInteraction(data, localData[atom2], delta, fields);
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
data.field += fields[0];
data.fieldPolar += fields[1];
#ifdef ENABLE_SHUFFLE
for (int i = 16; i >= 1; i /= 2) {
fields[2].x += __shfl_xor(fields[2].x, i, 32);
......@@ -258,7 +261,7 @@ extern "C" __global__ void computeInducedField(
}
}
}
}
}*/
}
else
#endif
......@@ -317,7 +320,11 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors) {
extern __shared__ real2 buffer[];
const float polarSOR = 0.55f;
const real term = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT(M_PI);
#ifdef USE_EWALD
const real ewaldScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
#else
const real ewaldScale = 0;
#endif
const real fieldScale = 1/(real) 0xFFFFFFFF;
real sumErrors = 0;
real sumPolarErrors = 0;
......@@ -328,8 +335,8 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
int fieldIndex = atom+component*PADDED_NUM_ATOMS;
real previousDipole = inducedDipole[dipoleIndex];
real previousDipolePolar = inducedDipolePolar[dipoleIndex];
real newDipole = scale*((fixedField[fieldIndex]+inducedField[fieldIndex])*fieldScale+term*previousDipole);
real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+inducedFieldPolar[fieldIndex])*fieldScale+term*previousDipolePolar);
real newDipole = scale*((fixedField[fieldIndex]+inducedField[fieldIndex])*fieldScale+ewaldScale*previousDipole);
real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+inducedFieldPolar[fieldIndex])*fieldScale+ewaldScale*previousDipolePolar);
newDipole = previousDipole + polarSOR*(newDipole-previousDipole);
newDipolePolar = previousDipolePolar + polarSOR*(newDipolePolar-previousDipolePolar);
inducedDipole[dipoleIndex] = newDipole;
......
......@@ -159,6 +159,7 @@ extern "C" __global__ void findAtomRangeForGrid(int2* __restrict__ pmeAtomGridIn
pmeAtomRange[j] = NUM_ATOMS;
}
}
extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, real2* __restrict__ pmeGrid, int2* __restrict__ pmeAtomGridIndex, int* __restrict__ pmeAtomRange,
const real4* __restrict__ theta1, const real4* __restrict__ theta2, const real4* __restrict__ theta3, real4 invPeriodicBoxSize) {
......@@ -468,7 +469,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
phi[20*m+17] = tuv102;
phi[20*m+18] = tuv012;
phi[20*m+19] = tuv111;
real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT(M_PI);
real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI;
long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-GRID_SIZE_X*invPeriodicBoxSize.x*tuv100)*0xFFFFFFFF);
fieldBuffers[m] = fieldx;
fieldPolarBuffers[m] = fieldx;
......
......@@ -952,7 +952,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
real alsq2 = 2*EWALD_ALPHA*EWALD_ALPHA;
real alsq2n = 0;
if (EWALD_ALPHA > 0)
alsq2n = RECIP(SQRT(M_PI)*EWALD_ALPHA);
alsq2n = RECIP(SQRT_PI*EWALD_ALPHA);
real exp2a = EXP(-(ralpha*ralpha));
real rr1 = RECIP(r);
......@@ -1012,7 +1012,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
*/
__device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) {
real term = 2*EWALD_ALPHA*EWALD_ALPHA;
real fterm = -EWALD_ALPHA/SQRT(M_PI);
real fterm = -EWALD_ALPHA/SQRT_PI;
real cii = atom1.q*atom1.q;
real dii = dot(atom1.dipole, atom1.dipole);
real qii = 2*(atom1.quadrupoleXX*atom1.quadrupoleXX +
......@@ -1030,5 +1030,5 @@ __device__ void computeSelfEnergyAndTorque(AtomData& atom1, real& energy) {
// self-torque for PME
real3 ui = atom1.inducedDipole+atom1.inducedDipolePolar;
atom1.torque += ((2/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT(M_PI))*cross(atom1.dipole, ui);
atom1.torque += ((2/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT_PI)*cross(atom1.dipole, ui);
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment