Commit c0d1eb7b authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to convert AmoebaMultipoleForce: PME with mutual induced polarization now works

parent 3af4da86
...@@ -1387,6 +1387,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1387,6 +1387,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()}; &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms()); cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
// Iterate until the dipoles converge.
vector<float2> errors; vector<float2> errors;
for (int i = 0; i < maxInducedIterations; i++) { for (int i = 0; i < maxInducedIterations; i++) {
cu.clearBuffer(*inducedField); cu.clearBuffer(*inducedField);
...@@ -1492,32 +1495,46 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1492,32 +1495,46 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&pmePhidp->getDevicePointer(), &pmeIgrid->getDevicePointer(), &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(), &pmePhidp->getDevicePointer(), &pmeIgrid->getDevicePointer(), &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(),
&pmeTheta3->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()}; &pmeTheta3->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms()); cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
// void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
// &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()}; // Iterate until the dipoles converge.
// cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
vector<float2> errors;
for (int i = 0; i < maxInducedIterations; i++) {
// vector<float2> errors; cu.clearBuffer(*inducedField);
// for (int i = 0; i < maxInducedIterations; i++) { cu.clearBuffer(*inducedFieldPolar);
// cu.clearBuffer(*inducedField); void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
// cu.clearBuffer(*inducedFieldPolar); &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
// void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(), &nb.getInteractingTiles().getDevicePointer(), &nb.getInteractionCount().getDevicePointer(), cu.getPeriodicBoxSizePointer(),
// &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices, cu.getInvPeriodicBoxSizePointer(), &maxTiles, &nb.getInteractionFlags().getDevicePointer(),
// &dampingAndThole->getDevicePointer()}; &dampingAndThole->getDevicePointer()};
// cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
// void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &inducedField->getDevicePointer(), cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
// &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), if (cu.getUseDoublePrecision())
// &polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()}; cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
// cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize); else
// inducedDipoleErrors->download(errors); cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
// double total1 = 0.0, total2 = 0.0; cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
// for (int j = 0; j < (int) errors.size(); j++) { if (cu.getUseDoublePrecision())
// total1 += errors[j].x; cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
// total2 += errors[j].y; else
// } cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
// if (48.033324*sqrt(max(total1, total2)/cu.getNumAtoms()) < inducedEpsilon) cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
// break; void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
// } &inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &inducedField->getDevicePointer(),
&inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
&polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize);
inducedDipoleErrors->download(errors);
double total1 = 0.0, total2 = 0.0;
for (int j = 0; j < (int) errors.size(); j++) {
total1 += errors[j].x;
total2 += errors[j].y;
}
if (48.033324*sqrt(max(total1, total2)/cu.getNumAtoms()) < inducedEpsilon)
break;
}
// Compute electrostatic force. // Compute electrostatic force.
......
...@@ -21,6 +21,67 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res ...@@ -21,6 +21,67 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data.thole = temp.y; data.thole = temp.y;
} }
#ifdef USE_EWALD
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, real3* fields) {
real scale1, scale2;
real r2 = dot(deltaR, deltaR);
if (r2 < CUTOFF_SQUARED) {
real rI = RSQRT(r2);
real r = RECIP(rI);
// calculate the error function damping terms
real ralpha = EWALD_ALPHA*r;
real bn0 = erfc(ralpha)*rI;
real alsq2 = 2*EWALD_ALPHA*EWALD_ALPHA;
real alsq2n = RECIP(SQRT(M_PI)*EWALD_ALPHA);
real exp2a = expf(-(ralpha*ralpha));
alsq2n *= alsq2;
real bn1 = (bn0+alsq2n*exp2a)*rI*rI;
alsq2n *= alsq2;
real bn2 = (3*bn1+alsq2n*exp2a)*rI*rI;
// compute the error function scaled and unscaled terms
real scale3 = 1;
real scale5 = 1;
real damp = atom1.damp*atom2.damp;
if (damp != 0) {
real ratio = (r/damp);
ratio = ratio*ratio*ratio;
float pgamma = atom1.thole < atom2.thole ? atom1.thole : atom2.thole;
damp = -pgamma*ratio;
if (damp > -50) {
real expdamp = EXP(damp);
scale3 = 1 - expdamp;
scale5 = 1 - expdamp*(1-damp);
}
}
real dsc3 = scale3;
real dsc5 = scale5;
real r3 = (r*r2);
real r5 = (r3*r2);
real rr3 = (1-dsc3)/r3;
real rr5 = 3*(1-dsc5)/r5;
scale1 = rr3 - bn1;
scale2 = bn2 - rr5;
}
else {
scale1 = 0;
scale2 = 0;
}
real dDotDelta = scale2*dot(deltaR, atom2.inducedDipole);
fields[0] = scale1*atom2.inducedDipole + dDotDelta*deltaR;
dDotDelta = scale2*dot(deltaR, atom2.inducedDipolePolar);
fields[1] = scale1*atom2.inducedDipolePolar + dDotDelta*deltaR;
dDotDelta = scale2*dot(deltaR, atom1.inducedDipole);
fields[2] = scale1*atom1.inducedDipole + dDotDelta*deltaR;
dDotDelta = scale2*dot(deltaR, atom1.inducedDipolePolar);
fields[3] = scale1*atom1.inducedDipolePolar + dDotDelta*deltaR;
}
#else
__device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, real3* fields) { __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 deltaR, real3* fields) {
real rI = RSQRT(dot(deltaR, deltaR)); real rI = RSQRT(dot(deltaR, deltaR));
real r = RECIP(rI); real r = RECIP(rI);
...@@ -43,6 +104,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de ...@@ -43,6 +104,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
dDotDelta = rr5*dot(deltaR, atom1.inducedDipolePolar); dDotDelta = rr5*dot(deltaR, atom1.inducedDipolePolar);
fields[3] = rr3*atom1.inducedDipolePolar + dDotDelta*deltaR; fields[3] = rr3*atom1.inducedDipolePolar + dDotDelta*deltaR;
} }
#endif
/** /**
* Compute the mutual induced field. * Compute the mutual induced field.
...@@ -139,10 +201,10 @@ extern "C" __global__ void computeInducedField( ...@@ -139,10 +201,10 @@ extern "C" __global__ void computeInducedField(
else { else {
// Compute only a subset of the interactions in this tile. // Compute only a subset of the interactions in this tile.
for (j = 0; j < TILE_SIZE; j++) { for (int j = 0; j < TILE_SIZE; j++) {
if ((flags&(1<<j)) != 0) { if ((flags&(1<<j)) != 0) {
int atom2 = tbx+j; int atom2 = tbx+j;
real3 delta = make_real3(localData[atom2].posq.x-data.posq.x, localData[atom2].posq.y-data.posq.y, localData[atom2].posq.z-data.posq.z); real3 delta = localData[atom2].pos-data.pos;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y; delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
...@@ -254,18 +316,20 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__ ...@@ -254,18 +316,20 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar, real* __restrict__ inducedDipole, const long long* __restrict__ inducedField, const long long* __restrict__ inducedFieldPolar, real* __restrict__ inducedDipole,
real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors) { real* __restrict__ inducedDipolePolar, const float* __restrict__ polarizability, float2* __restrict__ errors) {
extern __shared__ real2 buffer[]; extern __shared__ real2 buffer[];
float polarSOR = 0.55f; const float polarSOR = 0.55f;
const real term = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT(M_PI);
const real fieldScale = 1/(real) 0xFFFFFFFF;
real sumErrors = 0; real sumErrors = 0;
real sumPolarErrors = 0; real sumPolarErrors = 0;
for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) { for (int atom = blockIdx.x*blockDim.x + threadIdx.x; atom < NUM_ATOMS; atom += blockDim.x*gridDim.x) {
real scale = polarizability[atom]/(real) 0xFFFFFFFF; real scale = polarizability[atom];
for (int component = 0; component < 3; component++) { for (int component = 0; component < 3; component++) {
int dipoleIndex = 3*atom+component; int dipoleIndex = 3*atom+component;
int fieldIndex = atom+component*PADDED_NUM_ATOMS; int fieldIndex = atom+component*PADDED_NUM_ATOMS;
real previousDipole = inducedDipole[dipoleIndex]; real previousDipole = inducedDipole[dipoleIndex];
real previousDipolePolar = inducedDipolePolar[dipoleIndex]; real previousDipolePolar = inducedDipolePolar[dipoleIndex];
real newDipole = scale*(fixedField[fieldIndex]+inducedField[fieldIndex]); real newDipole = scale*((fixedField[fieldIndex]+inducedField[fieldIndex])*fieldScale+term*previousDipole);
real newDipolePolar = scale*(fixedFieldPolar[fieldIndex]+inducedFieldPolar[fieldIndex]); real newDipolePolar = scale*((fixedFieldPolar[fieldIndex]+inducedFieldPolar[fieldIndex])*fieldScale+term*previousDipolePolar);
newDipole = previousDipole + polarSOR*(newDipole-previousDipole); newDipole = previousDipole + polarSOR*(newDipole-previousDipole);
newDipolePolar = previousDipolePolar + polarSOR*(newDipolePolar-previousDipolePolar); newDipolePolar = previousDipolePolar + polarSOR*(newDipolePolar-previousDipolePolar);
inducedDipole[dipoleIndex] = newDipole; inducedDipole[dipoleIndex] = newDipole;
......
...@@ -864,16 +864,16 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_ ...@@ -864,16 +864,16 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
} }
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip, extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip,
real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real4 invPeriodicBoxSize) { long long* __restrict__ inducedField, long long* __restrict__ inducedFieldPolar, real4 invPeriodicBoxSize) {
real xscale = GRID_SIZE_X*invPeriodicBoxSize.x; real xscale = GRID_SIZE_X*invPeriodicBoxSize.x*0xFFFFFFFF;
real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y; real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y*0xFFFFFFFF;
real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z; real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z*0xFFFFFFFF;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
inducedDipole[3*i] -= xscale*phid[10*i+1]; inducedField[i] -= (long long) (xscale*phid[10*i+1]);
inducedDipole[3*i+1] -= yscale*phid[10*i+2]; inducedField[i+PADDED_NUM_ATOMS] -= (long long) (yscale*phid[10*i+2]);
inducedDipole[3*i+2] -= zscale*phid[10*i+3]; inducedField[i+PADDED_NUM_ATOMS*2] -= (long long) (zscale*phid[10*i+3]);
inducedDipolePolar[3*i] -= xscale*phip[10*i+1]; inducedFieldPolar[i] -= (long long) (xscale*phip[10*i+1]);
inducedDipolePolar[3*i+1] -= yscale*phip[10*i+2]; inducedFieldPolar[i+PADDED_NUM_ATOMS] -= (long long) (yscale*phip[10*i+2]);
inducedDipolePolar[3*i+2] -= zscale*phip[10*i+3]; inducedFieldPolar[i+PADDED_NUM_ATOMS*2] -= (long long) (zscale*phip[10*i+3]);
} }
} }
...@@ -297,7 +297,6 @@ __device__ void computeOneInteractionF2(AtomData& atom1, volatile AtomData& atom ...@@ -297,7 +297,6 @@ __device__ void computeOneInteractionF2(AtomData& atom1, volatile AtomData& atom
ftm21 += prefactor1*((sci4*atom1.inducedDipolePolar.x + scip4*atom1.inducedDipole.x)); ftm21 += prefactor1*((sci4*atom1.inducedDipolePolar.x + scip4*atom1.inducedDipole.x));
ftm22 += prefactor1*((sci4*atom1.inducedDipolePolar.y + scip4*atom1.inducedDipole.y)); ftm22 += prefactor1*((sci4*atom1.inducedDipolePolar.y + scip4*atom1.inducedDipole.y));
ftm23 += prefactor1*((sci4*atom1.inducedDipolePolar.z + scip4*atom1.inducedDipole.z)); ftm23 += prefactor1*((sci4*atom1.inducedDipolePolar.z + scip4*atom1.inducedDipole.z));
}
#endif #endif
#ifdef APPLY_SCALE #ifdef APPLY_SCALE
...@@ -461,8 +460,8 @@ __device__ void computeOneInteractionF2(AtomData& atom1, volatile AtomData& atom ...@@ -461,8 +460,8 @@ __device__ void computeOneInteractionF2(AtomData& atom1, volatile AtomData& atom
ftm23 += prefactor1*(sci3*atom2.inducedDipolePolar.z + scip3*atom2.inducedDipole.z); ftm23 += prefactor1*(sci3*atom2.inducedDipolePolar.z + scip3*atom2.inducedDipole.z);
real sci34; real sci34;
real sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr; sci4 = atom2.inducedDipole.x*xr + atom2.inducedDipole.y*yr + atom2.inducedDipole.z*zr;
real scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr; scip4 = atom2.inducedDipolePolar.x*xr + atom2.inducedDipolePolar.y*yr + atom2.inducedDipolePolar.z*zr;
sci34 = (sci3*scip4+scip3*sci4); sci34 = (sci3*scip4+scip3*sci4);
#ifdef APPLY_SCALE #ifdef APPLY_SCALE
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment