Commit 35c974f6 authored by Peter Eastman
Browse files

Optimizations to PME

parent 92a338cf
......@@ -1018,18 +1018,7 @@ void kCalculateAmoebaPMEInducedDipoleField(amoebaGpuContext amoebaGpu)
*/
void kCalculateAmoebaPMEInducedDipoleForces(amoebaGpuContext amoebaGpu)
{
// Perform PME for the induced dipoles.
gpuContext gpu = amoebaGpu->gpuContext;
kGridSpreadInducedDipoles_kernel<<<10*gpu->sim.blocks, 64>>>();
LAUNCHERROR("kGridSpreadInducedDipoles");
cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_FORWARD);
kAmoebaReciprocalConvolution_kernel<<<gpu->sim.blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kAmoebaReciprocalConvolution");
cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_INVERSE);
int potentialThreads = (gpu->sm_version >= SM_20 ? 256 : (gpu->sm_version >= SM_12 ? 128 : 64));
kComputeInducedPotentialFromGrid_kernel<<<gpu->sim.blocks, potentialThreads>>>();
LAUNCHERROR("kComputeInducedPotentialFromGrid");
kComputeInducedDipoleForceAndEnergy_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kComputeInducedDipoleForceAndEnergy");
cudaComputeAmoebaMapTorquesAndAddTotalForce2(amoebaGpu, amoebaGpu->psTorque, gpu->psForce4);
......
......@@ -298,7 +298,8 @@ if( atomI == targetAtom ){
for (unsigned int j = 0; j < GRID; j++)
{
if ((flags&(1<<j)) != 0)
{
unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j;
unsigned int atomJ = y + jIdx;
......@@ -386,6 +387,7 @@ if( atomI == targetAtom ){
psA[jIdx].torque[2] += sA[threadIdx.x].tempTorque[2] + sA[threadIdx.x+16].tempTorque[2];
}
}
}
tj = (tj + 1) & (GRID - 1);
......
......@@ -228,6 +228,7 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
for (unsigned int j = 0; j < GRID; j++){
if ((flags&(1<<j)) != 0) {
unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j;
if( bExclusionFlag ){
getMaskedDScaleFactor( jIdx, dScaleMask, &dScaleValue );
......@@ -236,9 +237,9 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
float4 ijField[3];
calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[jIdx], dScaleValue, pScaleValue, ijField
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
, pullBack
#endif
#endif
);
unsigned int outOfBounds = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+jIdx) >= cAmoebaSim.numberOfAtoms) ) ? 1 : 0;
......@@ -358,6 +359,7 @@ if( (atomI == targetAtom || (y + jIdx) == targetAtom) ){
}
}
#endif
}
tj = (tj + 1) & (GRID - 1);
} // j-loop block
......
......@@ -231,16 +231,17 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
for (unsigned int j = 0; j < GRID; j++)
{
if ((flags&(1<<j)) != 0)
{
unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j;
float4 ijField[3];
// load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel( localParticle, psA[jIdx], uscale, ijField
#ifdef AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG
, pullBack
#endif
#endif
);
unsigned int mask = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+jIdx) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1;
......@@ -359,6 +360,7 @@ if( atomI == targetAtom || (y+jIdx) == targetAtom ){
debugArray[index].w = flag;
}
#endif
}
tj = (tj + 1) & (GRID - 1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment