Commit 35c974f6 authored by Peter Eastman
Browse files

Optimizations to PME

parent 92a338cf
...@@ -1018,18 +1018,7 @@ void kCalculateAmoebaPMEInducedDipoleField(amoebaGpuContext amoebaGpu) ...@@ -1018,18 +1018,7 @@ void kCalculateAmoebaPMEInducedDipoleField(amoebaGpuContext amoebaGpu)
*/ */
void kCalculateAmoebaPMEInducedDipoleForces(amoebaGpuContext amoebaGpu) void kCalculateAmoebaPMEInducedDipoleForces(amoebaGpuContext amoebaGpu)
{ {
// Perform PME for the induced dipoles.
gpuContext gpu = amoebaGpu->gpuContext; gpuContext gpu = amoebaGpu->gpuContext;
kGridSpreadInducedDipoles_kernel<<<10*gpu->sim.blocks, 64>>>();
LAUNCHERROR("kGridSpreadInducedDipoles");
cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_FORWARD);
kAmoebaReciprocalConvolution_kernel<<<gpu->sim.blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kAmoebaReciprocalConvolution");
cufftExecC2C(gpu->fftplan, gpu->psPmeGrid->_pDevData, gpu->psPmeGrid->_pDevData, CUFFT_INVERSE);
int potentialThreads = (gpu->sm_version >= SM_20 ? 256 : (gpu->sm_version >= SM_12 ? 128 : 64));
kComputeInducedPotentialFromGrid_kernel<<<gpu->sim.blocks, potentialThreads>>>();
LAUNCHERROR("kComputeInducedPotentialFromGrid");
kComputeInducedDipoleForceAndEnergy_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>(); kComputeInducedDipoleForceAndEnergy_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kComputeInducedDipoleForceAndEnergy"); LAUNCHERROR("kComputeInducedDipoleForceAndEnergy");
cudaComputeAmoebaMapTorquesAndAddTotalForce2(amoebaGpu, amoebaGpu->psTorque, gpu->psForce4); cudaComputeAmoebaMapTorquesAndAddTotalForce2(amoebaGpu, amoebaGpu->psTorque, gpu->psForce4);
......
...@@ -298,7 +298,8 @@ if( atomI == targetAtom ){ ...@@ -298,7 +298,8 @@ if( atomI == targetAtom ){
for (unsigned int j = 0; j < GRID; j++) for (unsigned int j = 0; j < GRID; j++)
{ {
if ((flags&(1<<j)) != 0)
{
unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j; unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j;
unsigned int atomJ = y + jIdx; unsigned int atomJ = y + jIdx;
...@@ -386,6 +387,7 @@ if( atomI == targetAtom ){ ...@@ -386,6 +387,7 @@ if( atomI == targetAtom ){
psA[jIdx].torque[2] += sA[threadIdx.x].tempTorque[2] + sA[threadIdx.x+16].tempTorque[2]; psA[jIdx].torque[2] += sA[threadIdx.x].tempTorque[2] + sA[threadIdx.x+16].tempTorque[2];
} }
} }
}
tj = (tj + 1) & (GRID - 1); tj = (tj + 1) & (GRID - 1);
......
...@@ -228,6 +228,7 @@ if( atomI == targetAtom || targetAtom == (y+j) ){ ...@@ -228,6 +228,7 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
for (unsigned int j = 0; j < GRID; j++){ for (unsigned int j = 0; j < GRID; j++){
if ((flags&(1<<j)) != 0) {
unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j; unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j;
if( bExclusionFlag ){ if( bExclusionFlag ){
getMaskedDScaleFactor( jIdx, dScaleMask, &dScaleValue ); getMaskedDScaleFactor( jIdx, dScaleMask, &dScaleValue );
...@@ -236,9 +237,9 @@ if( atomI == targetAtom || targetAtom == (y+j) ){ ...@@ -236,9 +237,9 @@ if( atomI == targetAtom || targetAtom == (y+j) ){
float4 ijField[3]; float4 ijField[3];
calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[jIdx], dScaleValue, pScaleValue, ijField calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[jIdx], dScaleValue, pScaleValue, ijField
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, pullBack , pullBack
#endif #endif
); );
unsigned int outOfBounds = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+jIdx) >= cAmoebaSim.numberOfAtoms) ) ? 1 : 0; unsigned int outOfBounds = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+jIdx) >= cAmoebaSim.numberOfAtoms) ) ? 1 : 0;
...@@ -358,6 +359,7 @@ if( (atomI == targetAtom || (y + jIdx) == targetAtom) ){ ...@@ -358,6 +359,7 @@ if( (atomI == targetAtom || (y + jIdx) == targetAtom) ){
} }
} }
#endif #endif
}
tj = (tj + 1) & (GRID - 1); tj = (tj + 1) & (GRID - 1);
} // j-loop block } // j-loop block
......
...@@ -231,16 +231,17 @@ if( atomI == targetAtom || (y+j) == targetAtom ){ ...@@ -231,16 +231,17 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
for (unsigned int j = 0; j < GRID; j++) for (unsigned int j = 0; j < GRID; j++)
{ {
if ((flags&(1<<j)) != 0)
{
unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j; unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j;
float4 ijField[3]; float4 ijField[3];
// load coords, charge, ... // load coords, charge, ...
calculatePmeDirectMutualInducedFieldPairIxn_kernel( localParticle, psA[jIdx], uscale, ijField calculatePmeDirectMutualInducedFieldPairIxn_kernel( localParticle, psA[jIdx], uscale, ijField
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
, pullBack , pullBack
#endif #endif
); );
unsigned int mask = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+jIdx) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1; unsigned int mask = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+jIdx) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1;
...@@ -359,6 +360,7 @@ if( atomI == targetAtom || (y+jIdx) == targetAtom ){ ...@@ -359,6 +360,7 @@ if( atomI == targetAtom || (y+jIdx) == targetAtom ){
debugArray[index].w = flag; debugArray[index].w = flag;
} }
#endif #endif
}
tj = (tj + 1) & (GRID - 1); tj = (tj + 1) & (GRID - 1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment