Commit 50a15fb0 authored by Mark Friedrichs
Browse files

Modifications to reduce the calculation of interactions (ixns) within blocks for Vdw

parent e6c19b54
......@@ -268,14 +268,6 @@ if( atomI == targetAtom ){
// No interactions in this block.
} else {
if (lasty != y) {
// load shared data
loadPmeDirectElectrostaticShared( &(sA[threadIdx.x]), (y+tgx) );
}
sA[threadIdx.x].force[0] = 0.0f;
sA[threadIdx.x].force[1] = 0.0f;
sA[threadIdx.x].force[2] = 0.0f;
......
......@@ -250,6 +250,12 @@ void kCalculateAmoebaVdw14_7Reduction_kernel( float* inputForce, float4* outputF
}
}
// Accumulates atomJ's tempForce vector, component by component, into
// atomI's tempForce vector.
//
//   atomI  particle whose tempForce[0..2] is updated in place
//   atomJ  particle whose tempForce[0..2] is added; it is only read
//
// The helper performs no synchronization of its own.
__device__ void sumTempBuffer( Vdw14_7Particle& atomI, Vdw14_7Particle& atomJ ){
    float* accumulated  = atomI.tempForce;
    const float* addend = atomJ.tempForce;
    for( int component = 0; component < 3; component++ ){
        accumulated[component] += addend[component];
    }
}
static void kCalculateAmoebaVdw14_7Reduction(amoebaGpuContext amoebaGpu, CUDAStream<float>* vdwOutputArray, CUDAStream<float4>* forceOutputArray )
{
......
......@@ -212,12 +212,12 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
if (flags == 0) {
} else {
#endif
// zero shared fields
zeroVdw14_7SharedForce( &(sA[threadIdx.x]) );
if( bExclusionFlag ){
if( bExclusionFlag )
{
unsigned int xi = x >> GRIDBITS;
unsigned int yi = y >> GRIDBITS;
......@@ -231,20 +231,27 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
{
float ijForce[3];
#ifdef USE_CUTOFF
if ((flags&(1<<j)) != 0)
{
unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j;
#else
unsigned int jIdx = tj;
#endif
// get combined sigma and epsilon
float combindedSigma;
float combindedEpsilon;
getVdw14_7CombindedSigmaEpsilon_kernel( sigmaCombiningRule, localParticle.sigma, psA[tj].sigma, &combindedSigma,
epsilonCombiningRule, localParticle.epsilon, psA[tj].epsilon, &combindedEpsilon );
getVdw14_7CombindedSigmaEpsilon_kernel( sigmaCombiningRule, localParticle.sigma, psA[jIdx].sigma, &combindedSigma,
epsilonCombiningRule, localParticle.epsilon, psA[jIdx].epsilon, &combindedEpsilon );
// calculate force
float energy;
ijForce[0] = psA[tj].x - localParticle.x;
ijForce[1] = psA[tj].y - localParticle.y;
ijForce[2] = psA[tj].z - localParticle.z;
ijForce[0] = psA[jIdx].x - localParticle.x;
ijForce[1] = psA[jIdx].y - localParticle.y;
ijForce[2] = psA[jIdx].z - localParticle.z;
if( cAmoebaSim.vdwUsePBC )
{
ijForce[0] -= floor(ijForce[0]*cSim.invPeriodicBoxSizeX+0.5f)*cSim.periodicBoxSizeX;
......@@ -259,32 +266,72 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
// mask out excluded ixns
unsigned int mask = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+tj) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1;
unsigned int mask = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+jIdx) >= cAmoebaSim.numberOfAtoms) ) ? 0 : 1;
if( mask && bExclusionFlag ){
unsigned int maskIndex = 1 << tj;
unsigned int maskIndex = 1 << jIdx;
mask = (exclusionMask & maskIndex) ? 0 : 1;
}
if( mask == 0 )
{
energy = ijForce[0] = ijForce[1] = ijForce[2] = 0.0f;
}
// accumulate force for atomI
forceSum[0] += mask ? ijForce[0] : 0.0f;
forceSum[1] += mask ? ijForce[1] : 0.0f;
forceSum[2] += mask ? ijForce[2] : 0.0f;
forceSum[0] += ijForce[0];
forceSum[1] += ijForce[1];
forceSum[2] += ijForce[2];
// accumulate force for atomJ
totalEnergy += energy;
psA[tj].force[0] -= mask ? ijForce[0] : 0.0f;
psA[tj].force[1] -= mask ? ijForce[1] : 0.0f;
psA[tj].force[2] -= mask ? ijForce[2] : 0.0f;
#ifndef USE_CUTOFF
psA[jIdx].force[0] -= ijForce[0];
psA[jIdx].force[1] -= ijForce[1];
psA[jIdx].force[2] -= ijForce[2];
#else
if( flags == 0xFFFFFFFF ){
psA[jIdx].force[0] -= ijForce[0];
psA[jIdx].force[1] -= ijForce[1];
psA[jIdx].force[2] -= ijForce[2];
} else {
sA[threadIdx.x].tempForce[0] = ijForce[0];
sA[threadIdx.x].tempForce[1] = ijForce[1];
sA[threadIdx.x].tempForce[2] = ijForce[2];
if( tgx % 2 == 0 ){
sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+1] );
}
if( tgx % 4 == 0 ){
sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+2] );
}
if( tgx % 8 == 0 ){
sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+4] );
}
if( tgx % 16 == 0 ){
sumTempBuffer( sA[threadIdx.x], sA[threadIdx.x+8] );
}
if (tgx == 0)
{
psA[jIdx].force[0] -= sA[threadIdx.x].tempForce[0] + sA[threadIdx.x+16].tempForce[0];
psA[jIdx].force[1] -= sA[threadIdx.x].tempForce[1] + sA[threadIdx.x+16].tempForce[1];
psA[jIdx].force[2] -= sA[threadIdx.x].tempForce[2] + sA[threadIdx.x+16].tempForce[2];
}
}
#endif
totalEnergy += mask ? energy : 0.0f;
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+tj) == targetAtom ){
unsigned int index = (atomI == targetAtom) ? (y + tj) : atomI;
if( atomI == targetAtom || (y+jIdx) == targetAtom ){
unsigned int index = (atomI == targetAtom) ? (y + jIdx) : atomI;
debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + tj);
debugArray[index].y = (float) (y + jIdx);
debugArray[index].z = -3.0;
debugArray[index].w = (float) (mask + 1);
......@@ -311,12 +358,16 @@ if( atomI == targetAtom || (y+tj) == targetAtom ){
debugArray[index].y = mask ? ijForce[1] : 0.0f;
debugArray[index].z = mask ? ijForce[2] : 0.0f;
}
#endif
#ifdef USE_CUTOFF
}
#endif
tj = (tj + 1) & (GRID - 1);
} // end of j-loop
#ifdef USE_CUTOFF
}
}
#endif
// Write results
......
......@@ -13,6 +13,7 @@ struct Vdw14_7Particle {
float epsilon;
float force[3];
float tempForce[3];
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment