Commit 24148147 authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimizations to PME direct space calculation. Also added code for PME with GBSA.

parent f92c61b0
......@@ -102,19 +102,17 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJForces.h"
// Include version of the kernel with Ewald method
// Include versions of the kernels for Ewald
// Real Space Ewald summation utilizes almost the same kernel as Periodic
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define USE_EWALD
#define METHOD_NAME(a, b) a##EwaldDirect##b
#define METHOD_NAME(a, b) a##Ewald##b
#include "kCalculateCDLJForces.h"
#include "kFindInteractingBlocks.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##EwaldDirectByWarp##b
#define METHOD_NAME(a, b) a##EwaldByWarp##b
#include "kCalculateCDLJForces.h"
// Reciprocal Space Ewald summation is in a separate kernel
......@@ -169,20 +167,21 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR("kCalculateCDLJPeriodicForces");
break;
case EWALD:
kFindBlockBoundsEwaldDirect_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsEwaldDirect");
kFindBlocksWithInteractionsEwaldDirect_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsEwaldDirect");
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksEwaldDirect_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJEwaldDirectByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kCalculateCDLJEwaldByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJEwaldDirectForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kCalculateCDLJEwaldForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJEwaldDirectForces");
LAUNCHERROR("kCalculateCDLJEwaldForces");
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
......@@ -190,20 +189,21 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR("kCalculateEwaldFastForces");
break;
case PARTICLE_MESH_EWALD:
kFindBlockBoundsEwaldDirect_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsEwaldDirect");
kFindBlocksWithInteractionsEwaldDirect_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsEwaldDirect");
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksEwaldDirect_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJEwaldDirectByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kCalculateCDLJEwaldByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJEwaldDirectForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kCalculateCDLJEwaldForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJEwaldDirectForces");
LAUNCHERROR("kCalculateCDLJEwaldForces");
kCalculatePME(gpu);
}
}
......@@ -48,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#endif
#ifdef USE_EWALD
const float SQRT_PI = sqrt(LOCAL_HACK_PI);
const float TWO_OVER_SQRT_PI = 2.0f/sqrt(LOCAL_HACK_PI);
#endif
unsigned int lasty = 0xFFFFFFFF;
......@@ -118,9 +118,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI );
/* E */
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR);
CDLJ_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -179,16 +180,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR);
CDLJ_energy += apos.w * psA[j].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+j && x+tgx < cSim.atoms && y+j < cSim.atoms;
if (needCorrection)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[j].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy = -apos.w * psA[j].q * invR * erf(alphaR);
dEdR = -apos.w * psA[j].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJ_energy = -apos.w * psA[j].q * invR * (1.0f-erfcAlphaR);
}
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
......@@ -301,9 +303,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJ_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
CDLJ_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -368,8 +371,9 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR);
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJ_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -470,16 +474,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJ_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
CDLJ_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+tj && x+tgx < cSim.atoms && y+tj < cSim.atoms;
if (needCorrection)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[tj].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy = -apos.w * psA[tj].q * invR * erf(alphaR);
dEdR = -apos.w * psA[tj].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJ_energy = -apos.w * psA[tj].q * invR * (1.0f-erfcAlphaR);
}
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
......
......@@ -97,12 +97,28 @@ void GetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
// Include versions of the kernels for Ewald
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define USE_EWALD
#define METHOD_NAME(a, b) a##Ewald##b
#include "kCalculateCDLJObcGbsaForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##EwaldByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
extern __global__ void kFindBlockBoundsCutoff_kernel();
extern __global__ void kFindBlockBoundsPeriodic_kernel();
extern __global__ void kFindBlocksWithInteractionsCutoff_kernel();
extern __global__ void kFindBlocksWithInteractionsPeriodic_kernel();
extern __global__ void kFindInteractionsWithinBlocksCutoff_kernel(unsigned int*);
extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*);
extern __global__ void kCalculateEwaldFastCosSinSums_kernel();
extern __global__ void kCalculateEwaldFastForces_kernel();
extern void kCalculatePME(gpuContext gpu);
void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
{
......@@ -169,5 +185,54 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaPeriodicForces1");
break;
case EWALD:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaEwaldForces");
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastForces");
break;
case PARTICLE_MESH_EWALD:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaEwaldForces");
kCalculatePME(gpu);
}
}
......@@ -30,6 +30,9 @@
* different versions of the kernels.
*/
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
__global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
{
extern __shared__ Atom sA[];
......@@ -45,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#endif
#ifdef USE_EWALD
const float SQRT_PI = sqrt(PI);
const float TWO_OVER_SQRT_PI = 2.0f/sqrt(LOCAL_HACK_PI);
#endif
unsigned int lasty = -0xFFFFFFFF;
......@@ -110,9 +113,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR);
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -191,16 +195,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR);
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+j && x+tgx < cSim.atoms && y+j < cSim.atoms;
if (needCorrection)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[j].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJObcGbsa_energy = -apos.w * psA[j].q * invR * erf(alphaR);
dEdR = -apos.w * psA[j].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy = -apos.w * psA[j].q * invR * (1.0f-erfcAlphaR);
}
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
......@@ -345,9 +350,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -433,8 +439,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR);
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -565,16 +572,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+tj && x+tgx < cSim.atoms && y+tj < cSim.atoms;
if (needCorrection)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[tj].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJObcGbsa_energy = -apos.w * psA[tj].q * invR * erf(alphaR);
dEdR = -apos.w * psA[tj].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy = -apos.w * psA[tj].q * invR * (1.0f-erfcAlphaR);
}
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment