"devtools/vscode:/vscode.git/clone" did not exist on "183598ed337d14ab00bfa8f286b8dda5d8b6902a"
Commit 24148147 authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimizations to PME direct space calculation. Also added code for PME with GBSA.

parent f92c61b0
......@@ -102,19 +102,17 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJForces.h"
// Include version of the kernel with Ewald method
// Include versions of the kernels for Ewald
// Real Space Ewald summation utilizes almost the same kernel as Periodic
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define USE_EWALD
#define METHOD_NAME(a, b) a##EwaldDirect##b
#define METHOD_NAME(a, b) a##Ewald##b
#include "kCalculateCDLJForces.h"
#include "kFindInteractingBlocks.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##EwaldDirectByWarp##b
#define METHOD_NAME(a, b) a##EwaldByWarp##b
#include "kCalculateCDLJForces.h"
// Reciprocal Space Ewald summation is in a separate kernel
......@@ -169,20 +167,21 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR("kCalculateCDLJPeriodicForces");
break;
case EWALD:
kFindBlockBoundsEwaldDirect_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsEwaldDirect");
kFindBlocksWithInteractionsEwaldDirect_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsEwaldDirect");
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksEwaldDirect_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJEwaldDirectByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kCalculateCDLJEwaldByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJEwaldDirectForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kCalculateCDLJEwaldForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJEwaldDirectForces");
LAUNCHERROR("kCalculateCDLJEwaldForces");
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
......@@ -190,20 +189,21 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR("kCalculateEwaldFastForces");
break;
case PARTICLE_MESH_EWALD:
kFindBlockBoundsEwaldDirect_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsEwaldDirect");
kFindBlocksWithInteractionsEwaldDirect_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsEwaldDirect");
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksEwaldDirect_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJEwaldDirectByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kCalculateCDLJEwaldByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJEwaldDirectForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
kCalculateCDLJEwaldForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJEwaldDirectForces");
LAUNCHERROR("kCalculateCDLJEwaldForces");
kCalculatePME(gpu);
}
}
......@@ -30,9 +30,9 @@
* different versions of the kernels.
*/
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
__global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUnit)
{
extern __shared__ Atom sA[];
......@@ -48,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#endif
#ifdef USE_EWALD
const float SQRT_PI = sqrt(LOCAL_HACK_PI);
const float TWO_OVER_SQRT_PI = 2.0f/sqrt(LOCAL_HACK_PI);
#endif
unsigned int lasty = 0xFFFFFFFF;
......@@ -118,9 +118,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI );
/* E */
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR);
CDLJ_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -179,16 +180,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR);
CDLJ_energy += apos.w * psA[j].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+j && x+tgx < cSim.atoms && y+j < cSim.atoms;
if (needCorrection)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[j].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy = -apos.w * psA[j].q * invR * erf(alphaR);
dEdR = -apos.w * psA[j].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJ_energy = -apos.w * psA[j].q * invR * (1.0f-erfcAlphaR);
}
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
......@@ -301,9 +303,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJ_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
CDLJ_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -368,8 +371,9 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR);
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJ_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -470,16 +474,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJ_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
CDLJ_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+tj && x+tgx < cSim.atoms && y+tj < cSim.atoms;
if (needCorrection)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[tj].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJ_energy = -apos.w * psA[tj].q * invR * erf(alphaR);
dEdR = -apos.w * psA[tj].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJ_energy = -apos.w * psA[tj].q * invR * (1.0f-erfcAlphaR);
}
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
......
......@@ -97,12 +97,28 @@ void GetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
// Include versions of the kernels for Ewald
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define USE_EWALD
#define METHOD_NAME(a, b) a##Ewald##b
#include "kCalculateCDLJObcGbsaForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##EwaldByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
extern __global__ void kFindBlockBoundsCutoff_kernel();
extern __global__ void kFindBlockBoundsPeriodic_kernel();
extern __global__ void kFindBlocksWithInteractionsCutoff_kernel();
extern __global__ void kFindBlocksWithInteractionsPeriodic_kernel();
extern __global__ void kFindInteractionsWithinBlocksCutoff_kernel(unsigned int*);
extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*);
extern __global__ void kCalculateEwaldFastCosSinSums_kernel();
extern __global__ void kCalculateEwaldFastForces_kernel();
extern void kCalculatePME(gpuContext gpu);
void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
{
......@@ -169,5 +185,54 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaPeriodicForces1");
break;
case EWALD:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaEwaldForces");
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastForces");
break;
case PARTICLE_MESH_EWALD:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaEwaldForces");
kCalculatePME(gpu);
}
}
......@@ -30,6 +30,9 @@
* different versions of the kernels.
*/
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
__global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
{
extern __shared__ Atom sA[];
......@@ -45,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#endif
#ifdef USE_EWALD
const float SQRT_PI = sqrt(PI);
const float TWO_OVER_SQRT_PI = 2.0f/sqrt(LOCAL_HACK_PI);
#endif
unsigned int lasty = -0xFFFFFFFF;
......@@ -110,9 +113,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR);
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -191,16 +195,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR);
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+j && x+tgx < cSim.atoms && y+j < cSim.atoms;
if (needCorrection)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[j].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJObcGbsa_energy = -apos.w * psA[j].q * invR * erf(alphaR);
dEdR = -apos.w * psA[j].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy = -apos.w * psA[j].q * invR * (1.0f-erfcAlphaR);
}
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
......@@ -345,9 +350,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -433,8 +439,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR);
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */
......@@ -565,16 +572,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfc(alphaR);
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+tj && x+tgx < cSim.atoms && y+tj < cSim.atoms;
if (needCorrection)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[tj].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI );
CDLJObcGbsa_energy = -apos.w * psA[tj].q * invR * erf(alphaR);
dEdR = -apos.w * psA[tj].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy = -apos.w * psA[tj].q * invR * (1.0f-erfcAlphaR);
}
#else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment