Commit 24148147 authored by Peter Eastman's avatar Peter Eastman
Browse files

Optimizations to PME direct space calculation. Also added code for PME with GBSA.

parent f92c61b0
...@@ -102,19 +102,17 @@ void GetCalculateCDLJForcesSim(gpuContext gpu) ...@@ -102,19 +102,17 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b #define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJForces.h" #include "kCalculateCDLJForces.h"
// Include version of the kernel with Ewald method // Include versions of the kernels for Ewald
// Real Space Ewald summation utilizes almost the same kernel as Periodic
#undef METHOD_NAME #undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP #undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC #define USE_PERIODIC
#define USE_EWALD #define USE_EWALD
#define METHOD_NAME(a, b) a##EwaldDirect##b #define METHOD_NAME(a, b) a##Ewald##b
#include "kCalculateCDLJForces.h" #include "kCalculateCDLJForces.h"
#include "kFindInteractingBlocks.h"
#define USE_OUTPUT_BUFFER_PER_WARP #define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME #undef METHOD_NAME
#define METHOD_NAME(a, b) a##EwaldDirectByWarp##b #define METHOD_NAME(a, b) a##EwaldByWarp##b
#include "kCalculateCDLJForces.h" #include "kCalculateCDLJForces.h"
// Reciprocal Space Ewald summation is in a separate kernel // Reciprocal Space Ewald summation is in a separate kernel
...@@ -169,20 +167,21 @@ void kCalculateCDLJForces(gpuContext gpu) ...@@ -169,20 +167,21 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR("kCalculateCDLJPeriodicForces"); LAUNCHERROR("kCalculateCDLJPeriodicForces");
break; break;
case EWALD: case EWALD:
kFindBlockBoundsEwaldDirect_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>(); kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsEwaldDirect"); LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsEwaldDirect_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>(); kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsEwaldDirect"); LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount); compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksEwaldDirect_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCDLJEwaldDirectByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCDLJEwaldByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else else
kCalculateCDLJEwaldDirectForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCDLJEwaldForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJEwaldDirectForces"); LAUNCHERROR("kCalculateCDLJEwaldForces");
// Ewald summation // Ewald summation
kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>(); kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums"); LAUNCHERROR("kCalculateEwaldFastCosSinSums");
...@@ -190,20 +189,21 @@ void kCalculateCDLJForces(gpuContext gpu) ...@@ -190,20 +189,21 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR("kCalculateEwaldFastForces"); LAUNCHERROR("kCalculateEwaldFastForces");
break; break;
case PARTICLE_MESH_EWALD: case PARTICLE_MESH_EWALD:
kFindBlockBoundsEwaldDirect_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>(); kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsEwaldDirect"); LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsEwaldDirect_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>(); kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsEwaldDirect"); LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount); compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksEwaldDirect_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bOutputBufferPerWarp) if (gpu->bOutputBufferPerWarp)
kCalculateCDLJEwaldDirectByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCDLJEwaldByWarpForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else else
kCalculateCDLJEwaldDirectForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block, kCalculateCDLJEwaldForces_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJEwaldDirectForces"); LAUNCHERROR("kCalculateCDLJEwaldForces");
kCalculatePME(gpu); kCalculatePME(gpu);
} }
} }
...@@ -48,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni ...@@ -48,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#endif #endif
#ifdef USE_EWALD #ifdef USE_EWALD
const float SQRT_PI = sqrt(LOCAL_HACK_PI); const float TWO_OVER_SQRT_PI = 2.0f/sqrt(LOCAL_HACK_PI);
#endif #endif
unsigned int lasty = 0xFFFFFFFF; unsigned int lasty = 0xFFFFFFFF;
...@@ -118,9 +118,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni ...@@ -118,9 +118,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD #ifdef USE_EWALD
float r = sqrt(r2); float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r; float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI );
/* E */ /* E */
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR); CDLJ_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else #else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2); dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */ /* E */
...@@ -179,16 +180,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni ...@@ -179,16 +180,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD #ifdef USE_EWALD
float r = sqrt(r2); float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r; float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */ /* E */
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR); CDLJ_energy += apos.w * psA[j].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+j && x+tgx < cSim.atoms && y+j < cSim.atoms; bool needCorrection = !(excl & 0x1) && x+tgx != y+j && x+tgx < cSim.atoms && y+j < cSim.atoms;
if (needCorrection) if (needCorrection)
{ {
// Subtract off the part of this interaction that was included in the reciprocal space contribution. // Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[j].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); dEdR = -apos.w * psA[j].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJ_energy = -apos.w * psA[j].q * invR * erf(alphaR); CDLJ_energy = -apos.w * psA[j].q * invR * (1.0f-erfcAlphaR);
} }
#else #else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2); dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
...@@ -301,9 +303,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni ...@@ -301,9 +303,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD #ifdef USE_EWALD
float r = sqrt(r2); float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r; float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */ /* E */
CDLJ_energy += apos.w * psA[tj].q * invR * erfc(alphaR); CDLJ_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
#else #else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2); dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */ /* E */
...@@ -368,8 +371,9 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni ...@@ -368,8 +371,9 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD #ifdef USE_EWALD
float r = sqrt(r2); float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r; float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); float erfcAlphaR = erfc(alphaR);
CDLJ_energy += apos.w * psA[j].q * invR * erfc(alphaR); dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJ_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else #else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2); dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */ /* E */
...@@ -470,16 +474,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni ...@@ -470,16 +474,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD #ifdef USE_EWALD
float r = sqrt(r2); float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r; float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */ /* E */
CDLJ_energy += apos.w * psA[tj].q * invR * erfc(alphaR); CDLJ_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+tj && x+tgx < cSim.atoms && y+tj < cSim.atoms; bool needCorrection = !(excl & 0x1) && x+tgx != y+tj && x+tgx < cSim.atoms && y+tj < cSim.atoms;
if (needCorrection) if (needCorrection)
{ {
// Subtract off the part of this interaction that was included in the reciprocal space contribution. // Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[tj].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); dEdR = -apos.w * psA[tj].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJ_energy = -apos.w * psA[tj].q * invR * erf(alphaR); CDLJ_energy = -apos.w * psA[tj].q * invR * (1.0f-erfcAlphaR);
} }
#else #else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2); dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
......
...@@ -97,12 +97,28 @@ void GetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu) ...@@ -97,12 +97,28 @@ void GetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b #define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h" #include "kCalculateCDLJObcGbsaForces1.h"
// Include versions of the kernels for Ewald
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define USE_EWALD
#define METHOD_NAME(a, b) a##Ewald##b
#include "kCalculateCDLJObcGbsaForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##EwaldByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
extern __global__ void kFindBlockBoundsCutoff_kernel(); extern __global__ void kFindBlockBoundsCutoff_kernel();
extern __global__ void kFindBlockBoundsPeriodic_kernel(); extern __global__ void kFindBlockBoundsPeriodic_kernel();
extern __global__ void kFindBlocksWithInteractionsCutoff_kernel(); extern __global__ void kFindBlocksWithInteractionsCutoff_kernel();
extern __global__ void kFindBlocksWithInteractionsPeriodic_kernel(); extern __global__ void kFindBlocksWithInteractionsPeriodic_kernel();
extern __global__ void kFindInteractionsWithinBlocksCutoff_kernel(unsigned int*); extern __global__ void kFindInteractionsWithinBlocksCutoff_kernel(unsigned int*);
extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*); extern __global__ void kFindInteractionsWithinBlocksPeriodic_kernel(unsigned int*);
extern __global__ void kCalculateEwaldFastCosSinSums_kernel();
extern __global__ void kCalculateEwaldFastForces_kernel();
extern void kCalculatePME(gpuContext gpu);
void kCalculateCDLJObcGbsaForces1(gpuContext gpu) void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
{ {
...@@ -169,5 +185,54 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu) ...@@ -169,5 +185,54 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
(sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); (sizeof(Atom)+sizeof(float))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaPeriodicForces1"); LAUNCHERROR("kCalculateCDLJObcGbsaPeriodicForces1");
break; break;
case EWALD:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaEwaldForces");
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastCosSinSums");
kCalculateEwaldFastForces_kernel<<<gpu->sim.blocks, gpu->sim.update_threads_per_block>>>();
LAUNCHERROR("kCalculateEwaldFastForces");
break;
case PARTICLE_MESH_EWALD:
kFindBlockBoundsPeriodic_kernel<<<(gpu->psGridBoundingBox->_length+63)/64, 64>>>();
LAUNCHERROR("kFindBlockBoundsPeriodic");
kFindBlocksWithInteractionsPeriodic_kernel<<<gpu->sim.interaction_blocks, gpu->sim.interaction_threads_per_block>>>();
LAUNCHERROR("kFindBlocksWithInteractionsPeriodic");
compactStream(gpu->compactPlan, gpu->sim.pInteractingWorkUnit, gpu->sim.pWorkUnit, gpu->sim.pInteractionFlag, gpu->sim.workUnits, gpu->sim.pInteractionCount);
kFindInteractionsWithinBlocksPeriodic_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksPeriodic");
if (gpu->bRecalculateBornRadii)
{
kCalculateObcGbsaBornSum(gpu);
kReduceObcGbsaBornSum(gpu);
}
if (gpu->bOutputBufferPerWarp)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.nonbond_threads_per_block,
(sizeof(Atom)+sizeof(float3))*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kCalculateCDLJObcGbsaEwaldForces");
kCalculatePME(gpu);
} }
} }
...@@ -30,6 +30,9 @@ ...@@ -30,6 +30,9 @@
* different versions of the kernels. * different versions of the kernels.
*/ */
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
__global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit) __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
{ {
extern __shared__ Atom sA[]; extern __shared__ Atom sA[];
...@@ -45,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -45,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#endif #endif
#ifdef USE_EWALD #ifdef USE_EWALD
const float SQRT_PI = sqrt(PI); const float TWO_OVER_SQRT_PI = 2.0f/sqrt(LOCAL_HACK_PI);
#endif #endif
unsigned int lasty = -0xFFFFFFFF; unsigned int lasty = -0xFFFFFFFF;
...@@ -110,9 +113,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -110,9 +113,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD #ifdef USE_EWALD
float r = sqrt(r2); float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r; float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */ /* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR); CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else #else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2); dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */ /* E */
...@@ -191,16 +195,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -191,16 +195,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD #ifdef USE_EWALD
float r = sqrt(r2); float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r; float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */ /* E */
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR); CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+j && x+tgx < cSim.atoms && y+j < cSim.atoms; bool needCorrection = !(excl & 0x1) && x+tgx != y+j && x+tgx < cSim.atoms && y+j < cSim.atoms;
if (needCorrection) if (needCorrection)
{ {
// Subtract off the part of this interaction that was included in the reciprocal space contribution. // Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[j].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); dEdR = -apos.w * psA[j].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy = -apos.w * psA[j].q * invR * erf(alphaR); CDLJObcGbsa_energy = -apos.w * psA[j].q * invR * (1.0f-erfcAlphaR);
} }
#else #else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2); dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
...@@ -345,9 +350,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -345,9 +350,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD #ifdef USE_EWALD
float r = sqrt(r2); float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r; float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */ /* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfc(alphaR); CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
#else #else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2); dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */ /* E */
...@@ -433,8 +439,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -433,8 +439,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD #ifdef USE_EWALD
float r = sqrt(r2); float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r; float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[j].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); float erfcAlphaR = erfc(alphaR);
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfc(alphaR); dEdR += apos.w * psA[j].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy += apos.w * psA[j].q * invR * erfcAlphaR;
#else #else
dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2); dEdR += apos.w * psA[j].q * (invR - 2.0f * cSim.reactionFieldK * r2);
/* E */ /* E */
...@@ -565,16 +572,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* ...@@ -565,16 +572,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD #ifdef USE_EWALD
float r = sqrt(r2); float r = sqrt(r2);
float alphaR = cSim.alphaEwald * r; float alphaR = cSim.alphaEwald * r;
dEdR += apos.w * psA[tj].q * invR * (erfc(alphaR) + 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); float erfcAlphaR = erfc(alphaR);
dEdR += apos.w * psA[tj].q * invR * (erfcAlphaR + alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
/* E */ /* E */
CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfc(alphaR); CDLJObcGbsa_energy += apos.w * psA[tj].q * invR * erfcAlphaR;
bool needCorrection = !(excl & 0x1) && x+tgx != y+tj && x+tgx < cSim.atoms && y+tj < cSim.atoms; bool needCorrection = !(excl & 0x1) && x+tgx != y+tj && x+tgx < cSim.atoms && y+tj < cSim.atoms;
if (needCorrection) if (needCorrection)
{ {
// Subtract off the part of this interaction that was included in the reciprocal space contribution. // Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR = -apos.w * psA[tj].q * invR * (erf(alphaR) - 2.0f * alphaR * exp ( - alphaR * alphaR) / SQRT_PI ); dEdR = -apos.w * psA[tj].q * invR * ((1.0f-erfcAlphaR) - alphaR * exp ( - alphaR * alphaR) * TWO_OVER_SQRT_PI);
CDLJObcGbsa_energy = -apos.w * psA[tj].q * invR * erf(alphaR); CDLJObcGbsa_energy = -apos.w * psA[tj].q * invR * (1.0f-erfcAlphaR);
} }
#else #else
dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2); dEdR += apos.w * psA[tj].q * (invR - 2.0f * cSim.reactionFieldK * r2);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment