Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
24148147
Commit
24148147
authored
Sep 03, 2009
by
Peter Eastman
Browse files
Optimizations to PME direct space calculation. Also added code for PME with GBSA.
parent
f92c61b0
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
132 additions
and
54 deletions
+132
-54
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+21
-21
platforms/cuda/src/kernels/kCalculateCDLJForces.h
platforms/cuda/src/kernels/kCalculateCDLJForces.h
+23
-18
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
+65
-0
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+23
-15
No files found.
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
24148147
...
@@ -102,19 +102,17 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
...
@@ -102,19 +102,17 @@ void GetCalculateCDLJForcesSim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJForces.h"
#include "kCalculateCDLJForces.h"
// Include version of the kernel
with Ewald metho
d
// Include version
s
of the kernel
s for Ewal
d
// Real Space Ewald summation utilizes almost the same kernel as Periodic
#undef METHOD_NAME
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define USE_PERIODIC
#define USE_EWALD
#define USE_EWALD
#define METHOD_NAME(a, b) a##Ewald
Direct
##b
#define METHOD_NAME(a, b) a##Ewald##b
#include "kCalculateCDLJForces.h"
#include "kCalculateCDLJForces.h"
#include "kFindInteractingBlocks.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##Ewald
Direct
ByWarp##b
#define METHOD_NAME(a, b) a##EwaldByWarp##b
#include "kCalculateCDLJForces.h"
#include "kCalculateCDLJForces.h"
// Reciprocal Space Ewald summation is in a separate kernel
// Reciprocal Space Ewald summation is in a separate kernel
...
@@ -169,20 +167,21 @@ void kCalculateCDLJForces(gpuContext gpu)
...
@@ -169,20 +167,21 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR
(
"kCalculateCDLJPeriodicForces"
);
LAUNCHERROR
(
"kCalculateCDLJPeriodicForces"
);
break
;
break
;
case
EWALD
:
case
EWALD
:
kFindBlockBounds
EwaldDirect
_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
kFindBlockBounds
Periodic
_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBounds
EwaldDirect
"
);
LAUNCHERROR
(
"kFindBlockBounds
Periodic
"
);
kFindBlocksWithInteractions
EwaldDirect
_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
kFindBlocksWithInteractions
Periodic
_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractions
EwaldDirect
"
);
LAUNCHERROR
(
"kFindBlocksWithInteractions
Periodic
"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocks
EwaldDirect
_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kFindInteractionsWithinBlocks
Periodic
_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksPeriodic"
);
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJEwald
Direct
ByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJEwaldByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
else
kCalculateCDLJEwald
Direct
Forces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJEwaldForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJEwald
Direct
Forces"
);
LAUNCHERROR
(
"kCalculateCDLJEwaldForces"
);
// Ewald summation
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
kCalculateEwaldFastCosSinSums_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastCosSinSums"
);
LAUNCHERROR
(
"kCalculateEwaldFastCosSinSums"
);
...
@@ -190,20 +189,21 @@ void kCalculateCDLJForces(gpuContext gpu)
...
@@ -190,20 +189,21 @@ void kCalculateCDLJForces(gpuContext gpu)
LAUNCHERROR
(
"kCalculateEwaldFastForces"
);
LAUNCHERROR
(
"kCalculateEwaldFastForces"
);
break
;
break
;
case
PARTICLE_MESH_EWALD
:
case
PARTICLE_MESH_EWALD
:
kFindBlockBounds
EwaldDirect
_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
kFindBlockBounds
Periodic
_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBounds
EwaldDirect
"
);
LAUNCHERROR
(
"kFindBlockBounds
Periodic
"
);
kFindBlocksWithInteractions
EwaldDirect
_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
kFindBlocksWithInteractions
Periodic
_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractions
EwaldDirect
"
);
LAUNCHERROR
(
"kFindBlocksWithInteractions
Periodic
"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocks
EwaldDirect
_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kFindInteractionsWithinBlocks
Periodic
_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksPeriodic"
);
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJEwald
Direct
ByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJEwaldByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
else
kCalculateCDLJEwald
Direct
Forces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJEwaldForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJEwald
Direct
Forces"
);
LAUNCHERROR
(
"kCalculateCDLJEwaldForces"
);
kCalculatePME
(
gpu
);
kCalculatePME
(
gpu
);
}
}
}
}
platforms/cuda/src/kernels/kCalculateCDLJForces.h
View file @
24148147
...
@@ -48,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
...
@@ -48,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#endif
#endif
#ifdef USE_EWALD
#ifdef USE_EWALD
const
float
SQRT_PI
=
sqrt
(
LOCAL_HACK_PI
);
const
float
TWO_OVER_
SQRT_PI
=
2
.
0
f
/
sqrt
(
LOCAL_HACK_PI
);
#endif
#endif
unsigned
int
lasty
=
0xFFFFFFFF
;
unsigned
int
lasty
=
0xFFFFFFFF
;
...
@@ -118,9 +118,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
...
@@ -118,9 +118,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
alphaR
=
cSim
.
alphaEwald
*
r
;
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfc
(
alphaR
)
+
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
float
erfcAlphaR
=
erfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
/* E */
/* E */
CDLJ_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfc
(
a
lphaR
)
;
CDLJ_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfc
A
lphaR
;
#else
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
/* E */
/* E */
...
@@ -179,16 +180,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
...
@@ -179,16 +180,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
alphaR
=
cSim
.
alphaEwald
*
r
;
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfc
(
alphaR
)
+
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
float
erfcAlphaR
=
erfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
/* E */
/* E */
CDLJ_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfc
(
a
lphaR
)
;
CDLJ_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfc
A
lphaR
;
bool
needCorrection
=
!
(
excl
&
0x1
)
&&
x
+
tgx
!=
y
+
j
&&
x
+
tgx
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
;
bool
needCorrection
=
!
(
excl
&
0x1
)
&&
x
+
tgx
!=
y
+
j
&&
x
+
tgx
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
;
if
(
needCorrection
)
if
(
needCorrection
)
{
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR
=
-
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erf
(
a
lphaR
)
-
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
dEdR
=
-
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
(
1
.
0
f
-
erf
cA
lphaR
)
-
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_
SQRT_PI
);
CDLJ_energy
=
-
apos
.
w
*
psA
[
j
].
q
*
invR
*
erf
(
a
lphaR
);
CDLJ_energy
=
-
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
1
.
0
f
-
erf
cA
lphaR
);
}
}
#else
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
...
@@ -301,9 +303,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
...
@@ -301,9 +303,10 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
alphaR
=
cSim
.
alphaEwald
*
r
;
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erfc
(
alphaR
)
+
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
float
erfcAlphaR
=
erfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
/* E */
/* E */
CDLJ_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erfc
(
a
lphaR
)
;
CDLJ_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erfc
A
lphaR
;
#else
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
/* E */
/* E */
...
@@ -368,8 +371,9 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
...
@@ -368,8 +371,9 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
alphaR
=
cSim
.
alphaEwald
*
r
;
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfc
(
alphaR
)
+
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
float
erfcAlphaR
=
erfc
(
alphaR
);
CDLJ_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
CDLJ_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfcAlphaR
;
#else
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
/* E */
/* E */
...
@@ -470,16 +474,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
...
@@ -470,16 +474,17 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
#ifdef USE_EWALD
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
alphaR
=
cSim
.
alphaEwald
*
r
;
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erfc
(
alphaR
)
+
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
float
erfcAlphaR
=
erfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
/* E */
/* E */
CDLJ_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erfc
(
a
lphaR
)
;
CDLJ_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erfc
A
lphaR
;
bool
needCorrection
=
!
(
excl
&
0x1
)
&&
x
+
tgx
!=
y
+
tj
&&
x
+
tgx
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
;
bool
needCorrection
=
!
(
excl
&
0x1
)
&&
x
+
tgx
!=
y
+
tj
&&
x
+
tgx
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
;
if
(
needCorrection
)
if
(
needCorrection
)
{
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR
=
-
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erf
(
a
lphaR
)
-
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
dEdR
=
-
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
(
1
.
0
f
-
erf
cA
lphaR
)
-
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_
SQRT_PI
);
CDLJ_energy
=
-
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erf
(
a
lphaR
);
CDLJ_energy
=
-
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
1
.
0
f
-
erf
cA
lphaR
);
}
}
#else
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
...
...
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
View file @
24148147
...
@@ -97,12 +97,28 @@ void GetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu)
...
@@ -97,12 +97,28 @@ void GetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu)
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
#include "kCalculateCDLJObcGbsaForces1.h"
// Include versions of the kernels for Ewald
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define USE_EWALD
#define METHOD_NAME(a, b) a##Ewald##b
#include "kCalculateCDLJObcGbsaForces1.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##EwaldByWarp##b
#include "kCalculateCDLJObcGbsaForces1.h"
extern
__global__
void
kFindBlockBoundsCutoff_kernel
();
extern
__global__
void
kFindBlockBoundsCutoff_kernel
();
extern
__global__
void
kFindBlockBoundsPeriodic_kernel
();
extern
__global__
void
kFindBlockBoundsPeriodic_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsCutoff_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsCutoff_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsPeriodic_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsPeriodic_kernel
();
extern
__global__
void
kFindInteractionsWithinBlocksCutoff_kernel
(
unsigned
int
*
);
extern
__global__
void
kFindInteractionsWithinBlocksCutoff_kernel
(
unsigned
int
*
);
extern
__global__
void
kFindInteractionsWithinBlocksPeriodic_kernel
(
unsigned
int
*
);
extern
__global__
void
kFindInteractionsWithinBlocksPeriodic_kernel
(
unsigned
int
*
);
extern
__global__
void
kCalculateEwaldFastCosSinSums_kernel
();
extern
__global__
void
kCalculateEwaldFastForces_kernel
();
extern
void
kCalculatePME
(
gpuContext
gpu
);
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
{
{
...
@@ -169,5 +185,54 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
...
@@ -169,5 +185,54 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaPeriodicForces1"
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaPeriodicForces1"
);
break
;
break
;
case
EWALD
:
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsPeriodic"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksPeriodic"
);
if
(
gpu
->
bRecalculateBornRadii
)
{
kCalculateObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaEwaldForces"
);
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastCosSinSums"
);
kCalculateEwaldFastForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastForces"
);
break
;
case
PARTICLE_MESH_EWALD
:
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsPeriodic"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksPeriodic"
);
if
(
gpu
->
bRecalculateBornRadii
)
{
kCalculateObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaEwaldForces"
);
kCalculatePME
(
gpu
);
}
}
}
}
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
View file @
24148147
...
@@ -30,6 +30,9 @@
...
@@ -30,6 +30,9 @@
* different versions of the kernels.
* different versions of the kernels.
*/
*/
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
__global__
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
__global__
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
Atom
sA
[];
...
@@ -45,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
...
@@ -45,7 +48,7 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#endif
#endif
#ifdef USE_EWALD
#ifdef USE_EWALD
const
float
SQRT_PI
=
sqrt
(
PI
);
const
float
TWO_OVER_
SQRT_PI
=
2
.
0
f
/
sqrt
(
LOCAL_HACK_
PI
);
#endif
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
unsigned
int
lasty
=
-
0xFFFFFFFF
;
...
@@ -110,9 +113,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
...
@@ -110,9 +113,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
alphaR
=
cSim
.
alphaEwald
*
r
;
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfc
(
alphaR
)
+
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
float
erfcAlphaR
=
erfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
/* E */
/* E */
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfc
(
a
lphaR
)
;
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfc
A
lphaR
;
#else
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
/* E */
/* E */
...
@@ -191,16 +195,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
...
@@ -191,16 +195,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
alphaR
=
cSim
.
alphaEwald
*
r
;
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfc
(
alphaR
)
+
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
float
erfcAlphaR
=
erfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
/* E */
/* E */
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfc
(
a
lphaR
)
;
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfc
A
lphaR
;
bool
needCorrection
=
!
(
excl
&
0x1
)
&&
x
+
tgx
!=
y
+
j
&&
x
+
tgx
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
;
bool
needCorrection
=
!
(
excl
&
0x1
)
&&
x
+
tgx
!=
y
+
j
&&
x
+
tgx
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
;
if
(
needCorrection
)
if
(
needCorrection
)
{
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR
=
-
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erf
(
a
lphaR
)
-
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
dEdR
=
-
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
(
1
.
0
f
-
erf
cA
lphaR
)
-
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_
SQRT_PI
);
CDLJObcGbsa_energy
=
-
apos
.
w
*
psA
[
j
].
q
*
invR
*
erf
(
a
lphaR
);
CDLJObcGbsa_energy
=
-
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
1
.
0
f
-
erf
cA
lphaR
);
}
}
#else
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
...
@@ -345,9 +350,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
...
@@ -345,9 +350,10 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
alphaR
=
cSim
.
alphaEwald
*
r
;
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erfc
(
alphaR
)
+
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
float
erfcAlphaR
=
erfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
/* E */
/* E */
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erfc
(
a
lphaR
)
;
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erfc
A
lphaR
;
#else
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
/* E */
/* E */
...
@@ -433,8 +439,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
...
@@ -433,8 +439,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
alphaR
=
cSim
.
alphaEwald
*
r
;
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfc
(
alphaR
)
+
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
float
erfcAlphaR
=
erfc
(
alphaR
);
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfcAlphaR
;
#else
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
/* E */
/* E */
...
@@ -565,16 +572,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
...
@@ -565,16 +572,17 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
#ifdef USE_EWALD
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
alphaR
=
cSim
.
alphaEwald
*
r
;
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erfc
(
alphaR
)
+
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
float
erfcAlphaR
=
erfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
/* E */
/* E */
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erfc
(
a
lphaR
)
;
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erfc
A
lphaR
;
bool
needCorrection
=
!
(
excl
&
0x1
)
&&
x
+
tgx
!=
y
+
tj
&&
x
+
tgx
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
;
bool
needCorrection
=
!
(
excl
&
0x1
)
&&
x
+
tgx
!=
y
+
tj
&&
x
+
tgx
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
;
if
(
needCorrection
)
if
(
needCorrection
)
{
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR
=
-
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erf
(
a
lphaR
)
-
2
.
0
f
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)
/
SQRT_PI
);
dEdR
=
-
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
(
1
.
0
f
-
erf
cA
lphaR
)
-
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_
SQRT_PI
);
CDLJObcGbsa_energy
=
-
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erf
(
a
lphaR
);
CDLJObcGbsa_energy
=
-
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
1
.
0
f
-
erf
cA
lphaR
);
}
}
#else
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment