Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b9d12c46
"wrappers/python/vscode:/vscode.git/clone" did not exist on "1032094fb124d11e382ea253fba25aff88d387c7"
Commit
b9d12c46
authored
Oct 25, 2011
by
Mark Friedrichs
Browse files
Clean code and several bug fixes to GB/VI and Obc for nonbonded methods w/ cutoffs
parent
ce74c6ae
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
314 additions
and
360 deletions
+314
-360
platforms/cuda/src/kernels/cudaKernels.h
platforms/cuda/src/kernels/cudaKernels.h
+4
-0
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
+31
-49
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+70
-159
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
+34
-54
platforms/cuda/src/kernels/kCalculateGBVIForces2.cu
platforms/cuda/src/kernels/kCalculateGBVIForces2.cu
+14
-49
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
+7
-6
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
+40
-11
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
+25
-28
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.cu
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.cu
+2
-0
platforms/cuda/src/kernels/kForces.cu
platforms/cuda/src/kernels/kForces.cu
+1
-3
platforms/reference/src/gbsa/CpuGBVI.cpp
platforms/reference/src/gbsa/CpuGBVI.cpp
+1
-1
platforms/reference/src/gbsa/CpuObc.cpp
platforms/reference/src/gbsa/CpuObc.cpp
+64
-0
platforms/reference/src/gbsa/CpuObc.h
platforms/reference/src/gbsa/CpuObc.h
+21
-0
No files found.
platforms/cuda/src/kernels/cudaKernels.h
View file @
b9d12c46
...
...
@@ -133,3 +133,7 @@ extern void SetCustomExternalGlobalParams(const std::vector<float>& paramValues)
extern
void
SetCustomNonbondedForceExpression
(
const
Expression
<
256
>&
expression
);
extern
void
SetCustomNonbondedEnergyExpression
(
const
Expression
<
256
>&
expression
);
extern
void
SetCustomNonbondedGlobalParams
(
const
std
::
vector
<
float
>&
paramValues
);
extern
void
kPrintGBVI
(
gpuContext
gpu
,
std
::
string
callId
,
int
call
,
FILE
*
log
);
extern
void
kPrintObc
(
gpuContext
gpu
,
std
::
string
callId
,
int
call
,
FILE
*
log
);
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
View file @
b9d12c46
...
...
@@ -64,16 +64,6 @@ void GetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu)
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
texture
<
float
,
1
,
cudaReadModeElementType
>
tabulatedErfcRef
;
static
__device__
float
fastErfc
(
float
r
)
{
float
normalized
=
cSim
.
tabulatedErfcScale
*
r
;
int
index
=
(
int
)
normalized
;
float
fract2
=
normalized
-
index
;
float
fract1
=
1.0
f
-
fract2
;
return
fract1
*
tex1Dfetch
(
tabulatedErfcRef
,
index
)
+
fract2
*
tex1Dfetch
(
tabulatedErfcRef
,
index
+
1
);
}
// Include versions of the kernel for N^2 calculations.
...
...
@@ -134,9 +124,11 @@ extern void kCalculatePME(gpuContext gpu);
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
{
// printf("kCalculateCDLJObcGbsaForces1\n");
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
if
(
gpu
->
bRecalculateBornRadii
)
{
if
(
gpu
->
bIncludeGBVI
){
...
...
@@ -148,16 +140,19 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaN2ByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateCDLJObcGbsaN2Forces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaN2Forces1"
);
break
;
case
CUTOFF
:
kFindBlockBoundsCutoff_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsCutoff"
);
kFindBlocksWithInteractionsCutoff_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
...
...
@@ -165,20 +160,31 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bRecalculateBornRadii
)
{
kCalculateObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
if
(
gpu
->
bIncludeGBVI
){
kCalculateGBVIBornSum
(
gpu
);
kReduceGBVIBornSum
(
gpu
);
}
else
{
kCalculateObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
}
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaCutoffByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaCutoffForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaCutoffForces1"
);
break
;
case
PERIODIC
:
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
...
...
@@ -186,52 +192,28 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bRecalculateBornRadii
)
{
kCalculateObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
if
(
gpu
->
bIncludeGBVI
){
kCalculateGBVIBornSum
(
gpu
);
kReduceGBVIBornSum
(
gpu
);
}
else
{
kCalculateObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
}
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaPeriodicByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaPeriodicForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaPeriodicForces1"
);
break
;
case
EWALD
:
case
PARTICLE_MESH_EWALD
:
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsPeriodic"
);
compactStream
(
gpu
->
compactPlan
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
,
gpu
->
sim
.
pInteractionCount
);
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kFindInteractionsWithinBlocksPeriodic"
);
if
(
gpu
->
bRecalculateBornRadii
)
{
kCalculateObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
}
cudaChannelFormatDesc
channelDesc
=
cudaCreateChannelDesc
<
float
>
();
cudaBindTexture
(
NULL
,
&
tabulatedErfcRef
,
gpu
->
psTabulatedErfc
->
_pDevData
,
&
channelDesc
,
gpu
->
psTabulatedErfc
->
_length
*
sizeof
(
float
));
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaEwaldByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaEwaldForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaEwaldForces"
);
if
(
gpu
->
sim
.
nonbondedMethod
==
EWALD
)
{
// Ewald summation
kCalculateEwaldFastCosSinSums_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastCosSinSums"
);
kCalculateEwaldFastForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
update_threads_per_block
>>>
();
LAUNCHERROR
(
"kCalculateEwaldFastForces"
);
}
else
kCalculatePME
(
gpu
);
}
}
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
View file @
b9d12c46
...
...
@@ -30,9 +30,6 @@
* different versions of the kernels.
*/
/* Cuda compiler on Windows does not recognized "static const float" values */
#define LOCAL_HACK_PI 3.1415926535897932384626433832795
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
...
...
@@ -41,25 +38,21 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
#endif
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
float
CDLJObcGbsa_energy
;
float
energy
=
0
.
0
f
;
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
#ifdef USE_EWALD
const
float
TWO_OVER_SQRT_PI
=
2
.
0
f
/
sqrt
(
LOCAL_HACK_PI
);
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
unsigned
int
lasty
=
-
0xFFFFFFFF
;
while
(
pos
<
end
)
{
...
...
@@ -101,9 +94,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
...
...
@@ -115,26 +108,14 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
a
.
y
*
psA
[
j
].
eps
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
/* E */
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
erfcAlphaR
=
fastErfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
/* E */
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfcAlphaR
;
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
/* E */
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
+
cSim
.
reactionFieldK
*
r2
-
cSim
.
reactionFieldC
);
#endif
#else
float
factorX
=
apos
.
w
*
psA
[
j
].
q
*
invR
;
dEdR
+=
factorX
;
/* E */
CDLJObcGbsa_energy
+=
factorX
;
#endif
dEdR
*=
invR
*
invR
;
...
...
@@ -149,18 +130,18 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
CDLJObcGbsa_energy
+=
(
q2
*
psA
[
j
].
q
)
/
denominator
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
if
(
i
>=
cSim
.
atoms
||
(
x
+
j
)
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
(
x
+
j
)
>=
cSim
.
atoms
)
#endif
{
dEdR
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
#endif
if
(
i
<
cSim
.
atoms
)
{
energy
+=
0
.
5
f
*
CDLJObcGbsa_energy
;
}
energy
+=
0
.
5
f
*
CDLJObcGbsa_energy
;
// Add Forces
dx
*=
dEdR
;
dy
*=
dEdR
;
...
...
@@ -169,10 +150,11 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
af
.
y
-=
dy
;
af
.
z
-=
dz
;
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
j
].
br
;
}
}
else
// bExclusion
{
}
else
{
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
xi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
xi
*
(
xi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
...
...
@@ -182,11 +164,11 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
// CDLJ part
float
invR
=
1
.
0
f
/
sqrt
(
r2
);
...
...
@@ -198,24 +180,8 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
erfcAlphaR
=
fastErfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfcAlphaR
;
bool
needCorrection
=
!
(
excl
&
0x1
)
&&
x
+
tgx
!=
y
+
j
&&
x
+
tgx
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
;
if
(
needCorrection
)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR
=
-
apos
.
w
*
psA
[
j
].
q
*
invR
*
((
1
.
0
f
-
erfcAlphaR
)
-
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
CDLJObcGbsa_energy
=
-
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
1
.
0
f
-
erfcAlphaR
);
}
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
+
cSim
.
reactionFieldK
*
r2
-
cSim
.
reactionFieldC
);
#endif
#else
float
factorX
=
apos
.
w
*
psA
[
j
].
q
*
invR
;
...
...
@@ -223,13 +189,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
CDLJObcGbsa_energy
+=
factorX
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_EWALD
if
(
!
(
excl
&
0x1
)
&&
!
needCorrection
)
#else
if
(
!
(
excl
&
0x1
))
#endif
{
dEdR
=
0
.
0
f
;
dEdR
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
...
...
@@ -243,26 +205,18 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
CDLJObcGbsa_energy
+=
(
q2
*
psA
[
j
].
q
)
/
denominator
;
#if defined USE_
PERIODIC
#if defined USE_
CUTOFF
if
(
i
>=
cSim
.
atoms
||
x
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
#elif defined USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
#else
if
(
i
>=
cSim
.
atoms
||
x
+
j
>=
cSim
.
atoms
)
#endif
/* E */
if
(
i
<
cSim
.
atoms
)
{
energy
+=
0
.
5
f
*
CDLJObcGbsa_energy
;
dEdR
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
energy
+=
0
.
5
f
*
CDLJObcGbsa_energy
;
// Add Forces
dx
*=
dEdR
;
dy
*=
dEdR
;
...
...
@@ -288,9 +242,8 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
of
.
w
+=
af
.
w
;
cSim
.
pForce4
[
offset
]
=
of
;
cSim
.
pBornForce
[
offset
]
=
of
.
w
;
}
else
// 100% utilization
{
}
else
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
...
...
@@ -330,9 +283,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
...
...
@@ -346,16 +299,8 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
erfcAlphaR
=
fastErfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erfcAlphaR
;
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
+
cSim
.
reactionFieldK
*
r2
-
cSim
.
reactionFieldC
);
#endif
#else
float
factorX
=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
...
...
@@ -375,18 +320,17 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
CDLJObcGbsa_energy
+=
(
q2
*
psA
[
tj
].
q
)
/
denominator
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
if
(
i
>=
cSim
.
atoms
||
(
y
+
tj
)
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
(
y
+
tj
)
>=
cSim
.
atoms
)
#endif
{
dEdR
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
#endif
/* E */
if
(
i
<
cSim
.
atoms
)
{
energy
+=
CDLJObcGbsa_energy
;
}
energy
+=
CDLJObcGbsa_energy
;
// Add forces
dx
*=
dEdR
;
dy
*=
dEdR
;
...
...
@@ -399,7 +343,10 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
psA
[
tj
].
fb
+=
dGpol_dalpha2_ij
*
br
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
#ifdef USE_CUTOFF
...
...
@@ -431,16 +378,8 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
erfcAlphaR
=
fastErfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
invR
*
erfcAlphaR
;
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
+
cSim
.
reactionFieldK
*
r2
-
cSim
.
reactionFieldC
);
#endif
#else
float
factorX
=
apos
.
w
*
psA
[
j
].
q
*
invR
;
...
...
@@ -461,17 +400,17 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
CDLJObcGbsa_energy
+=
(
q2
*
psA
[
j
].
q
)
/
denominator
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
if
(
i
>=
cSim
.
atoms
||
(
y
+
j
)
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
(
y
+
j
)
>=
cSim
.
atoms
)
#endif
if
(
i
<
cSim
.
atoms
)
{
energy
+=
CDLJObcGbsa_energy
;
dEdR
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
energy
+=
CDLJObcGbsa_energy
;
// Add forces
dx
*=
dEdR
;
dy
*=
dEdR
;
...
...
@@ -535,9 +474,7 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
}
}
#endif
}
else
// bExclusion
{
}
else
{
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
yi
=
y
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
yi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
yi
*
(
yi
+
1
)
/
2
;
...
...
@@ -549,9 +486,9 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
...
...
@@ -565,38 +502,18 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
CDLJObcGbsa_energy
=
eps
*
(
sig6
-
1
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
#ifdef USE_EWALD
float
r
=
sqrt
(
r2
);
float
alphaR
=
cSim
.
alphaEwald
*
r
;
float
erfcAlphaR
=
fastErfc
(
alphaR
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
erfcAlphaR
+
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
*
erfcAlphaR
;
bool
needCorrection
=
!
(
excl
&
0x1
)
&&
x
+
tgx
!=
y
+
tj
&&
x
+
tgx
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
;
if
(
needCorrection
)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
dEdR
=
-
apos
.
w
*
psA
[
tj
].
q
*
invR
*
((
1
.
0
f
-
erfcAlphaR
)
-
alphaR
*
exp
(
-
alphaR
*
alphaR
)
*
TWO_OVER_SQRT_PI
);
CDLJObcGbsa_energy
=
-
apos
.
w
*
psA
[
tj
].
q
*
invR
*
(
1
.
0
f
-
erfcAlphaR
);
}
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
CDLJObcGbsa_energy
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
+
cSim
.
reactionFieldK
*
r2
-
cSim
.
reactionFieldC
);
#endif
#else
float
factorX
=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
dEdR
+=
factorX
;
CDLJObcGbsa_energy
+=
factorX
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_EWALD
if
(
!
(
excl
&
0x1
)
&&
!
needCorrection
)
#else
if
(
!
(
excl
&
0x1
))
#endif
{
dEdR
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
dEdR
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
// ObcGbsaForce1 part
...
...
@@ -609,26 +526,19 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
CDLJObcGbsa_energy
+=
(
q2
*
psA
[
tj
].
q
)
/
denominator
;
#if defined USE_PERIODIC
#ifdef USE_CUTOFF
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
)
#endif
{
dEdR
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
#elif defined USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
dGpol_dalpha2_ij
=
0
.
0
f
;
CDLJObcGbsa_energy
=
0
.
0
f
;
}
#endif
/* E */
if
(
i
<
cSim
.
atoms
)
{
energy
+=
CDLJObcGbsa_energy
;
}
energy
+=
CDLJObcGbsa_energy
;
// Add forces
dx
*=
dEdR
;
dy
*=
dEdR
;
...
...
@@ -642,6 +552,7 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit)
psA
[
tj
].
fz
+=
dz
;
psA
[
tj
].
fb
+=
dGpol_dalpha2_ij
*
br
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
...
...
platforms/cuda/src/kernels/kCalculateGBVIBornSum.cu
View file @
b9d12c46
...
...
@@ -237,71 +237,48 @@ __global__ void kReduceGBVIBornSum_kernel()
void
kReduceGBVIBornSum
(
gpuContext
gpu
)
{
//printf("kReduceGBVIBornSum\n");
#define GBVI_DEBUG 0
#if ( GBVI_DEBUG == 1 )
gpu
->
psGBVIData
->
Download
();
gpu
->
psBornSum
->
Download
();
gpu
->
psPosq4
->
Download
();
(
void
)
fprintf
(
stderr
,
"
\n
kReduceGBVIBornSum: Post BornSum %s Born radii & params
\n
"
,
(
gpu
->
bIncludeGBVI
?
"GBVI"
:
"Obc"
)
);
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
stderr
,
"%d bSum=%14.6e param[%14.6e %14.6e %14.6e] x[%14.6f %14.6f %14.6f %14.6f]
\n
"
,
ii
,
gpu
->
psBornSum
->
_pSysStream
[
0
][
ii
],
gpu
->
psGBVIData
->
_pSysStream
[
0
][
ii
].
x
,
gpu
->
psGBVIData
->
_pSysStream
[
0
][
ii
].
y
,
gpu
->
psGBVIData
->
_pSysStream
[
0
][
ii
].
z
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
x
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
y
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
z
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
w
);
}
#endif
#undef GBVI_DEBUG
kReduceGBVIBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
gpu
->
bRecalculateBornRadii
=
false
;
LAUNCHERROR
(
"kReduceGBVIBornSum"
);
}
void
kPrintGBVI
(
gpuContext
gpu
,
std
::
string
callId
,
int
call
,
FILE
*
log
)
{
gpu
->
psGBVIData
->
Download
();
gpu
->
psBornRadii
->
Download
();
gpu
->
psBornForce
->
Download
();
gpu
->
psPosq4
->
Download
();
gpu
->
psSigEps2
->
Download
();
(
void
)
fprintf
(
log
,
"kPrintGBVI Cuda comp bR bF prm sigeps2
\n
"
);
(
void
)
fprintf
(
stderr
,
"kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=%u wu=%u %u shrd=%u
\n
"
,
gpu
->
bOutputBufferPerWarp
,
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
gpu
->
sim
.
workUnits
,
gpu
->
psWorkUnit
->
_pSysStream
[
0
][
0
],
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
);
for
(
int
ii
=
0
;
ii
<
gpu
->
sim
.
paddedNumberOfAtoms
;
ii
++
){
(
void
)
fprintf
(
log
,
"%6d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e
\n
"
,
ii
,
gpu
->
psBornRadii
->
_pSysData
[
ii
],
gpu
->
psBornForce
->
_pSysData
[
ii
],
gpu
->
psGBVIData
->
_pSysData
[
ii
].
x
,
gpu
->
psGBVIData
->
_pSysData
[
ii
].
y
,
gpu
->
psGBVIData
->
_pSysData
[
ii
].
z
,
gpu
->
psGBVIData
->
_pSysData
[
ii
].
w
,
gpu
->
psSigEps2
->
_pSysData
[
ii
].
x
,
gpu
->
psSigEps2
->
_pSysData
[
ii
].
y
);
}
}
void
kCalculateGBVIBornSum
(
gpuContext
gpu
)
{
//printf("kCalculateGBVIBornSum\n");
//size_t numWithInteractions;
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
#define GBVI 0
#if GBVI == 1
int
maxPrint
=
10
;
gpu
->
psWorkUnit
->
Download
();
fprintf
(
stderr
,
"kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=%u wu=%u %u shrd=%u
\n
"
,
gpu
->
bOutputBufferPerWarp
,
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
gpu
->
sim
.
workUnits
,
gpu
->
psWorkUnit
->
_pSysStream
[
0
][
0
],
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
);
gpu
->
psGBVIData
->
Download
();
gpu
->
psBornSum
->
Download
();
gpu
->
psPosq4
->
Download
();
(
void
)
fprintf
(
stderr
,
"
\n
kCalculateGBVIBornSum: pre BornSum %s Born radii & params
\n
"
,
(
gpu
->
bIncludeGBVI
?
"GBVI"
:
"Obc"
)
);
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
stderr
,
"%d bSum=%14.6e param[%14.6e %14.6e %14.6e] x[%14.6f %14.6f %14.6f %14.6f]
\n
"
,
ii
,
gpu
->
psBornSum
->
_pSysStream
[
0
][
ii
],
gpu
->
psGBVIData
->
_pSysStream
[
0
][
ii
].
x
,
gpu
->
psGBVIData
->
_pSysStream
[
0
][
ii
].
y
,
gpu
->
psGBVIData
->
_pSysStream
[
0
][
ii
].
z
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
x
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
y
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
z
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
w
);
if
(
(
ii
==
maxPrint
)
&&
(
ii
<
(
gpu
->
natoms
-
maxPrint
))
){
ii
=
gpu
->
natoms
-
maxPrint
;
}
}
#endif
#undef GBVI
if
(
gpu
->
bOutputBufferPerWarp
){
kCalculateGBVIN2ByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
...
...
@@ -310,6 +287,7 @@ fprintf( stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
}
break
;
case
CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVICutoffByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
...
...
@@ -318,6 +296,7 @@ fprintf( stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=
kCalculateGBVICutoffBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
case
PERIODIC
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVIPeriodicByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
...
...
@@ -326,6 +305,7 @@ fprintf( stderr, "kCalculateGBVIBornSum: bOutputBufferPerWarp=%u blks=%u th/blk=
kCalculateGBVIPeriodicBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
}
LAUNCHERROR
(
"kCalculateGBVIBornSum"
);
}
platforms/cuda/src/kernels/kCalculateGBVIForces2.cu
View file @
b9d12c46
...
...
@@ -167,76 +167,41 @@ __global__ void kCalculateGBVIForces2a_kernel()
void
kCalculateGBVIForces2
(
gpuContext
gpu
)
{
//printf("kCalculateGBVIForces2\n");
size_t
numWithInteractions
;
#if 0
kClearForces(gpu);
(void) fprintf( stderr, "\nkCalculateGBVIForces2: cleared force prior loop2\n" ); (void) fflush( stderr );
kCalculateGBVIForces2a_kernel<<<gpu->sim.blocks, 384>>>();
(void) fprintf( stderr, "\ncalled kCalculateGBVIForces2a\n" ); (void) fflush( stderr );
return;
#endif
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVIN2ByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateGBVIN2Forces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
//(void) fprintf( stderr, "\nkCalculateGBVIForces2: Born radii/force forces warp=%u\n", gpu->bOutputBufferPerWarp ); (void) fflush( stderr );
#define GBVI_DEBUG 0
#if ( GBVI_DEBUG == 1 )
(
void
)
fprintf
(
stderr
,
"
\n
kCalculateGBVIForces2: Born radii/force forces:
\n
"
);
(
void
)
fflush
(
stderr
);
gpu
->
psBornForce
->
Download
();
gpu
->
psForce4
->
Download
();
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
stderr
,
"%d bF=%14.6e Fa[%14.6e %14.6e %14.6e] Fb[%14.6e %14.6e %14.6e]
\n
"
,
ii
,
gpu
->
psBornForce
->
_pSysStream
[
0
][
ii
],
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
x
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
y
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
z
,
gpu
->
psForce4
->
_pSysStream
[
1
][
ii
].
x
,
gpu
->
psForce4
->
_pSysStream
[
1
][
ii
].
y
,
gpu
->
psForce4
->
_pSysStream
[
1
][
ii
].
z
);
}
for
(
int
ii
=
0
;
ii
<
gpu
->
sim
.
paddedNumberOfAtoms
*
2
;
ii
++
){
(
void
)
fprintf
(
stderr
,
"%d bF=%14.6e Fa[%14.6e %14.6e %14.6e %14.6e]
\n
"
,
ii
,
gpu
->
psBornForce
->
_pSysStream
[
0
][
ii
],
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
x
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
y
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
z
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
w
);
}
#endif
#undef GBVI_DEBUG
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
break
;
case
CUTOFF
:
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVICutoffByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateGBVICutoffForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
case
PERIODIC
:
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVIPeriodicByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateGBVIPeriodicForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
}
LAUNCHERROR
(
"kCalculateGBVIForces2"
);
//kPrintGBVI( gpu, "kCalculateGBVIForces2", 0, stderr);
}
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
View file @
b9d12c46
...
...
@@ -45,15 +45,16 @@ __launch_bounds__(GT2XX_BORNFORCE2_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__
(
G8X_BORNFORCE2_THREADS_PER_BLOCK
,
1
)
#endif
METHOD_NAME
(
kCalculateGBVI
,
Forces2_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
numWorkUnits
)
METHOD_NAME
(
kCalculateGBVI
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
View file @
b9d12c46
...
...
@@ -35,9 +35,6 @@ using namespace std;
#include "gputypes.h"
#define UNROLLXX 0
#define UNROLLXY 0
struct
Atom
{
float
x
;
float
y
;
...
...
@@ -105,19 +102,17 @@ void kReduceObcGbsaBornSum_kernel()
while
(
pos
<
cSim
.
atoms
)
{
float
sum
=
0.0
f
;
float
*
pSt
=
cSim
.
pBornSum
+
pos
;
float
sum
=
0.0
f
;
float
*
pSt
=
cSim
.
pBornSum
+
pos
;
float2
atom
=
cSim
.
pObcData
[
pos
];
// Get summed Born data
for
(
int
i
=
0
;
i
<
cSim
.
nonbondOutputBuffers
;
i
++
)
{
sum
+=
*
pSt
;
// printf("%4d %4d A: %9.4f\n", pos, i, *pSt);
pSt
+=
cSim
.
stride
;
}
// Now calculate Born radius and OBC term.
sum
*=
0.5
f
*
atom
.
x
;
float
sum2
=
sum
*
sum
;
...
...
@@ -127,20 +122,49 @@ void kReduceObcGbsaBornSum_kernel()
float
bornRadius
=
1.0
f
/
(
1.0
f
/
atom
.
x
-
tanhSum
/
nonOffsetRadii
);
float
obcChain
=
atom
.
x
*
(
cSim
.
alphaOBC
-
2.0
f
*
cSim
.
betaOBC
*
sum
+
3.0
f
*
cSim
.
gammaOBC
*
sum2
);
obcChain
=
(
1.0
f
-
tanhSum
*
tanhSum
)
*
obcChain
/
nonOffsetRadii
;
cSim
.
pBornRadii
[
pos
]
=
bornRadius
;
cSim
.
pObcChain
[
pos
]
=
obcChain
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
cSim
.
pBornRadii
[
pos
]
=
bornRadius
;
cSim
.
pObcChain
[
pos
]
=
obcChain
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
OPENMMCUDA_EXPORT
kReduceObcGbsaBornSum
(
gpuContext
gpu
)
{
// printf("kReduceObcGbsaBornSum\n");
kReduceObcGbsaBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
gpu
->
bRecalculateBornRadii
=
false
;
LAUNCHERROR
(
"kReduceObcGbsaBornSum"
);
}
void
kPrintObc
(
gpuContext
gpu
,
std
::
string
callId
,
int
call
,
FILE
*
log
)
{
gpu
->
psObcData
->
Download
();
gpu
->
psBornRadii
->
Download
();
gpu
->
psObcChain
->
Download
();
gpu
->
psBornForce
->
Download
();
gpu
->
psPosq4
->
Download
();
gpu
->
psSigEps2
->
Download
();
(
void
)
fprintf
(
log
,
"kPrintObc Cuda bCh bR bF prm[2] sigeps[2]
\n
"
);
(
void
)
fprintf
(
stderr
,
"bOutputWarp=%u blks=%u th/blk=%u wu=%u %u shrd=%u
\n
"
,
gpu
->
bOutputBufferPerWarp
,
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
gpu
->
sim
.
workUnits
,
gpu
->
psWorkUnit
->
_pSysStream
[
0
][
0
],
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
);
for
(
int
ii
=
0
;
ii
<
gpu
->
sim
.
paddedNumberOfAtoms
;
ii
++
){
(
void
)
fprintf
(
log
,
"%6d %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e %15.7e
\n
"
,
ii
,
gpu
->
psObcChain
->
_pSysData
[
ii
],
gpu
->
psBornRadii
->
_pSysData
[
ii
],
gpu
->
psBornForce
->
_pSysData
[
ii
],
gpu
->
psObcData
->
_pSysData
[
ii
].
x
,
gpu
->
psObcData
->
_pSysData
[
ii
].
y
,
gpu
->
psSigEps2
->
_pSysData
[
ii
].
x
,
gpu
->
psSigEps2
->
_pSysData
[
ii
].
y
);
}
}
void
OPENMMCUDA_EXPORT
kCalculateObcGbsaBornSum
(
gpuContext
gpu
)
{
// printf("kCalculateObcgbsaBornSum\n");
...
...
@@ -155,7 +179,9 @@ void OPENMMCUDA_EXPORT kCalculateObcGbsaBornSum(gpuContext gpu)
kCalculateObcGbsaN2BornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
break
;
case
CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaCutoffByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
...
...
@@ -163,7 +189,9 @@ void OPENMMCUDA_EXPORT kCalculateObcGbsaBornSum(gpuContext gpu)
kCalculateObcGbsaCutoffBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
case
PERIODIC
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaPeriodicByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
...
...
@@ -172,5 +200,6 @@ void OPENMMCUDA_EXPORT kCalculateObcGbsaBornSum(gpuContext gpu)
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
}
LAUNCHERROR
(
"kCalculateBornSum"
);
}
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
View file @
b9d12c46
...
...
@@ -41,15 +41,12 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
void
METHOD_NAME
(
kCalculateObcGbsa
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
// unsigned int warp = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
#endif
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
...
...
@@ -69,8 +66,8 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
...
...
@@ -91,15 +88,15 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#if defined USE_
PERIODIC
#if defined USE_
CUTOFF
if
(
i
<
cSim
.
atoms
&&
x
+
j
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#el
if defined USE_CUTOFF
if
(
r2
<
cSim
.
nonbondedCutoffSqr
)
#el
se
if
(
i
<
cSim
.
atoms
&&
x
+
j
<
cSim
.
atoms
)
#endif
{
r
=
sqrt
(
r2
);
...
...
@@ -118,7 +115,7 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
j
].
sr
*
psA
[
j
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
float
rj
=
psA
[
j
].
r
;
float
rj
=
psA
[
j
].
s
r
;
if
(
ar
.
x
<
(
rj
-
r
))
{
apos
.
w
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
...
...
@@ -170,15 +167,15 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#ifdef USE_
PERIODIC
#ifdef USE_
CUTOFF
if
(
i
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#el
if defined USE_CUTOFF
if
(
r2
<
cSim
.
nonbondedCutoffSqr
)
#el
se
if
(
i
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
)
#endif
{
r
=
sqrt
(
r2
);
...
...
@@ -243,15 +240,15 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dx
-=
floor
(
dx
*
cSim
.
invPeriodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
*
cSim
.
invPeriodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
*
cSim
.
invPeriodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#ifdef USE_
PERIODIC
#ifdef USE_
CUTOFF
if
(
i
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#el
if defined USE_CUTOFF
if
(
r2
<
cSim
.
nonbondedCutoffSqr
)
#el
se
if
(
i
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
)
#endif
{
r
=
sqrt
(
r2
);
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.cu
View file @
b9d12c46
...
...
@@ -130,4 +130,6 @@ void OPENMMCUDA_EXPORT kCalculateObcGbsaForces2(gpuContext gpu)
break
;
}
LAUNCHERROR
(
"kCalculateObcGbsaForces2"
);
//kPrintObc( gpu, "Post kCalculateObcGbsaForces2", 0, stderr );
}
platforms/cuda/src/kernels/kForces.cu
View file @
b9d12c46
...
...
@@ -223,7 +223,7 @@ void kReduceBornSumAndForces_kernel()
void
kReduceBornSumAndForces
(
gpuContext
gpu
)
{
//
printf("kReduceBornSumAndForces\n");
f
printf
(
stderr
,
"kReduceBornSumAndForces
\n
"
);
kReduceBornSumAndForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceBornSumAndForces"
);
}
...
...
@@ -428,8 +428,6 @@ void kReduceGBVIBornForces_kernel()
void
kReduceObcGbsaBornForces
(
gpuContext
gpu
)
{
//fprintf( stderr, "kReduceObcGbsaBornForces %u %u %u %u %u\n", gpu->sim.blocks, gpu->sim.bsf_reduce_threads_per_block, GF1XX_THREADS_PER_BLOCK,
// GT2XX_THREADS_PER_BLOCK, G8X_THREADS_PER_BLOCK ); fflush( stderr );
if
(
gpu
->
bIncludeGBSA
){
kReduceObcGbsaBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceObcGbsaBornForces"
);
...
...
platforms/reference/src/gbsa/CpuGBVI.cpp
View file @
b9d12c46
...
...
@@ -254,7 +254,7 @@ void CpuGBVI::computeBornRadii( const vector<RealVec>& atomCoordinates, RealOpen
}
RealOpenMM
atomicRadius3
=
POW
(
radiusI
,
minusThree
);
if
(
_gbviParameters
->
getBornRadiusScalingMethod
()
=
=
GBVIParameters
::
QuinticSpline
){
if
(
_gbviParameters
->
getBornRadiusScalingMethod
()
!
=
GBVIParameters
::
QuinticSpline
){
sum
=
atomicRadius3
-
sum
;
bornRadii
[
atomI
]
=
POW
(
sum
,
minusOneThird
);
switchDeriviatives
[
atomI
]
=
one
;
...
...
platforms/reference/src/gbsa/CpuObc.cpp
View file @
b9d12c46
...
...
@@ -495,5 +495,69 @@ RealOpenMM CpuObc::computeBornEnergyForces( const vector<RealVec>& atomCoordinat
}
//printObc( atomCoordinates, partialCharges, bornRadii, bornForces, inputForces, "Obc Post loop2", stderr );
return
obcEnergy
;
}
/**---------------------------------------------------------------------------------------
Print Obc parameters, radii, forces, ...
@param atomCoordinates atomic coordinates
@param partialCharges partial charges
@param bornRadii Born radii (may be empty)
@param bornForces Born forces (may be empty)
@param forces forces (may be empty)
@param idString id string (who is calling)
@param log log file
--------------------------------------------------------------------------------------- */
void
CpuObc
::
printObc
(
const
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
const
RealOpenMMVector
&
partialCharges
,
const
RealOpenMMVector
&
bornRadii
,
const
RealOpenMMVector
&
bornForces
,
const
std
::
vector
<
OpenMM
::
RealVec
>&
forces
,
const
std
::
string
&
idString
,
FILE
*
log
){
// ---------------------------------------------------------------------------------------
const
ObcParameters
*
obcParameters
=
getObcParameters
();
const
int
numberOfAtoms
=
obcParameters
->
getNumberOfAtoms
();
const
RealOpenMMVector
&
atomicRadii
=
obcParameters
->
getAtomicRadii
();
const
RealOpenMM
preFactor
=
2.0
*
obcParameters
->
getElectricConstant
();
const
RealOpenMMVector
&
obcChain
=
getObcChain
();
const
RealOpenMMVector
&
scaledRadiusFactor
=
obcParameters
->
getScaledRadiusFactors
();
const
RealOpenMM
alphaObc
=
obcParameters
->
getAlphaObc
();
const
RealOpenMM
betaObc
=
obcParameters
->
getBetaObc
();
const
RealOpenMM
gammaObc
=
obcParameters
->
getGammaObc
();
// ---------------------------------------------------------------------------------------
(
void
)
fprintf
(
log
,
"Reference Obc %s atoms=%d
\n
"
,
idString
.
c_str
(),
numberOfAtoms
);
(
void
)
fprintf
(
log
,
" preFactor %15.7e
\n
"
,
preFactor
);
(
void
)
fprintf
(
log
,
" alpha %15.7e
\n
"
,
alphaObc
);
(
void
)
fprintf
(
log
,
" beta %15.7e
\n
"
,
betaObc
);
(
void
)
fprintf
(
log
,
" gamma %15.7e
\n
"
,
gammaObc
);
for
(
int
atomI
=
0
;
atomI
<
numberOfAtoms
;
atomI
++
){
(
void
)
fprintf
(
log
,
"%6d r=%15.7e q=%6.3f"
,
atomI
,
atomicRadii
[
atomI
],
partialCharges
[
atomI
]
);
if
(
obcChain
.
size
()
>
atomI
){
(
void
)
fprintf
(
log
,
" bChn=%15.7e"
,
obcChain
[
atomI
]
);
}
if
(
bornRadii
.
size
()
>
atomI
){
(
void
)
fprintf
(
log
,
" bR=%15.7e"
,
bornRadii
[
atomI
]
);
}
if
(
bornForces
.
size
()
>
atomI
){
(
void
)
fprintf
(
log
,
" bF=%15.7e"
,
bornForces
[
atomI
]
);
}
(
void
)
fprintf
(
log
,
"
\n
"
);
}
return
;
}
platforms/reference/src/gbsa/CpuObc.h
View file @
b9d12c46
...
...
@@ -158,6 +158,27 @@ class CpuObc {
RealOpenMM
computeBornEnergyForces
(
const
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
const
RealOpenMMVector
&
partialCharges
,
std
::
vector
<
OpenMM
::
RealVec
>&
forces
);
/**---------------------------------------------------------------------------------------
Print Obc parameters, radii, forces, ...
@param atomCoordinates atomic coordinates
@param partialCharges partial charges
@param bornRadii Born radii (may be empty)
@param bornForces Born forces (may be empty)
@param forces forces (may be empty)
@param idString id string (who is calling)
@param log log file
--------------------------------------------------------------------------------------- */
void
printObc
(
const
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
const
RealOpenMMVector
&
partialCharges
,
const
RealOpenMMVector
&
bornRadii
,
const
RealOpenMMVector
&
bornForces
,
const
std
::
vector
<
OpenMM
::
RealVec
>&
forces
,
const
std
::
string
&
idString
,
FILE
*
log
);
};
// ---------------------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment