Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
7b89b1bf
Commit
7b89b1bf
authored
Nov 11, 2009
by
Mark Friedrichs
Browse files
Mods to remove requirement that nonbonded force buffer size should be doubled for GB/VI and OBC
parent
67b261f6
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
879 additions
and
31 deletions
+879
-31
plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.cpp
...s/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.cpp
+32
-25
plugins/freeEnergy/platforms/cuda/src/kernels/GpuFreeEnergyCudaKernels.h
...rgy/platforms/cuda/src/kernels/GpuFreeEnergyCudaKernels.h
+8
-0
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreAux.h
...gy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreAux.h
+133
-0
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.cu
...atforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.cu
+243
-0
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.h
...latforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.h
+375
-0
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.cu
...platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.cu
+82
-0
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreForces2.h
...forms/cuda/src/kernels/kCalculateObcGbsaSoftcoreForces2.h
+6
-6
No files found.
plugins/freeEnergy/platforms/cuda/src/CudaFreeEnergyKernels.cpp
View file @
7b89b1bf
...
...
@@ -386,6 +386,8 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces(ContextImpl&
// ---------------------------------------------------------------------------------------
_gpuContext
*
gpu
=
data
.
gpu
;
// static int call = 0;
// call++;
// write array, ... address's to board
...
...
@@ -398,7 +400,6 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces(ContextImpl&
}
SetCalculateLocalSoftcoreGpuSim
(
gpu
);
SetCalculateCDLJSoftcoreGpuSim
(
gpu
);
// (void) fprintf( log, "Calling SetCalculateLocalSoftcoreGpuSim\n" ); fflush( stderr );
// flip strides (unsure if this is needed)
...
...
@@ -413,20 +414,20 @@ void CudaFreeEnergyCalcNonbondedSoftcoreForceKernel::executeForces(ContextImpl&
}
}
#endif
}
// calculate nonbonded ixns here, only if implicit solvent is inactive
if
(
!
getIncludeGBSA
()
&&
!
getIncludeGBVI
()
)
{
//kClearForces(gpu);
kCalculateCDLJSoftcoreForces
(
gpu
);
}
// local LJ-14 forces
kCalculateLocalSoftcoreForces
(
gpu
);
//kPrintForces(gpu, "Post kCalculateLocalSoftcoreForces", call );
//kReduceForces(gpu);
//exit(0);
}
double
CudaFreeEnergyCalcNonbondedSoftcoreForceKernel
::
executeEnergy
(
ContextImpl
&
context
)
{
...
...
@@ -527,6 +528,7 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& co
_gpuContext
*
gpu
=
data
.
gpu
;
int
debug
=
1
;
static
int
call
=
0
;
// send address's of arrays, ... to device on first call
// required since force/energy buffers not set when CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::initialize() was called
...
...
@@ -535,8 +537,6 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& co
setSim
++
;
SetCalculateObcGbsaSoftcoreBornSumSim
(
gpu
);
SetCalculateCDLJObcGbsaSoftcoreGpu1Sim
(
gpu
);
//SetCalculateObcGbsaForces2Sim( gpu );
SetCalculateObcGbsaSoftcoreForces2Sim
(
gpu
);
}
...
...
@@ -546,8 +546,11 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& co
// calculate Born radii and first loop of Obc forces
if
(
debug
&&
log
){
(
void
)
fprintf
(
stderr
,
"
\n
%s: calling kCalculateCDLJObcGbsaSoftcoreForces1
\n
"
,
methodName
.
c_str
()
);
(
void
)
fflush
(
stderr
);
call
++
;
if
(
log
){
(
void
)
fprintf
(
log
,
"
\n
%s: calling kCalculateCDLJObcGbsaSoftcoreForces1
\n
"
,
methodName
.
c_str
()
);
(
void
)
fflush
(
log
);
}
}
kClearBornForces
(
gpu
);
...
...
@@ -555,9 +558,10 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& co
kReduceObcGbsaBornSum
(
gpu
);
kCalculateCDLJObcGbsaSoftcoreForces1
(
gpu
);
//kPrintForces(gpu, "Post kCalculateCDLJObcGbsaSoftcoreForces1", call );
if
(
debug
&&
log
){
(
void
)
fprintf
(
stderr
,
"
\n
%s: calling kReduceObcGbsaBornForces
\n
"
,
methodName
.
c_str
()
);
(
void
)
fflush
(
stderr
);
(
void
)
fprintf
(
log
,
"
\n
%s: calling kReduceObcGbsaBornForces
\n
"
,
methodName
.
c_str
()
);
(
void
)
fflush
(
log
);
}
// compute Born forces
...
...
@@ -565,13 +569,12 @@ void CudaFreeEnergyCalcGBSAOBCSoftcoreForceKernel::executeForces(ContextImpl& co
kReduceObcGbsaSoftcoreBornForces
(
gpu
);
if
(
debug
&&
log
){
(
void
)
fprintf
(
stderr
,
"
\n
%s calling kCalculateObcGbsaForces2
\n
"
,
methodName
.
c_str
()
);
(
void
)
fflush
(
stderr
);
(
void
)
fprintf
(
log
,
"
\n
%s calling kCalculateObcGbsaForces2
\n
"
,
methodName
.
c_str
()
);
(
void
)
fflush
(
log
);
}
// second loop of Obc GBSA forces
//kCalculateObcGbsaForces2(gpu);
kCalculateObcGbsaSoftcoreForces2
(
gpu
);
}
...
...
@@ -668,6 +671,7 @@ void CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeForces(ContextImpl& conte
_gpuContext
*
gpu
=
data
.
gpu
;
int
debug
=
1
;
static
int
call
=
0
;
// send address's of arrays, ... to device on first call
// required since force/energy buffers not set when CudaFreeEnergyCalcGBVISoftcoreForceKernel::initialize() was called
...
...
@@ -676,36 +680,40 @@ void CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeForces(ContextImpl& conte
setSim
++
;
SetCalculateGBVISoftcoreBornSumGpuSim
(
gpu
);
SetCalculateCDLJObcGbsaSoftcoreGpu1Sim
(
gpu
);
SetCalculateGBVIForces2Sim
(
gpu
);
SetCalculateGBVI
Softcore
Forces2Sim
(
gpu
);
}
// required!!
// required?
gpu
->
bRecalculateBornRadii
=
true
;
// fixed
//kClearForces(gpu);
// calculate Born radii and first loop of GB/VI forces
if
(
debug
&&
log
){
(
void
)
fprintf
(
stderr
,
"
\n
%s: calling kCalculateCDLJObcGbsaSoftcoreForces1 & %s
\n
"
,
methodName
.
c_str
(),
getQuinticScaling
()
?
"kReduceGBVIBornSumQuinticScaling"
:
"kReduceGBVIBornSum"
);
(
void
)
fflush
(
stderr
);
call
++
;
if
(
log
){
(
void
)
fprintf
(
log
,
"
\n
%s: calling kCalculateCDLJObcGbsaSoftcoreForces1 & %s
\n
"
,
methodName
.
c_str
(),
getQuinticScaling
()
?
"kReduceGBVIBornSumQuinticScaling"
:
"kReduceGBVIBornSum"
);
(
void
)
fflush
(
log
);
}
}
kClearBornForces
(
gpu
);
kCalculateGBVISoftcoreBornSum
(
gpu
);
if
(
getQuinticScaling
()
){
//(void) fprintf( stderr, "\n%s: calling kReduceGBVIBornSumQuinticScaling\n", methodName.c_str() ); fflush( stderr );
kReduceGBVIBornSumQuinticScaling
(
gpu
,
gpuGBVISoftcore
);
}
else
{
kReduceGBVIBornSum
(
gpu
);
}
kCalculateCDLJObcGbsaSoftcoreForces1
(
gpu
);
//kPrintForces(gpu, "Post kCalculateCDLJObcGbsaSoftcoreForces1", call );
if
(
debug
&&
log
){
(
void
)
fprintf
(
stderr
,
"
\n
%s: calling %s
\n
"
,
methodName
.
c_str
(),
(
void
)
fprintf
(
log
,
"
\n
%s: calling %s
\n
"
,
methodName
.
c_str
(),
getQuinticScaling
()
?
"kReduceGBVIBornForcesQuinticScaling"
:
"kReduceObcGbsaBornForces"
);
(
void
)
fflush
(
stderr
);
(
void
)
fflush
(
log
);
}
// compute Born forces
...
...
@@ -719,14 +727,13 @@ void CudaFreeEnergyCalcGBVISoftcoreForceKernel::executeForces(ContextImpl& conte
}
if
(
debug
&&
log
){
(
void
)
fprintf
(
stderr
,
"
\n
%s: calling kCalculateGBVIForces2
\n
"
,
methodName
.
c_str
()
);
(
void
)
fflush
(
stderr
);
(
void
)
fprintf
(
log
,
"
\n
%s: calling kCalculateGBVIForces2
\n
"
,
methodName
.
c_str
()
);
(
void
)
fflush
(
log
);
}
// second loop of GB/VI forces
kCalculateGBVIForces2
(
gpu
);
//kReduceForces(gpu);
kCalculateGBVISoftcoreForces2
(
gpu
);
}
double
CudaFreeEnergyCalcGBVISoftcoreForceKernel
::
executeEnergy
(
ContextImpl
&
context
)
{
...
...
plugins/freeEnergy/platforms/cuda/src/kernels/GpuFreeEnergyCudaKernels.h
View file @
7b89b1bf
...
...
@@ -103,11 +103,18 @@ void SetCalculateGBVISoftcoreBornSumGpuSim( gpuContext gpu);
extern
"C"
void
SetCalculateGBVISoftcoreSupplementarySim
(
GpuGBVISoftcore
*
gpuGBVISoftcore
);
extern
"C"
void
SetCalculateGBVISoftcoreForces2Sim
(
gpuContext
gpu
);
extern
"C"
void
GetCalculateGBVISoftcoreForces2Sim
(
gpuContext
gpu
);
// kernel calls to device
extern
void
kReduceGBVIBornSumQuinticScaling
(
gpuContext
gpu
,
GpuGBVISoftcore
*
gpuGBVISoftcore
);
extern
void
kCalculateGBVISoftcoreBornSum
(
gpuContext
gpu
);
extern
void
kReduceGBVIBornForcesQuinticScaling
(
gpuContext
gpu
);
extern
void
kCalculateGBVISoftcoreForces2
(
gpuContext
gpu
);
// Obc softcore
...
...
@@ -156,6 +163,7 @@ extern void kCalculateObcGbsaSoftcoreBornSum( gpuContext gpu );
// this method is not needed; the OpenMM version can be used
extern
void
kCalculateObcGbsaSoftcoreForces2
(
gpuContext
gpu
);
extern
void
kPrintForces
(
gpuContext
gpu
,
std
::
string
idString
,
int
call
);
// shared
...
...
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreAux.h
0 → 100755
View file @
7b89b1bf
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Mark Friedrichs *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#ifndef __GpuGBVIAUX_H__
#define __GpuGBVIAUX_H__
/**
* This file contains subroutines used in evaluating quantities associated w/ the GB/VI function
*/
__device__
float
getGBVI_L
(
float
r
,
float
x
,
float
S
)
{
float
rInv
=
1
.
0
f
/
r
;
float
xInv
=
1
.
0
f
/
x
;
float
xInv2
=
xInv
*
xInv
;
float
diff2
=
(
r
+
S
)
*
(
r
-
S
);
return
(
1
.
5
f
*
xInv2
)
*
(
(
0
.
25
f
*
rInv
)
-
(
xInv
/
3
.
0
f
)
+
(
0
.
125
f
*
diff2
*
xInv2
*
rInv
)
);
}
__device__
float
getGBVI_Volume
(
float
r_ij
,
float
R
,
float
S
)
{
float
upperBound
=
r_ij
+
S
;
float
rdiffS
=
r_ij
-
S
;
float
lowerBound
=
R
>
rdiffS
?
R
:
rdiffS
;
float
L_upper
=
getGBVI_L
(
r_ij
,
upperBound
,
S
);
float
L_lower
=
getGBVI_L
(
r_ij
,
lowerBound
,
S
);
float
mask
=
r_ij
<
(
R
-
S
)
?
0
.
0
f
:
1
.
0
f
;
float
addOn
=
r_ij
<
(
S
-
R
)
?
(
1
.
0
f
/
(
R
*
R
*
R
))
:
0
.
0
f
;
return
(
mask
*
(
L_upper
-
L_lower
)
+
addOn
);
}
__device__
float
getGBVI_dL_dr
(
float
r
,
float
x
,
float
S
)
{
float
rInv
=
1
.
0
f
/
r
;
float
rInv2
=
rInv
*
rInv
;
float
xInv
=
1
.
0
f
/
x
;
float
xInv2
=
xInv
*
xInv
;
float
xInv3
=
xInv2
*
xInv
;
float
diff2
=
(
r
+
S
)
*
(
r
-
S
);
return
(
(
-
1
.
5
f
*
xInv2
*
rInv2
)
*
(
0
.
25
f
+
0
.
125
f
*
diff2
*
xInv2
)
+
0
.
375
f
*
xInv3
*
xInv
);
//return 0.0f;
}
__device__
float
getGBVI_dL_dx
(
float
r
,
float
x
,
float
S
)
{
float
rInv
=
1
.
0
f
/
r
;
float
xInv
=
1
.
0
f
/
x
;
float
xInv2
=
xInv
*
xInv
;
float
xInv3
=
xInv2
*
xInv
;
float
diff
=
(
r
+
S
)
*
(
r
-
S
);
return
(
(
-
1
.
5
f
*
xInv3
)
*
(
(
0
.
5
f
*
rInv
)
-
xInv
+
(
0
.
5
f
*
diff
*
xInv2
*
rInv
)
));
}
__device__
float
getGBVI_dE2
(
float
r
,
float
R
,
float
S
,
float
bornForce
)
{
float
diff
=
S
-
R
;
float
absDiff
=
fabsf
(
S
-
R
);
float
dE
=
getGBVI_dL_dr
(
r
,
r
+
S
,
S
)
+
getGBVI_dL_dx
(
r
,
r
+
S
,
S
);
float
mask
;
float
lowerBound
;
if
(
(
R
>
(
r
-
S
))
&&
(
absDiff
<
r
)
){
mask
=
0
.
0
f
;
lowerBound
=
R
;
}
else
{
mask
=
1
.
0
f
;
lowerBound
=
(
r
-
S
);
}
dE
-=
getGBVI_dL_dr
(
r
,
lowerBound
,
S
)
+
mask
*
getGBVI_dL_dx
(
r
,
lowerBound
,
S
);
dE
=
(
absDiff
>=
r
)
&&
r
>=
diff
?
0
.
0
f
:
dE
;
dE
*=
(
(
r
>
1.0e-08
f
)
?
(
bornForce
/
r
)
:
0
.
0
f
);
return
(
-
dE
);
}
__device__
float
getGBVIBornForce2
(
float
bornRadius
,
float
R
,
float
bornForce
,
float
gamma
)
{
float
ratio
=
(
R
/
bornRadius
);
float
returnBornForce
=
bornForce
+
(
3
.
0
f
*
gamma
*
ratio
*
ratio
*
ratio
)
/
bornRadius
;
// 'cavity' term
float
br2
=
bornRadius
*
bornRadius
;
returnBornForce
*=
(
1
.
0
f
/
3
.
0
f
)
*
br2
*
br2
;
return
returnBornForce
;
}
#endif // __GpuGBVIAUX_H__
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.cu
0 → 100755
View file @
7b89b1bf
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using
namespace
std
;
#include "gputypes.h"
#include "cudaKernels.h"
#include "GpuFreeEnergyCudaKernels.h"
struct
Atom
{
float
x
;
float
y
;
float
z
;
float
r
;
float
sr
;
float
fx
;
float
fy
;
float
fz
;
float
fb
;
};
static
__constant__
cudaGmxSimulation
cSim
;
void
SetCalculateGBVISoftcoreForces2Sim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
GetCalculateGBVISoftcoreForces2Sim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
#include "kCalculateGBVISoftcoreAux.h"
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* several times in kCalculateGBVIForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
void
kCalculateGBVISoftcoreForces2a_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
if
(
pos
>=
cSim
.
atoms
)
return
;
float4
apos
=
cSim
.
pPosq
[
pos
];
float4
ar
=
cSim
.
pGBVIData
[
pos
];
float
fb
=
cSim
.
pBornForce
[
pos
];
unsigned
int
posJ
=
0
;
float4
force
;
force
.
x
=
force
.
y
=
force
.
z
=
force
.
w
=
0.0
f
;
while
(
posJ
<
cSim
.
atoms
)
{
float4
aposJ
=
cSim
.
pPosq
[
posJ
];
float4
arJ
=
cSim
.
pGBVIData
[
posJ
];
float
fbJ
=
cSim
.
pBornForce
[
posJ
];
float
dx
=
aposJ
.
x
-
apos
.
x
;
float
dy
=
aposJ
.
y
-
apos
.
y
;
float
dz
=
aposJ
.
z
-
apos
.
z
;
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
float
dE
=
getGBVI_dE2
(
r
,
ar
.
x
,
arJ
.
y
,
fb
);
dE
=
r
>
1.0e-08
f
?
dE
:
0.0
f
;
//dx = dy = dz = 1.0f;
float
d
=
dx
*
dE
;
force
.
x
-=
d
;
d
=
dy
*
dE
;
force
.
y
-=
d
;
d
=
dz
*
dE
;
force
.
z
-=
d
;
#if 1
dE
=
getGBVI_dE2
(
r
,
arJ
.
x
,
ar
.
y
,
fbJ
);
dE
=
r
>
1.0e-08
f
?
dE
:
0.0
f
;
d
=
dx
*
dE
;
force
.
x
-=
d
;
d
=
dy
*
dE
;
force
.
y
-=
d
;
d
=
dz
*
dE
;
force
.
z
-=
d
;
#endif
posJ
+=
1
;
}
// Write results
cSim
.
pForce4a
[
pos
]
=
force
;
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateGBVISoftcoreForces2.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateGBVISoftcoreForces2.h"
// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateGBVISoftcoreForces2.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateGBVISoftcoreForces2.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateGBVISoftcoreForces2.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateGBVISoftcoreForces2.h"
void
kCalculateGBVISoftcoreForces2
(
gpuContext
gpu
)
{
//printf("kCalculateGBVISoftcoreForces2\n");
size_t
numWithInteractions
;
#if 0
kClearForces(gpu);
(void) fprintf( stderr, "\nkCalculateGBVISoftcoreForces2: cleared force prior loop2\n" ); (void) fflush( stderr );
kCalculateGBVISoftcoreForces2a_kernel<<<gpu->sim.blocks, 384>>>();
(void) fprintf( stderr, "\ncalled kCalculateGBVISoftcoreForces2a\n" ); (void) fflush( stderr );
return;
#endif
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVISoftcoreN2ByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
else
kCalculateGBVISoftcoreN2Forces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
//(void) fprintf( stderr, "\nkCalculateGBVIForces2: Born radii/force forces warp=%u\n", gpu->bOutputBufferPerWarp ); (void) fflush( stderr );
#define GBVI_DEBUG 0
#if ( GBVI_DEBUG == 1 )
(
void
)
fprintf
(
stderr
,
"
\n
kCalculateGBVISoftcoreForces2: Born radii/force forces:
\n
"
);
(
void
)
fflush
(
stderr
);
gpu
->
psBornForce
->
Download
();
gpu
->
psForce4
->
Download
();
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
stderr
,
"%d bF=%14.6e Fa[%14.6e %14.6e %14.6e] Fb[%14.6e %14.6e %14.6e]
\n
"
,
ii
,
gpu
->
psBornForce
->
_pSysStream
[
0
][
ii
],
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
x
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
y
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
z
,
gpu
->
psForce4
->
_pSysStream
[
1
][
ii
].
x
,
gpu
->
psForce4
->
_pSysStream
[
1
][
ii
].
y
,
gpu
->
psForce4
->
_pSysStream
[
1
][
ii
].
z
);
}
for
(
int
ii
=
0
;
ii
<
gpu
->
sim
.
paddedNumberOfAtoms
*
2
;
ii
++
){
(
void
)
fprintf
(
stderr
,
"%d bF=%14.6e Fa[%14.6e %14.6e %14.6e %14.6e]
\n
"
,
ii
,
gpu
->
psBornForce
->
_pSysStream
[
0
][
ii
],
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
x
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
y
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
z
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
w
);
}
#endif
#undef GBVI_DEBUG
break
;
case
CUTOFF
:
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVISoftcoreCutoffByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
else
kCalculateGBVISoftcoreCutoffForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
break
;
case
PERIODIC
:
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateGBVISoftcorePeriodicByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
else
kCalculateGBVISoftcorePeriodicForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
break
;
}
LAUNCHERROR
(
"kCalculateGBVISoftcoreForces2"
);
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateGBVISoftcoreForces2.h
0 → 100644
View file @
7b89b1bf
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "kCalculateGBVIAux.h"
/**
* This file contains the kernel for evalauating the second stage of GBSA. It is included
* several times in kCalculateGBVIForces2.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateGBVISoftcore
,
Forces2_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
numWorkUnits
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
i
=
x
+
tgx
;
float4
apos
=
cSim
.
pPosq
[
i
];
float4
ar
=
cSim
.
pGBVIData
[
i
];
float
fb
=
cSim
.
pBornForce
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
float3
af
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fz
=
af
.
z
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
r
=
ar
.
x
;
sA
[
threadIdx
.
x
].
sr
=
ar
.
y
;
sA
[
threadIdx
.
x
].
fb
=
fb
;
for
(
unsigned
int
j
=
(
tgx
+
1
)
&
(
GRID
-
1
);
j
!=
tgx
;
j
=
(
j
+
1
)
&
(
GRID
-
1
))
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
// Atom I Born forces and sum
float
dE
=
getGBVI_dE2
(
r
,
ar
.
x
,
psA
[
j
].
sr
,
fb
);
#if defined USE_PERIODIC
if
(
i
>=
cSim
.
atoms
||
x
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dE
=
0
.
0
f
;
}
#endif
#if defined USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dE
=
0
.
0
f
;
}
#endif
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
psA
[
j
].
fx
+=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
psA
[
j
].
fy
+=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
psA
[
j
].
fz
+=
d
;
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
+
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
af
.
y
+
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
af
.
z
+
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4
[
offset
]
=
of
;
#else
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
+
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
af
.
y
+
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
af
.
z
+
sA
[
threadIdx
.
x
].
fz
;
of
.
w
=
0
.
0
f
;
cSim
.
pForce4
[
offset
]
=
of
;
#endif
}
else
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
float4
temp1
=
cSim
.
pGBVIData
[
j
];
float
fb
=
cSim
.
pBornForce
[
j
];
sA
[
threadIdx
.
x
].
fb
=
fb
;
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
r
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
}
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
if
(
flags
==
0xFFFFFFFF
)
#endif
{
// Compute all interactions within this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
float
dE
=
getGBVI_dE2
(
r
,
ar
.
x
,
psA
[
tj
].
sr
,
fb
);
#if defined USE_PERIODIC
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dE
=
0
.
0
f
;
}
#endif
#if defined USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dE
=
0
.
0
f
;
}
#endif
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
psA
[
tj
].
fx
+=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
psA
[
tj
].
fy
+=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
psA
[
tj
].
fz
+=
d
;
// Atom J Born sum term
dE
=
getGBVI_dE2
(
r
,
psA
[
tj
].
r
,
ar
.
y
,
psA
[
tj
].
fb
);
#ifdef USE_PERIODIC
if
(
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dE
=
0
.
0
f
;
}
#endif
#if defined USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dE
=
0
.
0
f
;
}
#endif
dx
*=
dE
;
dy
*=
dE
;
dz
*=
dE
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
// Interleaved Atom I and J Born Forces and sum components
float
dE
=
getGBVI_dE2
(
r
,
ar
.
x
,
psA
[
j
].
sr
,
fb
);
#if defined USE_PERIODIC
if
(
i
>=
cSim
.
atoms
||
y
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dE
=
0
.
0
f
;
}
#endif
#if defined USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dE
=
0
.
0
f
;
}
#endif
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
tempBuffer
[
threadIdx
.
x
].
x
=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
tempBuffer
[
threadIdx
.
x
].
y
=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
tempBuffer
[
threadIdx
.
x
].
z
=
d
;
// Atom J Born sum term
dE
=
getGBVI_dE2
(
r
,
psA
[
j
].
r
,
ar
.
y
,
psA
[
j
].
fb
);
#ifdef USE_PERIODIC
if
(
i
>=
cSim
.
atoms
||
y
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dE
=
0
.
0
f
;
}
#endif
#if defined USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dE
=
0
.
0
f
;
}
#endif
dx
*=
dE
;
dy
*=
dE
;
dz
*=
dE
;
tempBuffer
[
threadIdx
.
x
].
x
+=
dx
;
tempBuffer
[
threadIdx
.
x
].
y
+=
dy
;
tempBuffer
[
threadIdx
.
x
].
z
+=
dz
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
// Sum the forces on atom j.
if
(
tgx
%
2
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
1
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
1
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
1
].
z
;
}
if
(
tgx
%
4
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
2
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
2
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
2
].
z
;
}
if
(
tgx
%
8
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
4
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
4
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
4
].
z
;
}
if
(
tgx
%
16
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
8
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
8
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
8
].
z
;
}
if
(
tgx
==
0
)
{
psA
[
j
].
fx
+=
tempBuffer
[
threadIdx
.
x
].
x
+
tempBuffer
[
threadIdx
.
x
+
16
].
x
;
psA
[
j
].
fy
+=
tempBuffer
[
threadIdx
.
x
].
y
+
tempBuffer
[
threadIdx
.
x
+
16
].
y
;
psA
[
j
].
fz
+=
tempBuffer
[
threadIdx
.
x
].
z
+
tempBuffer
[
threadIdx
.
x
+
16
].
z
;
}
}
}
}
#endif
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4
[
offset
]
=
of
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4
[
offset
]
=
of
;
#else
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
of
.
w
=
0
.
0
f
;
cSim
.
pForce4
[
offset
]
=
of
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4
[
offset
]
=
of
;
#endif
}
lasty
=
y
;
pos
++
;
}
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateNonbondedSoftcore.cu
View file @
7b89b1bf
...
...
@@ -395,3 +395,85 @@ fprintf( stderr, "kCalculateCDLJSoftcoreForces: bOutputBufferPerWarp=%u blks=%u
#endif
}
}
void
kPrintForces
(
gpuContext
gpu
,
std
::
string
idString
,
int
call
)
{
// printf("kReduceForces\n");
#define GBVI_DEBUG 4
#if ( GBVI_DEBUG == 4 )
gpu
->
psBornRadii
->
Download
();
gpu
->
psObcData
->
Download
();
gpu
->
psObcChain
->
Download
();
gpu
->
psBornForce
->
Download
();
gpu
->
psForce4
->
Download
();
gpu
->
psPosq4
->
Download
();
int
maxPrint
=
0
;
int
nanHit
=
0
;
int
targetIndex
=
852
;
float
maxForce
=
3.0e+04
;
float
maxPosition
=
2.0e+02
;
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
int
hit
=
0
;
float
dist
=
sqrtf
(
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
x
*
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
x
+
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
y
*
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
y
+
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
z
*
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
z
);
if
(
fabs
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
x
)
>
maxForce
||
fabs
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
y
)
>
maxForce
||
fabs
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
z
)
>
maxForce
||
// gpu->psBornRadii->_pSysStream[0][ii] <= 0.0 ||
dist
>
maxPosition
||
isnan
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
x
)
||
isnan
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
y
)
||
isnan
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
z
)
){
hit
=
1
;
}
else
{
hit
=
0
;
}
if
(
ii
==
targetIndex
||
ii
==
(
targetIndex
+
1
)
||
ii
==
(
targetIndex
+
2
)
)
hit
=
1
;
if
(
isnan
(
gpu
->
psBornForce
->
_pSysStream
[
0
][
ii
]
)
||
isnan
(
gpu
->
psBornRadii
->
_pSysStream
[
0
][
ii
]
)
||
isnan
(
gpu
->
psObcChain
->
_pSysStream
[
0
][
ii
]
)
||
isnan
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
x
)
||
isnan
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
y
)
||
isnan
(
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
z
)
){
hit
=
1
;
nanHit
=
1
;
}
//if( hit || ii < maxPrint || ii >= (gpu->natoms - maxPrint) )
if
(
hit
){
static
int
firstHit
=
1
;
if
(
firstHit
){
firstHit
=
0
;
(
void
)
fprintf
(
stderr
,
"
\n
kPrintForces: %d [r, scl q] b[r/c/f] f[] x[] Born radii/force (%p %p)
\n
"
,
call
,
gpu
->
psBornForce
,
gpu
->
psBornForce
->
_pDevStream
[
0
]
);
}
(
void
)
fprintf
(
stderr
,
"%6d [%8.3f %8.3f %8.3f] b[%13.6e %13.6e %13.6e] f[%13.6e %13.6e %13.6e] x[%13.6e %13.6e %13.6e] %10.3e %s %s %d
\n
"
,
ii
,
(
gpu
->
psObcData
->
_pSysStream
[
0
][
ii
].
x
+
0.009
f
),
(
gpu
->
psObcData
->
_pSysStream
[
0
][
ii
].
y
/
gpu
->
psObcData
->
_pSysStream
[
0
][
ii
].
x
),
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
w
,
gpu
->
psBornRadii
->
_pSysStream
[
0
][
ii
],
gpu
->
psObcChain
->
_pSysStream
[
0
][
ii
],
gpu
->
psBornForce
->
_pSysStream
[
0
][
ii
],
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
x
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
y
,
gpu
->
psForce4
->
_pSysStream
[
0
][
ii
].
z
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
x
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
y
,
gpu
->
psPosq4
->
_pSysStream
[
0
][
ii
].
z
,
dist
,
(
hit
?
"XXXXXXX"
:
""
),
idString
.
c_str
(),
call
);
}
}
(
void
)
fflush
(
stderr
);
// if( nanHit )exit(0);
#endif
}
plugins/freeEnergy/platforms/cuda/src/kernels/kCalculateObcGbsaSoftcoreForces2.h
View file @
7b89b1bf
...
...
@@ -135,12 +135,12 @@ __global__ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned
#else
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
#endif
of
=
cSim
.
pForce4
b
[
offset
];
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
+
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
af
.
y
+
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
af
.
z
+
sA
[
threadIdx
.
x
].
fz
;
of
.
w
=
0
.
0
f
;
cSim
.
pForce4
b
[
offset
]
=
of
;
cSim
.
pForce4
[
offset
]
=
of
;
}
else
{
...
...
@@ -400,22 +400,22 @@ __global__ void METHOD_NAME(kCalculateObcGbsaSoftcore, Forces2_kernel)(unsigned
#else
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
#endif
of
=
cSim
.
pForce4
b
[
offset
];
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4
b
[
offset
]
=
of
;
cSim
.
pForce4
[
offset
]
=
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
#else
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
#endif
of
=
cSim
.
pForce4
b
[
offset
];
of
=
cSim
.
pForce4
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4
b
[
offset
]
=
of
;
cSim
.
pForce4
[
offset
]
=
of
;
}
lasty
=
y
;
pos
++
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment