Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
150134f3
Commit
150134f3
authored
Jun 17, 2011
by
Mark Friedrichs
Browse files
Update to files corrupted in r2814
parent
9c4717f1
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
441 additions
and
19 deletions
+441
-19
platforms/cuda/src/kernels/kForces.cu
platforms/cuda/src/kernels/kForces.cu
+441
-19
No files found.
platforms/cuda/src/kernels/kForces.cu
View file @
150134f3
Vim:
Warning:
Output
is
not
to
a
terminal
[
?
1049h
[
?
1h
=
[
1
;
59
r
[
?
12
;
25h
[
?
12l
[
?
25h
[
27
m
[
m
[
H
[
2
J
[
?
25l
[
59
;
1
H
"svn-commit.tmp"
15L
,
601
C
[
1
;
1
H
[
33
m
1
2
[
m
[
32
m
--
This
line
,
and
those
below
,
will
be
ignored
--
[
m
[
33
m
3
4
[
m
[
1
m
[
35
mM
plugins
/
amoeba
/
platforms
/
cuda
/
src
/
AmoebaCudaKernelFactory
.
cpp
[
m
[
33
m
5
[
m
[
1
m
[
35
mM
plugins
/
freeEnergy
/
platforms
/
reference
/
src
/
gbsa
/
CpuGBVISoftcore
.
cpp
[
m
[
33
m
6
[
m
[
1
m
[
35
mM
openmmapi
/
include
/
openmm
/
GBVIForce
.
h
[
m
[
33
m
7
[
m
[
1
m
[
35
mM
openmmapi
/
src
/
GBVIForce
.
cpp
[
m
[
33
m
8
[
m
[
1
m
[
35
mM
olla
/
src
/
Platform
.
cpp
[
m
[
33
m
9
[
m
[
1
m
[
35
mM
platforms
/
opencl
/
src
/
OpenCLContext
.
h
[
m
[
33
m
10
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
CudaKernels
.
cpp
[
m
[
33
m
11
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
kCalculateGBVIBornSum
.
cu
[
m
[
33
m
12
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
gputypes
.
h
[
m
[
33
m
13
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
cudatypes
.
h
[
m
[
33
m
14
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
kForces
.
cu
[
m
[
33
m
15
[
m
[
1
m
[
35
mM
platforms
/
cuda
/
src
/
kernels
/
gpu
.
cpp
[
m
[
1
m
[
34
m
~
[
17
;
1
H
~
[
18
;
1
H
~
[
19
;
1
H
~
[
20
;
1
H
~
[
21
;
1
H
~
[
22
;
1
H
~
[
23
;
1
H
~
[
24
;
1
H
~
[
25
;
1
H
~
[
26
;
1
H
~
[
27
;
1
H
~
[
28
;
1
H
~
[
29
;
1
H
~
[
30
;
1
H
~
[
31
;
1
H
~
[
32
;
1
H
~
[
33
;
1
H
~
[
34
;
1
H
~
[
35
;
1
H
~
[
36
;
1
H
~
[
37
;
1
H
~
[
38
;
1
H
~
[
39
;
1
H
~
[
40
;
1
H
~
[
41
;
1
H
~
[
42
;
1
H
~
[
43
;
1
H
~
[
44
;
1
H
~
[
45
;
1
H
~
[
46
;
1
H
~
[
47
;
1
H
~
[
48
;
1
H
~
[
49
;
1
H
~
[
50
;
1
H
~
[
51
;
1
H
~
[
52
;
1
H
~
[
53
;
1
H
~
[
54
;
1
H
~
[
55
;
1
H
~
[
56
;
1
H
~
[
57
;
1
H
~
[
58
;
1
H
~
[
m
[
59
;
217
H1
,
0
-
1
[
9
CAll
[
1
;
5
H
[
?
12l
[
?
25h
[
?
25l
[
59
;
1
H
[
K
[
59
;
1
H
:
[
?
12l
[
?
25h
q
!
[
?
25l
[
59
;
1
H
[
K
[
59
;
1
H
[
?
1l
>
[
?
12l
[
?
25h
[
?
1049l
Log
message
unchanged
or
not
specified
a
)
bort
,
c
)
ontinue
,
e
)
dit
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using
namespace
std
;
#include "gputypes.h"
#define FABS(a) ((a) > 0.0f ? (a) : -(a))
static
__constant__
cudaGmxSimulation
cSim
;
void
OPENMMCUDA_EXPORT
SetForcesSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetForcesSim copy to cSim failed"
);
}
void
GetForcesSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: GetForcesSim copy from cSim failed"
);
}
__global__
__launch_bounds__
(
384
,
1
)
void
kClearForces_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
outputBuffers
)
{
cSim
.
pForce4
[
pos
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
);
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
OPENMMCUDA_EXPORT
kClearForces
(
gpuContext
gpu
)
{
// printf("kClearForces\n");
kClearForces_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
LAUNCHERROR
(
"kClearForces"
);
}
__global__
__launch_bounds__
(
384
,
1
)
void
kClearBornSumAndForces_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
stride
*
cSim
.
nonbondOutputBuffers
)
{
cSim
.
pBornSum
[
pos
]
=
0.0
f
;
cSim
.
pBornForce
[
pos
]
=
0.0
f
;
cSim
.
pForce4
[
pos
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
);
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
while
(
pos
<
cSim
.
stride
*
cSim
.
outputBuffers
)
{
cSim
.
pForce4
[
pos
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
);
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kClearBornSumAndForces
(
gpuContext
gpu
)
{
// printf("kClearBornSumAndForces\n");
kClearBornSumAndForces_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
LAUNCHERROR
(
"kClearBornSumAndForces"
);
}
__global__
__launch_bounds__
(
384
,
1
)
void
kClearEnergy_kernel
()
{
unsigned
int
pos
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
pos
<
cSim
.
energyOutputBuffers
)
{
((
float
*
)
cSim
.
pEnergy
)[
pos
]
=
0.0
f
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kClearEnergy
(
gpuContext
gpu
)
{
// printf("kClearEnergy\n");
kClearEnergy_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
LAUNCHERROR
(
"kClearEnergy"
);
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceBornSumAndForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
// Reduce forces
while
(
pos
<
cSim
.
stride4
)
{
float
totalForce
=
0.0
f
;
float
*
pFt
=
(
float
*
)
cSim
.
pForce4
+
pos
;
int
i
=
cSim
.
outputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
float
f3
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
float
f4
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
totalForce
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
totalForce
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
totalForce
+=
*
pFt
;
}
pFt
=
(
float
*
)
cSim
.
pForce4
+
pos
;
*
pFt
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
// Reduce Born Sum
while
(
pos
-
cSim
.
stride4
<
cSim
.
atoms
)
{
float
sum
=
0.0
f
;
float
*
pSt
=
cSim
.
pBornSum
+
pos
-
cSim
.
stride4
;
float2
atom
=
cSim
.
pObcData
[
pos
-
cSim
.
stride4
];
// Get summed Born data
int
i
=
cSim
.
nonbondOutputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pSt
;
pSt
+=
cSim
.
stride
;
float
f2
=
*
pSt
;
pSt
+=
cSim
.
stride
;
float
f3
=
*
pSt
;
pSt
+=
cSim
.
stride
;
float
f4
=
*
pSt
;
pSt
+=
cSim
.
stride
;
sum
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pSt
;
pSt
+=
cSim
.
stride
;
float
f2
=
*
pSt
;
pSt
+=
cSim
.
stride
;
sum
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
sum
+=
*
pSt
;
}
// Now calculate Born radius and OBC term.
cSim
.
pBornSum
[
pos
-
cSim
.
stride4
]
=
sum
;
sum
*=
0.5
f
*
atom
.
x
;
float
sum2
=
sum
*
sum
;
float
sum3
=
sum
*
sum2
;
float
tanhSum
=
tanh
(
cSim
.
alphaOBC
*
sum
-
cSim
.
betaOBC
*
sum2
+
cSim
.
gammaOBC
*
sum3
);
float
nonOffsetRadii
=
atom
.
x
+
cSim
.
dielectricOffset
;
float
bornRadius
=
1.0
f
/
(
1.0
f
/
atom
.
x
-
tanhSum
/
nonOffsetRadii
);
float
obcChain
=
atom
.
x
*
(
cSim
.
alphaOBC
-
2.0
f
*
cSim
.
betaOBC
*
sum
+
3.0
f
*
cSim
.
gammaOBC
*
sum2
);
obcChain
=
(
1.0
f
-
tanhSum
*
tanhSum
)
*
obcChain
/
nonOffsetRadii
;
cSim
.
pBornRadii
[
pos
-
cSim
.
stride4
]
=
bornRadius
;
cSim
.
pObcChain
[
pos
-
cSim
.
stride4
]
=
obcChain
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
kReduceBornSumAndForces
(
gpuContext
gpu
)
{
//printf("kReduceBornSumAndForces\n");
kReduceBornSumAndForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceBornSumAndForces"
);
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
// Reduce forces
while
(
pos
<
cSim
.
stride4
)
{
float
totalForce
=
0.0
f
;
float
*
pFt
=
(
float
*
)
cSim
.
pForce4
+
pos
;
int
i
=
cSim
.
outputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
float
f3
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
float
f4
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
totalForce
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride4
;
totalForce
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
totalForce
+=
*
pFt
;
}
pFt
=
(
float
*
)
cSim
.
pForce4
+
pos
;
*
pFt
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
}
void
OPENMMCUDA_EXPORT
kReduceForces
(
gpuContext
gpu
)
{
// printf("kReduceForces\n");
kReduceForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceForces"
);
}
double
kReduceEnergy
(
gpuContext
gpu
)
{
//printf("kReduceEnergy\n");
gpu
->
psEnergy
->
Download
();
double
sum
=
0.0
;
for
(
int
i
=
0
;
i
<
gpu
->
sim
.
energyOutputBuffers
;
i
++
){
sum
+=
(
*
gpu
->
psEnergy
)[
i
];
}
return
sum
;
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceObcGbsaBornForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
while
(
pos
<
cSim
.
atoms
)
{
float
bornRadius
=
cSim
.
pBornRadii
[
pos
];
float
obcChain
=
cSim
.
pObcChain
[
pos
];
float2
obcData
=
cSim
.
pObcData
[
pos
];
float
totalForce
=
0.0
f
;
float
*
pFt
=
cSim
.
pBornForce
+
pos
;
int
i
=
cSim
.
nonbondOutputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f3
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f4
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
totalForce
+=
*
pFt
;
}
float
r
=
(
obcData
.
x
+
cSim
.
dielectricOffset
+
cSim
.
probeRadius
);
float
ratio6
=
pow
((
obcData
.
x
+
cSim
.
dielectricOffset
)
/
bornRadius
,
6.0
f
);
float
saTerm
=
cSim
.
surfaceAreaFactor
*
r
*
r
*
ratio6
;
totalForce
+=
saTerm
/
bornRadius
;
totalForce
*=
bornRadius
*
bornRadius
*
obcChain
;
energy
+=
saTerm
;
pFt
=
cSim
.
pBornForce
+
pos
;
*
pFt
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
// correct for surface area factor of -6
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
/
-
6.0
f
;
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 120)
__launch_bounds__
(
GT2XX_UPDATE_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_UPDATE_THREADS_PER_BLOCK
,
1
)
#endif
void
kReduceGBVIBornForces_kernel
()
{
unsigned
int
pos
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
float
energy
=
0.0
f
;
while
(
pos
<
cSim
.
atoms
)
{
float
bornRadius
=
cSim
.
pBornRadii
[
pos
];
float4
gbviData
=
cSim
.
pGBVIData
[
pos
];
float
totalForce
=
0.0
f
;
float
*
pFt
=
cSim
.
pBornForce
+
pos
;
int
i
=
cSim
.
nonbondOutputBuffers
;
while
(
i
>=
4
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f3
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f4
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
+
f3
+
f4
;
i
-=
4
;
}
if
(
i
>=
2
)
{
float
f1
=
*
pFt
;
pFt
+=
cSim
.
stride
;
float
f2
=
*
pFt
;
pFt
+=
cSim
.
stride
;
totalForce
+=
f1
+
f2
;
i
-=
2
;
}
if
(
i
>
0
)
{
totalForce
+=
*
pFt
;
}
float
ratio
=
(
gbviData
.
x
/
bornRadius
);
float
ratio3
=
ratio
*
ratio
*
ratio
;
energy
-=
gbviData
.
z
*
ratio3
;
totalForce
+=
(
3.0
f
*
gbviData
.
z
*
ratio3
)
/
bornRadius
;
// 'cavity' term
float
br2
=
bornRadius
*
bornRadius
;
totalForce
*=
(
1.0
f
/
3.0
f
)
*
br2
*
br2
;
pFt
=
cSim
.
pBornForce
+
pos
;
*
pFt
=
totalForce
;
pos
+=
gridDim
.
x
*
blockDim
.
x
;
}
cSim
.
pEnergy
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
void
kReduceObcGbsaBornForces
(
gpuContext
gpu
)
{
//printf("kReduceObcGbsaBornForces\n");
if
(
gpu
->
bIncludeGBSA
){
kReduceObcGbsaBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceObcGbsaBornForces"
);
}
else
if
(
gpu
->
bIncludeGBVI
){
kReduceGBVIBornForces_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
bsf_reduce_threads_per_block
>>>
();
LAUNCHERROR
(
"kReduceGBVIBornForces"
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment